# Hugging Face Space: text translation demo built on the SMaLL-100 model
# (alirezamsh/small100). Select a target language, enter text, click Translate.
import gradio as gr
from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer
# Human-readable list of the languages SMaLL-100 supports; each entry ends
# with its ISO code in parentheses, which is what we actually extract below.
langs = """Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijani (az), Bashkir (ba), Belarusian (be), Bulgarian (bg), Bengali (bn), Breton (br), Bosnian (bs), Catalan; Valencian (ca), Cebuano (ceb), Czech (cs), Welsh (cy), Danish (da), German (de), Greek (el), English (en), Spanish (es), Estonian (et), Persian (fa), Fulah (ff), Finnish (fi), French (fr), Western Frisian (fy), Irish (ga), Gaelic; Scottish Gaelic (gd), Galician (gl), Gujarati (gu), Hausa (ha), Hebrew (he), Hindi (hi), Croatian (hr), Haitian; Haitian Creole (ht), Hungarian (hu), Armenian (hy), Indonesian (id), Igbo (ig), Iloko (ilo), Icelandic (is), Italian (it), Japanese (ja), Javanese (jv), Georgian (ka), Kazakh (kk), Central Khmer (km), Kannada (kn),
Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk),
Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn),
Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)"""
# Pull the ISO code out of the trailing "(xx)" of each comma-separated entry.
# Entries never contain internal commas, so a plain split(',') is safe here.
lang_list = [lang.strip().split(" ")[-1][1:-1] for lang in langs.split(',')]
# Load the SMaLL-100 multilingual model and its dedicated tokenizer once at
# startup (downloads from the Hugging Face Hub on first run).
model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
def translate(lang, text):
    """Translate *text* into the target language *lang* (ISO code, e.g. "fr").

    Uses the module-level SMaLL-100 model/tokenizer. SMaLL-100 only needs the
    target language set on the tokenizer; the source language is detected
    implicitly by the model.
    """
    tokenizer.tgt_lang = lang
    encoded_text = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(**encoded_text)
    # batch_decode returns a list (one entry per input); we sent one input.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
with gr.Blocks(analytics_enabled=False) as app:
    Source = gr.Textbox(label="Source")
    Language = gr.Dropdown(lang_list, label="Language")
    Translate = gr.Button("Translate")
    Result = gr.Textbox(label="Result")
    Translate.click(
        translate,
        # BUG FIX: translate(lang, text) takes the language FIRST; the original
        # passed [Source, Language], feeding the source text in as the language.
        inputs=[Language, Source],
        outputs=[Result],
        api_name="translate",
    )

# Queue must be configured BEFORE launch() (launch blocks the main thread);
# the original called `block.queue(...)` — an undefined name — after launch.
app.queue(concurrency_count=2)
app.launch(inline=True)