Update app.py
app.py
CHANGED
@@ -7,28 +7,23 @@ langs = """Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijan
 Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk),
 Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn),
 Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)"""
-lang_list = [lang.strip() for lang in langs.split(',')]
+lang_list = [lang.strip().split(" ")[-1][1:-1] for lang in langs.split(',')]
 
 model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
 tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
 
-
-
-In this paper, they propose a compact and shallow massively multilingual MT model, and achieve competitive results with M2M-100, while being super smaller and faster. More details are provided [here](https://huggingface.co/alirezamsh/small100). Currently running on 2 vCPU - 16GB RAM."""
-
-def small100_tr(lang, text):
-
-    lang = lang.split(" ")[-1][1:-1]
-
+def translate(lang, text):
     tokenizer.tgt_lang = lang
     encoded_text = tokenizer(text, return_tensors="pt")
     generated_tokens = model.generate(**encoded_text)
     return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
 
-
-
-
-
-
-
-
+Output = gr.outputs.Textbox()
+gr.Interface(
+    translate,
+    inputs=[
+        gr.inputs.Dropdown( lang_list, label="To Language" ),
+        'text'
+    ],
+    outputs=[ Output ],
+).launch()
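The substantive change in this commit is where the language code is stripped: the old small100_tr received the full dropdown label (e.g. "Turkish (tr)") and extracted the code inside the function, while the new lang_list comprehension does it up front, so both the dropdown values and the lang argument of translate are bare ISO codes. A minimal, dependency-free sketch of what the comprehension produces (the short langs excerpt here is only for illustration):

# Sketch: compare the old and new list comprehensions on a short excerpt of langs.
langs = "Turkish (tr), Ukrainian (uk), Urdu (ur), Chinese (zh), Zulu (zu)"

# Old: keep the full "Name (code)" label; the code was stripped later, inside small100_tr.
old_list = [lang.strip() for lang in langs.split(',')]

# New: take the last space-separated token, e.g. "(tr)", and drop the surrounding parentheses.
new_list = [lang.strip().split(" ")[-1][1:-1] for lang in langs.split(',')]

print(old_list)  # ['Turkish (tr)', 'Ukrainian (uk)', 'Urdu (ur)', 'Chinese (zh)', 'Zulu (zu)']
print(new_list)  # ['tr', 'uk', 'ur', 'zh', 'zu']

One consequence of this choice: the Dropdown now displays raw codes such as "tr" rather than "Turkish (tr)", which is what allows translate to pass lang straight to tokenizer.tgt_lang.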
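The new translate can also be sanity-checked without the Gradio UI. The import lines are outside this hunk, so they are assumptions here: M2M100ForConditionalGeneration comes from transformers, and SMALL100Tokenizer is typically provided by the tokenization_small100.py file distributed with the alirezamsh/small100 checkpoint. A minimal sketch under those assumptions:

from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer  # assumed: custom tokenizer file from the model repo

model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")

def translate(lang, text):
    # lang is a bare ISO code (e.g. "fr"), matching the new lang_list entries.
    tokenizer.tgt_lang = lang
    encoded_text = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(**encoded_text)
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

print(translate("fr", "SMALL-100 is a compact multilingual translation model."))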