jin-nin committed on
Commit
3e2a2f3
·
1 Parent(s): c7fe81f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -16
app.py CHANGED
@@ -7,28 +7,23 @@ langs = """Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijan
7
  Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk),
8
  Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn),
9
  Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)"""
10
- lang_list = [lang.strip() for lang in langs.split(',')]
11
 
12
  model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
13
  tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
14
 
15
- description = """This is a demo for the paper [*SMaLL-100: Introducing Shallow Multilingual Machine Translation Model for Low-Resource Languages*](https://arxiv.org/abs/2210.11621) by Alireza Mohammadshahi, Vassilina Nikoulina, Alexandre Berard, Caroline Brun, James Henderson, Laurent Besacier
16
-
17
- In this paper, they propose a compact and shallow massively multilingual MT model, and achieve competitive results with M2M-100, while being super smaller and faster. More details are provided [here](https://huggingface.co/alirezamsh/small100). Currently running on 2 vCPU - 16GB RAM."""
18
-
19
- def small100_tr(lang, text):
20
-
21
- lang = lang.split(" ")[-1][1:-1]
22
-
23
  tokenizer.tgt_lang = lang
24
  encoded_text = tokenizer(text, return_tensors="pt")
25
  generated_tokens = model.generate(**encoded_text)
26
  return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
27
 
28
- examples = [["French (fr)", "Life is like a box of chocolates."]]
29
-
30
- output_text = gr.outputs.Textbox()
31
- gr.Interface(small100_tr, inputs=[gr.inputs.Dropdown(lang_list, label=" Target Language"), 'text'], outputs=output_text, title="SMaLL100: Translate much faster between 100 languages",
32
- description=description,
33
- examples=examples
34
- ).launch()
 
 
 
7
  Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk),
8
  Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn),
9
  Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)"""
10
+ lang_list = [lang.strip().split(" ")[-1][1:-1] for lang in langs.split(',')]
11
 
12
  model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
13
  tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
14
 
15
+ def translate(lang, text):
 
 
 
 
 
 
 
16
  tokenizer.tgt_lang = lang
17
  encoded_text = tokenizer(text, return_tensors="pt")
18
  generated_tokens = model.generate(**encoded_text)
19
  return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
20
 
21
+ Output = gr.outputs.Textbox()
22
+ gr.Interface(
23
+ translate,
24
+ inputs=[
25
+ gr.inputs.Dropdown( lang_list, label="To Language" ),
26
+ 'text'
27
+ ],
28
+ outputs=[ Output ],
29
+ ).launch()