breadlicker45 committed on
Commit
c239aa3
·
verified ·
1 Parent(s): 6495b05

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Checkpoint shared by the model and its tokenizer; keeping it in one place
# guarantees the two are always loaded from the same repository.
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Load the model and tokenizer once at import time so every request reuses them.
model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_NAME)

# Maps the language names shown in the UI to the language-code tokens passed
# to the tokenizer/model.
# NOTE: this table lists 25 languages; the mBART-50 model card documents
# further codes, which can be added here to expose them in the dropdowns.
language_codes = {
    "Arabic": "ar_AR",
    "Czech": "cs_CZ",
    "German": "de_DE",
    "English": "en_XX",
    "Spanish": "es_XX",
    "Estonian": "et_EE",
    "Finnish": "fi_FI",
    "French": "fr_XX",
    "Gujarati": "gu_IN",
    "Hindi": "hi_IN",
    "Italian": "it_IT",
    "Japanese": "ja_XX",
    "Kazakh": "kk_KZ",
    "Korean": "ko_KR",
    "Lithuanian": "lt_LT",
    "Latvian": "lv_LV",
    "Burmese": "my_MM",
    "Nepali": "ne_NP",
    "Dutch": "nl_XX",
    "Romanian": "ro_RO",
    "Russian": "ru_RU",
    "Sinhala": "si_LK",
    "Turkish": "tr_TR",
    "Vietnamese": "vi_VN",
    "Chinese": "zh_CN",
}

def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    """Translate ``text`` from ``src_lang`` into ``tgt_lang``.

    Args:
        text: The text to translate.
        src_lang: Display name of the source language; must be a key of
            ``language_codes`` (e.g. ``"English"``).
        tgt_lang: Display name of the target language; must be a key of
            ``language_codes``.

    Returns:
        The translated text, or an empty string when ``text`` is empty or
        contains only whitespace.

    Raises:
        gr.Error: If either language is unset or not in ``language_codes``.
    """
    # Nothing to translate: skip tokenization and generation entirely.
    if not text or not text.strip():
        return ""

    # A dropdown with no selection delivers None; report that to the user
    # instead of failing with a bare KeyError on the lookup below.
    for role, name in (("source", src_lang), ("target", tgt_lang)):
        if name not in language_codes:
            raise gr.Error(f"Please select a valid {role} language.")

    # Set the source language so the tokenizer adds the matching language token.
    tokenizer.src_lang = language_codes[src_lang]

    # Tokenize the input text
    encoded = tokenizer(text, return_tensors="pt")

    # Generate the translation; forcing the first generated token to the
    # target-language code is what selects the output language.
    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.lang_code_to_id[language_codes[tgt_lang]],
    )

    # Decode the single generated sequence, dropping the special tokens.
    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

    return translation

# Create the Gradio interface.
# Both dropdowns share the same list of language names and start with a
# default selection, so the first submit never passes None to translate().
_language_names = list(language_codes)

iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Input Text"),
        gr.Dropdown(choices=_language_names, value="English", label="Source Language"),
        gr.Dropdown(choices=_language_names, value="French", label="Target Language"),
    ],
    outputs=gr.Textbox(label="Translated Text"),
    title="Multilingual Translation with MBart",
    description="Translate text between multiple languages using the MBart model.",
)

# Launch the interface
iface.launch()