"""Gradio app: multilingual translation with the mBART-50 many-to-many model."""

import gradio as gr
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import tiktoken  # NOTE(review): unused in this file — confirm nothing else needs it, then remove.

# Hoist the checkpoint name so model and tokenizer are guaranteed to match.
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Load the model and tokenizer once at startup (slow; do not reload per request).
model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_NAME)

# Human-readable language name -> mBART-50 language code.
language_codes = {
    "Arabic": "ar_AR",
    "Czech": "cs_CZ",
    "German": "de_DE",
    "English": "en_XX",
    "Spanish": "es_XX",
    "Estonian": "et_EE",
    "Finnish": "fi_FI",
    "French": "fr_XX",
    "Gujarati": "gu_IN",
    "Hindi": "hi_IN",
    "Italian": "it_IT",
    "Japanese": "ja_XX",
    "Kazakh": "kk_KZ",
    "Korean": "ko_KR",
    "Lithuanian": "lt_LT",
    "Latvian": "lv_LV",
    "Burmese": "my_MM",
    "Nepali": "ne_NP",
    "Dutch": "nl_XX",
    "Romanian": "ro_RO",
    "Russian": "ru_RU",
    "Sinhala": "si_LK",
    "Turkish": "tr_TR",
    "Vietnamese": "vi_VN",
    "Chinese": "zh_CN",
}


def translate(text, src_lang, tgt_lang):
    """Translate *text* from ``src_lang`` to ``tgt_lang`` with mBART-50.

    Args:
        text: Source-language text to translate.
        src_lang: Human-readable source language name (key of ``language_codes``).
        tgt_lang: Human-readable target language name (key of ``language_codes``).

    Returns:
        The translated text, or an empty string for empty/whitespace input.

    Raises:
        KeyError: If ``src_lang`` or ``tgt_lang`` is not in ``language_codes``.
    """
    # Skip the (expensive) model call for empty input.
    if not text or not text.strip():
        return ""

    # Setting src_lang makes the tokenizer prepend the right language token.
    tokenizer.src_lang = language_codes[src_lang]
    encoded = tokenizer(text, return_tensors="pt")

    # Force the decoder to start with the target-language token.
    # convert_tokens_to_ids is stable across transformers versions;
    # the old `tokenizer.lang_code_to_id` mapping was deprecated and
    # removed in newer releases.
    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(language_codes[tgt_lang]),
    )

    # batch_decode returns one string per sequence; we generated exactly one.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]


# Build the Gradio UI: free-text input plus source/target language pickers.
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Input Text"),
        gr.Dropdown(choices=list(language_codes.keys()), label="Source Language"),
        gr.Dropdown(choices=list(language_codes.keys()), label="Target Language"),
    ],
    outputs=gr.Textbox(label="Translated Text"),
    title="Multilingual Translation with MBart",
    description="Translate text between multiple languages using the MBart model.",
)

# Launch the interface (kept at module level to preserve the original
# script's behavior when executed directly, e.g. on a hosted Space).
iface.launch()