"""Streamlit translation helpers built on Helsinki-NLP MarianMT models."""

import streamlit as st
import torch
from transformers import MarianMTModel, MarianTokenizer

# Display-name -> ISO 639-1 code mapping, defined once and reused below
# instead of duplicating the dict inside translate_cached.
LANGUAGES = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Hindi": "hi",
    "Chinese": "zh",
    "Arabic": "ar",
    "Russian": "ru",
    "Japanese": "ja",
}


@st.cache_resource
def _load_default_model():
    """Load the en-fr fallback model once per process."""
    model_name = "Helsinki-NLP/opus-mt-en-fr"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


@st.cache_resource
def load_model(source_lang, target_lang):
    """Load the opus-mt model for a language pair, falling back to en-fr."""
    if source_lang == target_lang:
        # No opus-mt model exists for an identical source/target pair;
        # use the cached fallback to avoid a download error.
        return _load_default_model()
    try:
        model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        return tokenizer, model
    except Exception:
        st.warning(f"No direct model for {source_lang} to {target_lang}. Using cached en-fr.")
        return _load_default_model()


@st.cache_data
def translate_cached(text, source_lang, target_lang):
    """Translate text, caching results keyed on (text, source, target)."""
    src_code = LANGUAGES.get(source_lang, "en")
    tgt_code = LANGUAGES.get(target_lang, "fr")
    tokenizer, model = load_model(src_code, tgt_code)
    inputs = tokenizer(text, return_tensors="pt", padding=True,
                       truncation=True, max_length=500)
    with torch.no_grad():
        translated = model.generate(**inputs, max_length=500)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


def translate(text, source_lang, target_lang):
    """Public entry point: validate input and surface errors in the UI."""
    if not text:
        return "No text provided."
    try:
        return translate_cached(text, source_lang, target_lang)
    except Exception as e:
        st.error(f"Translation error: {e}. Using input as fallback.")
        return text
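
# --- Usage sketch -----------------------------------------------------------
# A minimal example of how these helpers might be wired into a Streamlit page.
# The widget labels and layout below are illustrative assumptions, not part of
# the original module; only LANGUAGES and translate() come from the code above.
# Run with: streamlit run <this_file>.py
if __name__ == "__main__":
    st.title("MarianMT Translator")
    col1, col2 = st.columns(2)
    with col1:
        source = st.selectbox("Source language", list(LANGUAGES), index=0)
    with col2:
        target = st.selectbox("Target language", list(LANGUAGES), index=1)
    text = st.text_area("Text to translate")
    if st.button("Translate"):
        # translate() handles empty input, missing models, and runtime errors,
        # so the UI layer can simply display whatever string comes back.
        st.write(translate(text, source, target))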