Update translation.py
translation.py CHANGED (+5 −22)
@@ -20,7 +20,8 @@ def load_model(src_lang, tgt_lang):
         model = MarianMTModel.from_pretrained(model_name)
         return tokenizer, model
     except Exception as e:
-
+        st.warning(f"Model for {src_lang} to {tgt_lang} not available. Falling back to en-fr.")
+        return _load_default_model() # Fallback to preloaded en-fr model
 
 # Preload default model globally
 DEFAULT_TOKENIZER, DEFAULT_MODEL = _load_default_model()
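For context, a minimal sketch of how the patched load_model reads as a whole. The Helsinki-NLP/opus-mt checkpoint naming, the MarianTokenizer call, and the _load_default_model body are assumptions (the hunk shows only the tail of the function, and the line removed from the except block is garbled in the diff); only the warning-and-fallback lines come from the commit itself:

import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

def _load_default_model():
    # Assumed helper: preloads the en-fr pair that the fallback returns
    name = "Helsinki-NLP/opus-mt-en-fr"
    return MarianTokenizer.from_pretrained(name), MarianMTModel.from_pretrained(name)

def load_model(src_lang, tgt_lang):
    try:
        # Assumed naming scheme for MarianMT checkpoints
        model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        return tokenizer, model
    except Exception as e:
        st.warning(f"Model for {src_lang} to {tgt_lang} not available. Falling back to en-fr.")
        return _load_default_model() # Fallback to preloaded en-fr model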
@@ -33,15 +34,8 @@ def translate(text, source_lang, target_lang):
     src_code = LANGUAGES.get(source_lang, "en")
     tgt_code = LANGUAGES.get(target_lang, "fr")
 
-    # Check that the requested pair is supported
-    if tgt_code not in SUPPORTED_PAIRS.get(src_code, []):
-        raise Exception(f"Translation from {source_lang} to {target_lang} is not supported. Supported pairs: {SUPPORTED_PAIRS.get(src_code, [])}")
-
-    # Use preloaded model if en-fr, else load dynamically
-    if src_code == "en" and tgt_code == "fr":
-        tokenizer, model = DEFAULT_TOKENIZER, DEFAULT_MODEL
-    else:
-        tokenizer, model = load_model(src_code, tgt_code)
+    # Attempt to load the specific model, fall back to en-fr if it fails
+    tokenizer, model = load_model(src_code, tgt_code)
 
     # Perform translation
     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=400)
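The tail of translate() is outside this hunk; a sketch of the whole flow after the change, assuming the standard MarianMT generate and decode calls for the part the diff does not show:

def translate(text, source_lang, target_lang):
    src_code = LANGUAGES.get(source_lang, "en")
    tgt_code = LANGUAGES.get(target_lang, "fr")

    # Attempt to load the specific model, fall back to en-fr if it fails
    tokenizer, model = load_model(src_code, tgt_code)

    # Perform translation
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=400)
    # Assumed continuation (not in the diff): standard MarianMT generation
    output_ids = model.generate(**inputs)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)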
@@ -61,15 +55,4 @@ LANGUAGES = {
     "Japanese": "ja"
 }
 
-# Supported translation pairs
-SUPPORTED_PAIRS = {
-    "en": ["fr", "es", "de", "zh", "ru"], # English to French, Spanish, German, Chinese, Russian
-    "fr": ["en"], # French to English (limited support)
-    "es": ["en"], # Spanish to English
-    "de": ["en"], # German to English
-    "zh": ["en"], # Chinese to English
-    "ru": ["en"], # Russian to English
-    "hi": [], # Hindi not supported as source
-    "ar": [], # Arabic not supported as source
-    "ja": [], # Japanese not supported as source
-}
+# Removed SUPPORTED_PAIRS to revert to original behavior
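Net effect of the commit: a pair that the old SUPPORTED_PAIRS check would have rejected no longer raises. load_model tries the checkpoint, warns on failure, and the preloaded en-fr model translates instead. A hypothetical call illustrating the new behavior:

# Before: raised Exception("Translation from English to Japanese is not supported ...")
# After: if no en-ja checkpoint exists under the assumed naming scheme,
# st.warning fires and the text is translated with the en-fr fallback.
result = translate("Hello world", "English", "Japanese")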