Krishna086 committed
Commit e22e364 · verified · 1 Parent(s): 8040f65

Update translation.py

Files changed (1)
  1. translation.py +35 -19
translation.py CHANGED
@@ -1,23 +1,19 @@
 import streamlit as st
 from transformers import MarianTokenizer, MarianMTModel
 
-# Dictionary of supported languages with MarianMT codes
-LANGUAGES = {
-    "English": "en",
-    "French": "fr",
-    "Spanish": "es",
-    "German": "de",
-    "Chinese": "zh",
-    "Arabic": "ar",
-    "Russian": "ru",
-    "Hindi": "hi",
-    "Japanese": "ja"
-}
+# Preload default model for English to French
+@st.cache_resource
+def _load_default_model():
+    """Load default MarianMT model (en-fr)."""
+    model_name = "Helsinki-NLP/opus-mt-en-fr"
+    tokenizer = MarianTokenizer.from_pretrained(model_name)
+    model = MarianMTModel.from_pretrained(model_name)
+    return tokenizer, model
 
-# Cache the model loading for faster performance
+# Cache other models dynamically
 @st.cache_resource
 def load_model(src_lang, tgt_lang):
-    """Load the MarianMT model and tokenizer for a language pair."""
+    """Load MarianMT model for a specific language pair."""
     model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
     try:
         tokenizer = MarianTokenizer.from_pretrained(model_name)
@@ -26,17 +22,37 @@ def load_model(src_lang, tgt_lang):
     except Exception as e:
         raise Exception(f"Model for {src_lang} to {tgt_lang} not available: {str(e)}")
 
+# Preload default model globally
+DEFAULT_TOKENIZER, DEFAULT_MODEL = _load_default_model()
+
 def translate(text, source_lang, target_lang):
-    """Translate text from source language to target language."""
+    """Translate text from source to target language."""
     if not text:
         return "Please provide text to translate."
 
-    # Get language codes from dictionary
     src_code = LANGUAGES.get(source_lang)
     tgt_code = LANGUAGES.get(target_lang)
 
-    # Load model and translate
-    tokenizer, model = load_model(src_code, tgt_code)
+    # Use preloaded model if en-fr, else load dynamically
+    if src_code == "en" and tgt_code == "fr":
+        tokenizer, model = DEFAULT_TOKENIZER, DEFAULT_MODEL
+    else:
+        tokenizer, model = load_model(src_code, tgt_code)
+
+    # Perform translation
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=400)
    translated = model.generate(**inputs)
-    return tokenizer.decode(translated[0], skip_special_tokens=True)
+    return tokenizer.decode(translated[0], skip_special_tokens=True)
+
+# Language dictionary (limited for speed)
+LANGUAGES = {
+    "English": "en",
+    "French": "fr",
+    "Spanish": "es",
+    "German": "de",
+    "Chinese": "zh",
+    "Arabic": "ar",
+    "Russian": "ru",
+    "Hindi": "hi",
+    "Japanese": "ja"
+}
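
For context, a minimal sketch of how this module could be driven from a Streamlit front end. The app file name, widget labels, and layout below are assumptions for illustration and are not part of this commit; only translate() and LANGUAGES come from translation.py.

    # app.py -- hypothetical caller for translation.py (not part of this commit)
    import streamlit as st
    from translation import translate, LANGUAGES

    st.title("MarianMT Translator")

    # Language pickers; the defaults mirror the preloaded en-fr pair.
    source_lang = st.selectbox("Source language", list(LANGUAGES.keys()), index=0)
    target_lang = st.selectbox("Target language", list(LANGUAGES.keys()), index=1)
    text = st.text_area("Text to translate")

    if st.button("Translate"):
        try:
            # The en-fr pair reuses the model preloaded at import time; any other
            # pair downloads and caches a Helsinki-NLP/opus-mt-<src>-<tgt> checkpoint
            # on first use via load_model().
            st.write(translate(text, source_lang, target_lang))
        except Exception as e:
            st.error(str(e))

Because both _load_default_model() and load_model() are wrapped in @st.cache_resource, repeated reruns of such an app reuse the already-loaded tokenizer and model instead of reloading them on every interaction.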