adnaniqbal001 committed on
Commit
c08f982
·
verified ·
1 Parent(s): 42b655a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -104
app.py CHANGED
@@ -1,111 +1,34 @@
1
  import streamlit as st
2
- import speech_recognition as sr
3
  from transformers import MarianMTModel, MarianTokenizer
4
- from gtts import gTTS
5
- from io import BytesIO
6
- import queue
7
- import threading
8
- import pyaudio
9
 
10
- def load_model(source_lang, target_lang):
11
- model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
12
- try:
13
- tokenizer = MarianTokenizer.from_pretrained(model_name)
14
- model = MarianMTModel.from_pretrained(model_name)
15
- return tokenizer, model
16
- except Exception as e:
17
- st.error(f"Failed to load model for {source_lang} to {target_lang}. Ensure the language pair is supported. Error: {e}")
18
- return None, None
19
 
20
- def translate_text(tokenizer, model, text):
21
- if not text:
22
- return ""
23
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
24
- outputs = model.generate(**inputs)
25
- translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
26
- return translated_text
27
-
28
- def text_to_audio(text, lang):
29
- tts = gTTS(text=text, lang=lang)
30
- audio_file = BytesIO()
31
- tts.write_to_fp(audio_file)
32
- audio_file.seek(0)
33
- return audio_file
34
-
35
- def recognize_speech_live(q):
36
- recognizer = sr.Recognizer()
37
- mic = sr.Microphone()
38
-
39
- with mic as source:
40
- recognizer.adjust_for_ambient_noise(source)
41
- st.info("Start speaking...")
42
- while True:
43
- try:
44
- audio_data = recognizer.listen(source)
45
- text = recognizer.recognize_google(audio_data)
46
- q.put(text)
47
- except sr.UnknownValueError:
48
- q.put("[Unintelligible]")
49
- except Exception as e:
50
- st.error(f"Error during speech recognition: {e}")
51
- break
52
-
53
- def main():
54
- st.title("Real-Time Audio Language Translation")
55
- st.write("Translate spoken words in real time using open-source models.")
56
-
57
- # Language selection
58
- languages = {
59
- "English": "en",
60
- "Spanish": "es",
61
- "French": "fr",
62
- "German": "de",
63
- "Italian": "it",
64
- "Russian": "ru",
65
- "Chinese": "zh",
66
- "Japanese": "ja",
67
- "Korean": "ko",
68
- }
69
-
70
- source_language = st.selectbox("Select source language:", options=list(languages.keys()))
71
- target_language = st.selectbox("Select target language:", options=list(languages.keys()))
72
-
73
- if source_language == target_language:
74
- st.warning("Source and target languages must be different.")
75
- return
76
-
77
- source_lang_code = languages[source_language]
78
- target_lang_code = languages[target_language]
79
 
80
- # Load the model
81
- tokenizer, model = load_model(source_lang_code, target_lang_code)
82
- if not (tokenizer and model):
83
- return
84
-
85
- # Real-time speech recognition
86
- q = queue.Queue()
87
- transcription_placeholder = st.empty()
88
- translation_placeholder = st.empty()
89
- audio_placeholder = st.empty()
90
-
91
- if st.button("Start Real-Time Translation"):
92
- st.write("Processing...")
93
-
94
- # Start speech recognition in a separate thread
95
- threading.Thread(target=recognize_speech_live, args=(q,), daemon=True).start()
96
-
97
- while True:
98
- if not q.empty():
99
- spoken_text = q.get()
100
- transcription_placeholder.text_area("Transcribed Text:", spoken_text, height=100)
101
-
102
- # Translate text
103
- translated_text = translate_text(tokenizer, model, spoken_text)
104
- translation_placeholder.text_area("Translated Text:", translated_text, height=100)
105
-
106
- # Generate and play translated audio
107
- translated_audio = text_to_audio(translated_text, target_lang_code)
108
- audio_placeholder.audio(translated_audio, format="audio/mp3")
109
 
110
- if __name__ == "__main__":
111
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
2
  from transformers import MarianMTModel, MarianTokenizer
 
 
 
 
 
3
 
4
+ # Load pre-trained model and tokenizer
5
+ model_name = 'Helsinki-NLP/opus-mt-ur-de'
6
+ model = MarianMTModel.from_pretrained(model_name)
7
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
 
 
 
 
 
8
 
9
+ # Function to translate text
10
+ def translate_text(text, src_lang, tgt_lang):
11
+ # Tokenize input text
12
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # Translate and decode
15
+ translated = model.generate(**inputs)
16
+ translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
17
+ return translated_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ # Streamlit app layout
20
+ st.title("Real-Time Urdu to German Translation")
21
+ st.write("Enter Urdu text below, and the app will translate it into German.")
22
+
23
+ # Input text area for Urdu text
24
+ input_text = st.text_area("Urdu Text", "", height=200)
25
+
26
+ # Translate when the button is pressed
27
+ if st.button("Translate"):
28
+ if input_text:
29
+ # Translate the text
30
+ translated_text = translate_text(input_text, "ur", "de")
31
+ st.subheader("Translated German Text:")
32
+ st.write(translated_text)
33
+ else:
34
+ st.write("Please enter some Urdu text to translate.")