adnaniqbal001 committed on
Commit
f95d18e
·
verified ·
1 Parent(s): ce97b0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -40
app.py CHANGED
@@ -1,10 +1,11 @@
1
  import streamlit as st
2
- import os
3
  import speech_recognition as sr
4
  from transformers import MarianMTModel, MarianTokenizer
5
  from gtts import gTTS
6
  from io import BytesIO
7
- import tempfile
 
 
8
 
9
  def load_model(source_lang, target_lang):
10
  model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
@@ -24,19 +25,6 @@ def translate_text(tokenizer, model, text):
24
  translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
25
  return translated_text
26
 
27
- def audio_to_text(audio_file):
28
- recognizer = sr.Recognizer()
29
- with sr.AudioFile(audio_file) as source:
30
- audio_data = recognizer.record(source)
31
- try:
32
- text = recognizer.recognize_google(audio_data)
33
- return text
34
- except sr.UnknownValueError:
35
- st.error("Speech Recognition could not understand the audio.")
36
- except sr.RequestError as e:
37
- st.error(f"Could not request results from Speech Recognition service; {e}")
38
- return ""
39
-
40
  def text_to_audio(text, lang):
41
  tts = gTTS(text=text, lang=lang)
42
  audio_file = BytesIO()
@@ -44,10 +32,28 @@ def text_to_audio(text, lang):
44
  audio_file.seek(0)
45
  return audio_file
46
 
47
- def main():
48
- st.title("Audio Language Translation App")
49
- st.write("Translate audio between multiple languages using open-source models.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
 
 
 
51
  # Language selection
52
  languages = {
53
  "English": "en",
@@ -71,35 +77,35 @@ def main():
71
  source_lang_code = languages[source_language]
72
  target_lang_code = languages[target_language]
73
 
74
- # Load the model and tokenizer
75
  tokenizer, model = load_model(source_lang_code, target_lang_code)
 
 
76
 
77
- if tokenizer and model:
78
- # Audio input
79
- uploaded_audio = st.file_uploader("Upload an audio file (WAV format):", type=["wav"])
 
 
80
 
81
- if uploaded_audio is not None:
82
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
83
- temp_audio.write(uploaded_audio.read())
84
- temp_audio_path = temp_audio.name
85
 
86
- st.audio(uploaded_audio, format="audio/wav")
 
87
 
88
- with st.spinner("Converting audio to text..."):
89
- input_text = audio_to_text(temp_audio_path)
90
- st.success("Audio converted to text!")
91
- st.text_area("Transcribed text:", input_text, height=100)
92
 
93
- if st.button("Translate and Generate Audio"):
94
- with st.spinner("Translating text..."):
95
- translated_text = translate_text(tokenizer, model, input_text)
96
- st.success("Translation completed!")
97
- st.text_area("Translated text:", translated_text, height=100)
98
 
99
- with st.spinner("Generating audio..."):
100
- output_audio = text_to_audio(translated_text, target_lang_code)
101
- st.success("Audio generated!")
102
- st.audio(output_audio, format="audio/mp3")
103
 
104
  if __name__ == "__main__":
105
  main()
 
1
  import streamlit as st
 
2
  import speech_recognition as sr
3
  from transformers import MarianMTModel, MarianTokenizer
4
  from gtts import gTTS
5
  from io import BytesIO
6
+ import queue
7
+ import threading
8
+ import pyaudio
9
 
10
  def load_model(source_lang, target_lang):
11
  model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
 
25
  translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
26
  return translated_text
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def text_to_audio(text, lang):
29
  tts = gTTS(text=text, lang=lang)
30
  audio_file = BytesIO()
 
32
  audio_file.seek(0)
33
  return audio_file
34
 
35
def recognize_speech_live(q):
    """Continuously capture microphone audio, transcribe it, and push results onto *q*.

    Runs an unbounded loop, so it is intended to be launched in a background
    (daemon) thread; the consumer drains *q* for transcribed text.

    Parameters
    ----------
    q : queue.Queue
        Queue that receives each recognized utterance as a string
        (or the placeholder "[Unintelligible]" when recognition fails).
    """
    recognizer = sr.Recognizer()
    mic = sr.Microphone()

    with mic as source:
        # One-time calibration of the energy threshold against background noise.
        recognizer.adjust_for_ambient_noise(source)
        # NOTE(review): st.info/st.error are invoked from a background thread
        # here; Streamlit UI calls made off the script thread generally do not
        # render (missing ScriptRunContext) — confirm, and consider signalling
        # status/errors through the queue instead.
        st.info("Start speaking...")
        while True:
            try:
                # listen() blocks until a phrase is captured, then the audio is
                # sent to Google's free web recognition endpoint (network call).
                audio_data = recognizer.listen(source)
                text = recognizer.recognize_google(audio_data)
                q.put(text)
            except sr.UnknownValueError:
                # Audio was captured but could not be understood; emit a
                # placeholder so the consumer still sees an update.
                q.put("[Unintelligible]")
            except Exception as e:
                # Any other failure (e.g. network/RequestError) ends the loop.
                st.error(f"Error during speech recognition: {e}")
                break
52
 
53
+ def main():
54
+ st.title("Real-Time Audio Language Translation")
55
+ st.write("Translate spoken words in real time using open-source models.")
56
+
57
  # Language selection
58
  languages = {
59
  "English": "en",
 
77
  source_lang_code = languages[source_language]
78
  target_lang_code = languages[target_language]
79
 
80
+ # Load the model
81
  tokenizer, model = load_model(source_lang_code, target_lang_code)
82
+ if not (tokenizer and model):
83
+ return
84
 
85
+ # Real-time speech recognition
86
+ q = queue.Queue()
87
+ transcription_placeholder = st.empty()
88
+ translation_placeholder = st.empty()
89
+ audio_placeholder = st.empty()
90
 
91
+ if st.button("Start Real-Time Translation"):
92
+ st.write("Processing...")
 
 
93
 
94
+ # Start speech recognition in a separate thread
95
+ threading.Thread(target=recognize_speech_live, args=(q,), daemon=True).start()
96
 
97
+ while True:
98
+ if not q.empty():
99
+ spoken_text = q.get()
100
+ transcription_placeholder.text_area("Transcribed Text:", spoken_text, height=100)
101
 
102
+ # Translate text
103
+ translated_text = translate_text(tokenizer, model, spoken_text)
104
+ translation_placeholder.text_area("Translated Text:", translated_text, height=100)
 
 
105
 
106
+ # Generate and play translated audio
107
+ translated_audio = text_to_audio(translated_text, target_lang_code)
108
+ audio_placeholder.audio(translated_audio, format="audio/mp3")
 
109
 
110
  if __name__ == "__main__":
111
  main()