Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
import streamlit as st
|
2 |
-
import os
|
3 |
import speech_recognition as sr
|
4 |
from transformers import MarianMTModel, MarianTokenizer
|
5 |
from gtts import gTTS
|
6 |
from io import BytesIO
|
7 |
-
import
|
|
|
|
|
8 |
|
9 |
def load_model(source_lang, target_lang):
|
10 |
model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
|
@@ -24,19 +25,6 @@ def translate_text(tokenizer, model, text):
|
|
24 |
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
25 |
return translated_text
|
26 |
|
27 |
-
def audio_to_text(audio_file):
    """Transcribe a recorded audio file to text via Google Speech Recognition.

    Returns the recognized text, or "" when recognition fails; in the
    failure case an error message is surfaced in the Streamlit UI.
    """
    rec = sr.Recognizer()
    # Load the whole clip into memory before handing it to the recognizer.
    with sr.AudioFile(audio_file) as clip:
        captured = rec.record(clip)
    try:
        return rec.recognize_google(captured)
    except sr.UnknownValueError:
        st.error("Speech Recognition could not understand the audio.")
    except sr.RequestError as e:
        st.error(f"Could not request results from Speech Recognition service; {e}")
    return ""
|
39 |
-
|
40 |
def text_to_audio(text, lang):
|
41 |
tts = gTTS(text=text, lang=lang)
|
42 |
audio_file = BytesIO()
|
@@ -44,10 +32,28 @@ def text_to_audio(text, lang):
|
|
44 |
audio_file.seek(0)
|
45 |
return audio_file
|
46 |
|
47 |
-
def
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
|
|
|
|
|
|
|
|
51 |
# Language selection
|
52 |
languages = {
|
53 |
"English": "en",
|
@@ -71,35 +77,35 @@ def main():
|
|
71 |
source_lang_code = languages[source_language]
|
72 |
target_lang_code = languages[target_language]
|
73 |
|
74 |
-
# Load the model
|
75 |
tokenizer, model = load_model(source_lang_code, target_lang_code)
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
temp_audio.write(uploaded_audio.read())
|
84 |
-
temp_audio_path = temp_audio.name
|
85 |
|
86 |
-
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
st.success("Translation completed!")
|
97 |
-
st.text_area("Translated text:", translated_text, height=100)
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
st.audio(output_audio, format="audio/mp3")
|
103 |
|
104 |
if __name__ == "__main__":
|
105 |
main()
|
|
|
1 |
import streamlit as st
|
|
|
2 |
import speech_recognition as sr
|
3 |
from transformers import MarianMTModel, MarianTokenizer
|
4 |
from gtts import gTTS
|
5 |
from io import BytesIO
|
6 |
+
import queue
|
7 |
+
import threading
|
8 |
+
import pyaudio
|
9 |
|
10 |
def load_model(source_lang, target_lang):
|
11 |
model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
|
|
|
25 |
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
26 |
return translated_text
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def text_to_audio(text, lang):
|
29 |
tts = gTTS(text=text, lang=lang)
|
30 |
audio_file = BytesIO()
|
|
|
32 |
audio_file.seek(0)
|
33 |
return audio_file
|
34 |
|
35 |
+
def recognize_speech_live(q):
    """Continuously capture microphone audio and push transcripts onto *q*.

    Loops until an unexpected error occurs; audio the service cannot
    understand is reported as the literal string "[Unintelligible]".

    NOTE(review): this function is started on a background thread by main(),
    yet it calls st.info / st.error — Streamlit commands issued from a
    non-main thread lack a ScriptRunContext and may be dropped; confirm
    these messages actually render.
    """
    rec = sr.Recognizer()
    with sr.Microphone() as source:
        # Calibrate the energy threshold against background noise once,
        # before entering the capture loop.
        rec.adjust_for_ambient_noise(source)
        st.info("Start speaking...")
        while True:
            try:
                q.put(rec.recognize_google(rec.listen(source)))
            except sr.UnknownValueError:
                q.put("[Unintelligible]")
            except Exception as e:
                st.error(f"Error during speech recognition: {e}")
                break
52 |
|
53 |
+
def main():
|
54 |
+
st.title("Real-Time Audio Language Translation")
|
55 |
+
st.write("Translate spoken words in real time using open-source models.")
|
56 |
+
|
57 |
# Language selection
|
58 |
languages = {
|
59 |
"English": "en",
|
|
|
77 |
source_lang_code = languages[source_language]
|
78 |
target_lang_code = languages[target_language]
|
79 |
|
80 |
+
# Load the model
|
81 |
tokenizer, model = load_model(source_lang_code, target_lang_code)
|
82 |
+
if not (tokenizer and model):
|
83 |
+
return
|
84 |
|
85 |
+
# Real-time speech recognition
|
86 |
+
q = queue.Queue()
|
87 |
+
transcription_placeholder = st.empty()
|
88 |
+
translation_placeholder = st.empty()
|
89 |
+
audio_placeholder = st.empty()
|
90 |
|
91 |
+
if st.button("Start Real-Time Translation"):
|
92 |
+
st.write("Processing...")
|
|
|
|
|
93 |
|
94 |
+
# Start speech recognition in a separate thread
|
95 |
+
threading.Thread(target=recognize_speech_live, args=(q,), daemon=True).start()
|
96 |
|
97 |
+
while True:
|
98 |
+
if not q.empty():
|
99 |
+
spoken_text = q.get()
|
100 |
+
transcription_placeholder.text_area("Transcribed Text:", spoken_text, height=100)
|
101 |
|
102 |
+
# Translate text
|
103 |
+
translated_text = translate_text(tokenizer, model, spoken_text)
|
104 |
+
translation_placeholder.text_area("Translated Text:", translated_text, height=100)
|
|
|
|
|
105 |
|
106 |
+
# Generate and play translated audio
|
107 |
+
translated_audio = text_to_audio(translated_text, target_lang_code)
|
108 |
+
audio_placeholder.audio(translated_audio, format="audio/mp3")
|
|
|
109 |
|
110 |
if __name__ == "__main__":
|
111 |
main()
|