# Spaces: Build error / Build error
# (hosting-UI build-log lines that were pasted into the source; kept here as a
# comment so the module parses)
import queue
import threading
from io import BytesIO

import pyaudio  # noqa: F401 -- microphone backend required by speech_recognition
import speech_recognition as sr
import streamlit as st
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer
def load_model(source_lang, target_lang):
    """Fetch the MarianMT tokenizer and model for one language pair.

    Args:
        source_lang: ISO 639-1 code of the spoken language.
        target_lang: ISO 639-1 code of the language to translate into.

    Returns:
        A ``(tokenizer, model)`` pair, or ``(None, None)`` after surfacing the
        failure in the Streamlit UI (e.g. the Helsinki-NLP pair does not exist
        or the download failed).
    """
    repo_id = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
    try:
        mt_tokenizer = MarianTokenizer.from_pretrained(repo_id)
        mt_model = MarianMTModel.from_pretrained(repo_id)
    except Exception as exc:
        # Broad catch on purpose: any hub/network/model error becomes a UI
        # message rather than a crashed script run.
        st.error(
            f"Failed to load model for {source_lang} to {target_lang}. "
            f"Ensure the language pair is supported. Error: {exc}"
        )
        return None, None
    return mt_tokenizer, mt_model
def translate_text(tokenizer, model, text):
    """Translate ``text`` with a MarianMT tokenizer/model pair.

    Empty input short-circuits to ``""``. Otherwise the tokenized input is fed
    to ``model.generate`` and the first candidate sequence is decoded with
    special tokens stripped.
    """
    if not text:
        return ""
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated = model.generate(**encoded)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
def text_to_audio(text, lang):
    """Synthesize ``text`` with gTTS and return it as an in-memory MP3 buffer.

    The returned ``BytesIO`` is rewound to position 0 so callers (e.g.
    ``st.audio``) can read it immediately.
    """
    audio_buffer = BytesIO()
    gTTS(text=text, lang=lang).write_to_fp(audio_buffer)
    audio_buffer.seek(0)
    return audio_buffer
def recognize_speech_live(q):
    """Continuously capture microphone audio and push Google-STT transcripts onto ``q``.

    Runs until an unexpected recognition error occurs. Audio that cannot be
    transcribed is reported as the literal string ``"[Unintelligible]"``.

    NOTE(review): this function runs on a daemon worker thread (see ``main``)
    yet calls ``st.info``/``st.error``; Streamlit UI calls from a non-script
    thread may be dropped without a script-run context -- confirm against the
    Streamlit version in use.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        # Calibrate the energy threshold against background noise before
        # listening for speech.
        recognizer.adjust_for_ambient_noise(source)
        st.info("Start speaking...")
        while True:
            try:
                captured = recognizer.listen(source)
                q.put(recognizer.recognize_google(captured))
            except sr.UnknownValueError:
                q.put("[Unintelligible]")
            except Exception as exc:
                st.error(f"Error during speech recognition: {exc}")
                break
def main():
    """Streamlit entry point: select languages, then loop speech -> text -> translation -> audio.

    A daemon thread feeds recognized utterances into a queue; this (script)
    thread consumes them, translates, and renders text plus synthesized audio.
    """
    st.title("Real-Time Audio Language Translation")
    st.write("Translate spoken words in real time using open-source models.")

    # Display name -> ISO 639-1 code; the codes are used both to build the
    # Helsinki-NLP model name and as the gTTS output language.
    languages = {
        "English": "en",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Italian": "it",
        "Russian": "ru",
        "Chinese": "zh",
        "Japanese": "ja",
        "Korean": "ko",
    }
    source_language = st.selectbox("Select source language:", options=list(languages.keys()))
    target_language = st.selectbox("Select target language:", options=list(languages.keys()))
    if source_language == target_language:
        st.warning("Source and target languages must be different.")
        return
    source_lang_code = languages[source_language]
    target_lang_code = languages[target_language]

    # Load the translation model; load_model has already shown the error in
    # the UI if this fails.
    tokenizer, model = load_model(source_lang_code, target_lang_code)
    if not (tokenizer and model):
        return

    # Real-time speech recognition: placeholders are updated in place on each
    # recognized utterance.
    q = queue.Queue()
    transcription_placeholder = st.empty()
    translation_placeholder = st.empty()
    audio_placeholder = st.empty()

    if st.button("Start Real-Time Translation"):
        st.write("Processing...")
        # Speech recognition runs on a daemon thread so it dies with the app.
        threading.Thread(target=recognize_speech_live, args=(q,), daemon=True).start()
        while True:
            # Blocking get() instead of the previous `if not q.empty()` poll,
            # which busy-spun a CPU core between utterances.
            spoken_text = q.get()
            transcription_placeholder.text_area("Transcribed Text:", spoken_text, height=100)

            translated_text = translate_text(tokenizer, model, spoken_text)
            translation_placeholder.text_area("Translated Text:", translated_text, height=100)

            # gTTS rejects empty text, so only synthesize when there is output.
            if translated_text:
                translated_audio = text_to_audio(translated_text, target_lang_code)
                audio_placeholder.audio(translated_audio, format="audio/mp3")

if __name__ == "__main__":
    main()