"""Real-time audio language translation.

Captures microphone speech, transcribes it with Google's web speech API
(via the SpeechRecognition package), translates the text with an
open-source Helsinki-NLP MarianMT model, and speaks the translation
back with gTTS -- all inside a Streamlit UI.
"""

import queue
import threading
from io import BytesIO

import pyaudio  # noqa: F401 -- not called directly, but sr.Microphone requires it at runtime
import speech_recognition as sr
import streamlit as st
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer

# Queue messages that are status markers, not real speech: shown in the
# transcription box but never sent to the translator / TTS engine.
_SENTINEL_UNINTELLIGIBLE = "[Unintelligible]"
_SENTINEL_ERROR_PREFIX = "[Recognition error] "


def load_model(source_lang, target_lang):
    """Load the MarianMT tokenizer and model for one language pair.

    Args:
        source_lang: ISO 639-1 code of the spoken language (e.g. "en").
        target_lang: ISO 639-1 code of the output language (e.g. "es").

    Returns:
        (tokenizer, model) on success, or (None, None) if the
        Helsinki-NLP checkpoint for this pair does not exist or cannot
        be downloaded; the failure is surfaced via st.error.
    """
    model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
    try:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        return tokenizer, model
    except Exception as e:
        # Not every pair has a published opus-mt checkpoint; report and
        # let the caller bail out instead of crashing the app.
        st.error(
            f"Failed to load model for {source_lang} to {target_lang}. "
            f"Ensure the language pair is supported. Error: {e}"
        )
        return None, None


def translate_text(tokenizer, model, text):
    """Translate *text* with a loaded MarianMT model.

    Returns "" for empty input so callers never feed an empty batch to
    the model. Only the first (and only) generated sequence is decoded.
    """
    if not text:
        return ""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def text_to_audio(text, lang):
    """Synthesize *text* as spoken MP3 audio via gTTS.

    Args:
        text: the sentence to speak.
        lang: gTTS language code -- assumed compatible with the ISO
              codes used for translation (TODO: verify "zh" maps to a
              gTTS-supported Mandarin code).

    Returns:
        A BytesIO positioned at 0, containing MP3 data, ready for
        st.audio.
    """
    tts = gTTS(text=text, lang=lang)
    audio_file = BytesIO()
    tts.write_to_fp(audio_file)
    audio_file.seek(0)
    return audio_file


def recognize_speech_live(q):
    """Continuously capture microphone audio and push transcripts to *q*.

    Runs in a background thread. It must NOT touch Streamlit widgets:
    st.* calls from a non-script thread have no ScriptRunContext and
    silently fail to render, so all communication -- including error
    reports -- goes through the queue as plain strings. Unrecognized
    audio is reported as the "[Unintelligible]" sentinel; any other
    failure is reported once with an error-prefixed message, after
    which the thread exits.
    """
    recognizer = sr.Recognizer()
    mic = sr.Microphone()
    with mic as source:
        recognizer.adjust_for_ambient_noise(source)
        while True:
            try:
                audio_data = recognizer.listen(source)
                text = recognizer.recognize_google(audio_data)
                q.put(text)
            except sr.UnknownValueError:
                q.put(_SENTINEL_UNINTELLIGIBLE)
            except Exception as e:
                # Report via the queue (not st.error) and stop the thread.
                q.put(f"{_SENTINEL_ERROR_PREFIX}{e}")
                break


def main():
    """Streamlit entry point: UI, model loading, and the consume loop."""
    st.title("Real-Time Audio Language Translation")
    st.write("Translate spoken words in real time using open-source models.")

    # Language selection -- keys are display names, values are the ISO
    # codes shared by the opus-mt checkpoints and gTTS.
    languages = {
        "English": "en",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Italian": "it",
        "Russian": "ru",
        "Chinese": "zh",
        "Japanese": "ja",
        "Korean": "ko",
    }
    source_language = st.selectbox("Select source language:", options=list(languages.keys()))
    target_language = st.selectbox("Select target language:", options=list(languages.keys()))
    if source_language == target_language:
        st.warning("Source and target languages must be different.")
        return
    source_lang_code = languages[source_language]
    target_lang_code = languages[target_language]

    # Load the model; abort if the pair is unsupported (error already shown).
    tokenizer, model = load_model(source_lang_code, target_lang_code)
    if not (tokenizer and model):
        return

    # Real-time speech recognition: background thread produces text,
    # this (script) thread consumes, translates, and renders.
    q = queue.Queue()
    transcription_placeholder = st.empty()
    translation_placeholder = st.empty()
    audio_placeholder = st.empty()

    if st.button("Start Real-Time Translation"):
        st.write("Processing...")
        st.info("Start speaking...")  # shown here: the worker thread cannot render UI
        threading.Thread(target=recognize_speech_live, args=(q,), daemon=True).start()
        while True:
            # Blocking get with timeout instead of a busy empty()/get()
            # poll, which would pin a CPU core while waiting for speech.
            try:
                spoken_text = q.get(timeout=0.5)
            except queue.Empty:
                continue

            if spoken_text.startswith(_SENTINEL_ERROR_PREFIX):
                # Worker died; surface the error on the script thread and stop.
                st.error(f"Error during speech recognition: {spoken_text[len(_SENTINEL_ERROR_PREFIX):]}")
                break

            transcription_placeholder.text_area("Transcribed Text:", spoken_text, height=100)
            if spoken_text == _SENTINEL_UNINTELLIGIBLE:
                # Status marker only -- don't waste a model/TTS round trip on it.
                continue

            # Translate text
            translated_text = translate_text(tokenizer, model, spoken_text)
            translation_placeholder.text_area("Translated Text:", translated_text, height=100)

            # Generate and play translated audio
            translated_audio = text_to_audio(translated_text, target_lang_code)
            audio_placeholder.audio(translated_audio, format="audio/mp3")


if __name__ == "__main__":
    main()