import os
import tempfile
from io import BytesIO

import streamlit as st
import speech_recognition as sr
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer


def load_model(source_lang, target_lang):
    """Load the MarianMT model and tokenizer for the given language pair."""
    model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
    try:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        return tokenizer, model
    except Exception as e:
        st.error(
            f"Failed to load model for {source_lang} to {target_lang}. "
            f"Ensure the language pair is supported. Error: {e}"
        )
        return None, None


def translate_text(tokenizer, model, text):
    """Translate text with the loaded MarianMT model."""
    if not text:
        return ""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def audio_to_text(audio_file, language):
    """Transcribe a WAV file using the Google Web Speech API."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)
    try:
        # Pass the source language as a hint; two-letter codes work for most
        # languages, though some (e.g. Chinese) may need a regional tag such as "zh-CN".
        return recognizer.recognize_google(audio_data, language=language)
    except sr.UnknownValueError:
        st.error("Speech Recognition could not understand the audio.")
    except sr.RequestError as e:
        st.error(f"Could not request results from Speech Recognition service; {e}")
    return ""


def text_to_audio(text, lang):
    """Convert text to speech with gTTS and return an in-memory MP3 buffer."""
    tts = gTTS(text=text, lang=lang)
    audio_file = BytesIO()
    tts.write_to_fp(audio_file)
    audio_file.seek(0)
    return audio_file


def main():
    st.title("Audio Language Translation App")
    st.write("Translate audio between multiple languages using open-source models.")

    # Language selection. Note: not every pair has a Helsinki-NLP/opus-mt model,
    # and gTTS may expect "zh-CN" rather than "zh" for Chinese output.
    languages = {
        "English": "en",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Italian": "it",
        "Russian": "ru",
        "Chinese": "zh",
        "Japanese": "ja",
        "Korean": "ko",
    }
    source_language = st.selectbox("Select source language:", options=list(languages.keys()))
    target_language = st.selectbox("Select target language:", options=list(languages.keys()))

    if source_language == target_language:
        st.warning("Source and target languages must be different.")
        return

    source_lang_code = languages[source_language]
    target_lang_code = languages[target_language]

    # Load the model and tokenizer
    tokenizer, model = load_model(source_lang_code, target_lang_code)

    if tokenizer and model:
        # Audio input
        uploaded_audio = st.file_uploader("Upload an audio file (WAV format):", type=["wav"])

        if uploaded_audio is not None:
            # Read the upload once, then reuse the bytes for playback and for the
            # temporary file handed to SpeechRecognition.
            audio_bytes = uploaded_audio.read()
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                temp_audio.write(audio_bytes)
                temp_audio_path = temp_audio.name

            st.audio(audio_bytes, format="audio/wav")

            with st.spinner("Converting audio to text..."):
                input_text = audio_to_text(temp_audio_path, source_lang_code)
            os.remove(temp_audio_path)  # clean up the temporary file
            st.success("Audio converted to text!")
            st.text_area("Transcribed text:", input_text, height=100)

            if st.button("Translate and Generate Audio"):
                with st.spinner("Translating text..."):
                    translated_text = translate_text(tokenizer, model, input_text)
                st.success("Translation completed!")
                st.text_area("Translated text:", translated_text, height=100)

                with st.spinner("Generating audio..."):
                    output_audio = text_to_audio(translated_text, target_lang_code)
                st.success("Audio generated!")
                st.audio(output_audio, format="audio/mp3")


if __name__ == "__main__":
    main()
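# Usage note (a sketch, assuming the script is saved as app.py):
#
#   pip install streamlit SpeechRecognition transformers torch sentencepiece gTTS
#   streamlit run app.py
#
# The MarianMT checkpoints are downloaded from the Hugging Face Hub the first
# time a language pair is selected, so an internet connection is required then;
# the Google Web Speech API used for transcription and gTTS also need network access.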