# Spaces: Build error (status text captured from the hosting page; not part of the program)
import os
import tempfile
from io import BytesIO

import speech_recognition as sr
import streamlit as st
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer
def load_model(source_lang, target_lang):
    """Load the MarianMT tokenizer and model for a language pair.

    The checkpoint name is built as
    ``Helsinki-NLP/opus-mt-{source_lang}-{target_lang}``.

    Args:
        source_lang: Language code of the text to translate (e.g. ``"en"``).
        target_lang: Language code to translate into (e.g. ``"es"``).

    Returns:
        A ``(tokenizer, model)`` tuple on success, or ``(None, None)`` when
        loading fails (the error is shown to the user with ``st.error``).
    """
    model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"

    # Streamlit re-executes the script on every widget interaction, so keep
    # the most recently loaded pair in session state instead of reloading it
    # on each rerun. Only one pair is kept to bound memory use.
    cached = st.session_state.get("_translation_model")
    if cached is not None and cached[0] == model_name:
        return cached[1], cached[2]

    try:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
    except Exception as e:
        # UI boundary: any load failure (unknown pair, network, disk) is
        # reported to the user rather than crashing the app. Failures are
        # deliberately not cached so a later rerun can retry.
        st.error(f"Failed to load model for {source_lang} to {target_lang}. Ensure the language pair is supported. Error: {e}")
        return None, None

    st.session_state["_translation_model"] = (model_name, tokenizer, model)
    return tokenizer, model
def translate_text(tokenizer, model, text):
    """Translate ``text`` with the given tokenizer/model pair.

    Args:
        tokenizer: Callable tokenizer that also provides ``decode``.
        model: Model exposing ``generate(**inputs)``.
        text: Source text to translate.

    Returns:
        The translated string, or ``""`` when ``text`` is empty, ``None`` or
        contains only whitespace.
    """
    # Nothing to translate: skip the model entirely so blank input cannot
    # produce spurious output.
    if not text or not text.strip():
        return ""

    # truncation=True lets the tokenizer cut input that exceeds the model's
    # maximum length, so very long text may be translated only partially.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(**inputs)
    # A single string was passed in, so only the first sequence is decoded.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def audio_to_text(audio_file, language="en-US"):
    """Transcribe an audio file using the Google speech recognition backend.

    Args:
        audio_file: Path to the audio file (anything ``sr.AudioFile``
            accepts).
        language: Language tag forwarded to ``recognize_google``. The
            default ``"en-US"`` is the library's own default, so existing
            callers that pass only ``audio_file`` behave as before.

    Returns:
        The recognized text, or ``""`` when the file cannot be read, the
        speech cannot be understood, or the service request fails. Each
        failure is reported to the user with ``st.error``.
    """
    recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
    except ValueError as e:
        # Raised when the file is not in a format the library can decode.
        st.error(f"Could not read the audio file; {e}")
        return ""

    try:
        return recognizer.recognize_google(audio_data, language=language)
    except sr.UnknownValueError:
        st.error("Speech Recognition could not understand the audio.")
    except sr.RequestError as e:
        st.error(f"Could not request results from Speech Recognition service; {e}")
    return ""
def text_to_audio(text, lang):
    """Synthesize speech for ``text`` and return it as an in-memory stream.

    Args:
        text: Text to speak.
        lang: Language code handed to ``gTTS``.

    Returns:
        A ``BytesIO`` holding the audio written by ``gTTS``, rewound to the
        start so it can be read or played immediately.

    Note:
        Errors raised by ``gTTS`` are not caught here.
        NOTE(review): gTTS is expected to reject empty text - callers should
        check for it before calling; confirm against the gTTS version in use.
    """
    tts = gTTS(text=text, lang=lang)
    audio_file = BytesIO()
    tts.write_to_fp(audio_file)
    # Rewind: write_to_fp leaves the position at the end of the data.
    audio_file.seek(0)
    return audio_file
def main():
    """Render the Streamlit UI for the audio translation app.

    Flow: choose source/target languages, load the matching translation
    model, upload a WAV file, transcribe it, and on request translate the
    transcript and play the translation as synthesized speech. Each step
    that cannot proceed stops the run early after informing the user.
    """
    st.title("Audio Language Translation App")
    st.write("Translate audio between multiple languages using open-source models.")

    # Language selection
    languages = {
        "English": "en",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Italian": "it",
        "Russian": "ru",
        "Chinese": "zh",
        "Japanese": "ja",
        "Korean": "ko",
    }
    language_names = list(languages)
    source_language = st.selectbox("Select source language:", options=language_names)
    # Start the target on the second entry so the app does not open with
    # identical source/target languages and an immediate warning.
    target_language = st.selectbox("Select target language:", options=language_names, index=1)
    if source_language == target_language:
        st.warning("Source and target languages must be different.")
        return

    source_lang_code = languages[source_language]
    target_lang_code = languages[target_language]

    # Load the model and tokenizer; load_model reports failures itself and
    # returns (None, None) in that case.
    tokenizer, model = load_model(source_lang_code, target_lang_code)
    if tokenizer is None or model is None:
        return

    # Audio input
    uploaded_audio = st.file_uploader("Upload an audio file (WAV format):", type=["wav"])
    if uploaded_audio is None:
        return
    st.audio(uploaded_audio, format="audio/wav")

    # The recognizer is given a file path, so spill the upload to disk.
    # getvalue() returns the full contents regardless of the current stream
    # position, which read() does not guarantee across reruns.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio.write(uploaded_audio.getvalue())
        temp_audio_path = temp_audio.name
    try:
        # NOTE(review): the selected source language is not passed to the
        # transcription step; recognize_google defaults to en-US, so
        # non-English audio may be mis-transcribed - verify and wire through.
        with st.spinner("Converting audio to text..."):
            input_text = audio_to_text(temp_audio_path)
    finally:
        # delete=False means the file outlives the with-block; remove it
        # explicitly so reruns do not accumulate temp files.
        os.remove(temp_audio_path)

    if not input_text:
        # audio_to_text has already shown the reason via st.error.
        return

    st.success("Audio converted to text!")
    # Use the widget's return value so corrections typed by the user are
    # what gets translated.
    input_text = st.text_area("Transcribed text:", input_text, height=100)

    if not st.button("Translate and Generate Audio"):
        return

    with st.spinner("Translating text..."):
        translated_text = translate_text(tokenizer, model, input_text)
    if not translated_text.strip():
        # Avoid handing empty text to the speech synthesizer.
        st.warning("Nothing to translate: the text is empty.")
        return

    st.success("Translation completed!")
    st.text_area("Translated text:", translated_text, height=100)

    with st.spinner("Generating audio..."):
        output_audio = text_to_audio(translated_text, target_lang_code)
    st.success("Audio generated!")
    st.audio(output_audio, format="audio/mp3")
# Run the app when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()