import gradio as gr import librosa from asr import transcribe, ASR_EXAMPLES, ASR_NOTE from lid import identify # Import Language Identification model # Function to detect language and transcribe speech def auto_transcribe(audio): detected_lang = identify(audio) # Detect language print(f"Detected Language: {detected_lang}") # Debugging: Print detected language # Check if the detected language is valid if detected_lang not in ["eng", "swh", "en", "sw"]: return f"Error: Only English and Swahili are supported. Detected: {detected_lang}" # Normalize language codes if needed if detected_lang in ["en"]: detected_lang = "eng" elif detected_lang in ["sw"]: detected_lang = "swh" # Transcribe using detected language return transcribe(audio, lang=detected_lang) # Speech-to-Text Interface mms_transcribe = gr.Interface( fn=auto_transcribe, inputs=gr.Audio(), outputs="text", examples=ASR_EXAMPLES, title="Speech-to-Text (Auto Language Detection)", description="Automatically detects whether speech is in Swahili or English and transcribes it.", article=ASR_NOTE, allow_flagging="never", ) # Main Gradio App with gr.Blocks() as demo: gr.Markdown("
MMS Speech-to-Text
") gr.HTML("