Spaces:

gautamthulasiraman
/

audio_lang_detector

Sleeping

gautamthulasiraman committed on Apr 24

Commit

f112b10

verified ·

1 Parent(s): ac7c9e3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import gradio as gr
 from pydub import AudioSegment
 from faster_whisper import WhisperModel
-import os
-# Load model from Hugging Face (it will download & cache automatically)
-model = WhisperModel("openai/whisper-large-v3-turbo", compute_type="int8")
 def convert_to_wav(input_path):
     audio = AudioSegment.from_file(input_path)
     audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
@@ -13,16 +13,18 @@ def convert_to_wav(input_path):
     audio.export(output_path, format="wav")
     return output_path
 def transcribe_and_detect_lang(file_path):
     """Transcribe an audio file and report the language the model detected.

     The input is first passed through ``convert_to_wav``; the text of each
     segment returned by ``model.transcribe`` is then placed on its own line
     beneath a header naming the detected language.
     """
     normalized_path = convert_to_wav(file_path)
     pieces, metadata = model.transcribe(normalized_path)
     # Collect the text of every segment in order before formatting the report.
     lines = []
     for piece in pieces:
         lines.append(piece.text)
     body = "\n".join(lines)
     return f"🌐 Detected Language: {metadata.language}\n\n📝 Transcript:\n{body}"
 gr.Interface(
     fn=transcribe_and_detect_lang,
-    inputs=gr.Audio(type="filepath", label="🎧 Upload Audio File"),
-    outputs=gr.Textbox(label="📋 Output"),
-    title="🌍 Whisper Language Identifier",
-    description="Upload an audio file in any language (Tamil, Hindi, English, etc.) and detect its language + get transcription."
 ).launch()

 import gradio as gr
 from pydub import AudioSegment
 from faster_whisper import WhisperModel
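+# ✅ faster-whisper needs a CTranslate2-converted checkpoint (not OpenAI's original weights). NOTE(review): confirm this repo id exists on the Hub — the large-v3 conversion appears to be published as "Systran/faster-whisper-large-v3".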
+model = WhisperModel("guillaumekln/faster-whisper-large-v3", compute_type="int8")
+# 🔄 Convert to 16kHz mono WAV for whisper
 def convert_to_wav(input_path):
     """Re-encode an audio file as WAV and return the path of the exported file.

     The audio is loaded with pydub, set to a 16000 Hz frame rate, a single
     channel and a sample width of 2, and then exported in WAV format.

     NOTE(review): the statement that assigns ``output_path`` is not visible
     in this diff view (the lines between the two hunks are elided) — confirm
     how the destination path is derived in the full app.py.
     """
     audio = AudioSegment.from_file(input_path)
     # Normalise to the 16 kHz mono layout before handing the file to the model.
     audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
     audio.export(output_path, format="wav")
     return output_path
+# 🧠 Transcribe and detect language
 def transcribe_and_detect_lang(file_path):
     """Transcribe an uploaded audio file and report its detected language.

     Args:
         file_path: Path to the audio file supplied by the UI, or ``None`` /
             an empty string when the form was submitted without a file.

     Returns:
         A display string containing the language reported by the model
         (``info.language``) followed by the transcript, one segment per
         line. When no file was supplied, a short prompt asking for one.
     """
     # Gradio's Audio input yields None when nothing was uploaded; answer with
     # a hint instead of letting the audio loader fail on a missing path.
     if not file_path:
         return "⚠️ No audio file provided. Please upload one and try again."
     wav_path = convert_to_wav(file_path)
     segments, info = model.transcribe(wav_path)
     transcript = "\n".join([seg.text for seg in segments])
     return f"🌐 Detected Language: {info.language}\n\n📝 Transcript:\n{transcript}"
+# 🚀 UI with Gradio
 gr.Interface(
     fn=transcribe_and_detect_lang,
+    inputs=gr.Audio(type="filepath", label="🎧 Upload Audio"),
+    outputs=gr.Textbox(label="📋 Transcript + Language"),
+    title="🌍 Language Identifier with Whisper",
+    description="Upload any audio file (English, Tamil, Hindi, etc.), and this app detects the language and gives the transcript.",
 ).launch()