gautamthulasiraman committed on
Commit
f112b10
·
verified ·
1 Parent(s): ac7c9e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import gradio as gr
2
  from pydub import AudioSegment
3
  from faster_whisper import WhisperModel
4
- import os
5
 
6
- # Load model from Hugging Face (it will download & cache automatically)
7
- model = WhisperModel("openai/whisper-large-v3-turbo", compute_type="int8")
8
 
 
9
  def convert_to_wav(input_path):
10
  audio = AudioSegment.from_file(input_path)
11
  audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
@@ -13,16 +13,18 @@ def convert_to_wav(input_path):
13
  audio.export(output_path, format="wav")
14
  return output_path
15
 
 
16
  def transcribe_and_detect_lang(file_path):
17
  wav_path = convert_to_wav(file_path)
18
  segments, info = model.transcribe(wav_path)
19
  transcript = "\n".join([seg.text for seg in segments])
20
  return f"🌐 Detected Language: {info.language}\n\nπŸ“ Transcript:\n{transcript}"
21
 
 
22
  gr.Interface(
23
  fn=transcribe_and_detect_lang,
24
- inputs=gr.Audio(type="filepath", label="🎧 Upload Audio File"),
25
- outputs=gr.Textbox(label="πŸ“‹ Output"),
26
- title="🌍 Whisper Language Identifier",
27
- description="Upload an audio file in any language (Tamil, Hindi, English, etc.) and detect its language + get transcription."
28
  ).launch()
 
1
  import gradio as gr
2
  from pydub import AudioSegment
3
  from faster_whisper import WhisperModel
 
4
 
5
+ # βœ… Correct model for faster-whisper (not OpenAI's)
6
+ model = WhisperModel("guillaumekln/faster-whisper-large-v3", compute_type="int8")
7
 
8
+ # πŸ”„ Convert to 16kHz mono WAV for whisper
9
  def convert_to_wav(input_path):
10
  audio = AudioSegment.from_file(input_path)
11
  audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
 
13
  audio.export(output_path, format="wav")
14
  return output_path
15
 
16
+ # 🧠 Transcribe and detect language
17
  def transcribe_and_detect_lang(file_path):
18
  wav_path = convert_to_wav(file_path)
19
  segments, info = model.transcribe(wav_path)
20
  transcript = "\n".join([seg.text for seg in segments])
21
  return f"🌐 Detected Language: {info.language}\n\nπŸ“ Transcript:\n{transcript}"
22
 
23
+ # πŸš€ UI with Gradio
24
  gr.Interface(
25
  fn=transcribe_and_detect_lang,
26
+ inputs=gr.Audio(type="filepath", label="🎧 Upload Audio"),
27
+ outputs=gr.Textbox(label="πŸ“‹ Transcript + Language"),
28
+ title="🌍 Language Identifier with Whisper",
29
+ description="Upload any audio file (English, Tamil, Hindi, etc.), and this app detects the language and gives the transcript.",
30
  ).launch()