Luigi committed on
Commit a8b76d6 · 1 Parent(s): fe1810a

allow user to enable / disable multilingual feature on whisper

Files changed (1)
  1. app.py +9 -6
app.py CHANGED
@@ -143,7 +143,7 @@ def get_diarization_pipe():
 
 
 # —————— Whisper Transcription ——————
-def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
+def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_multilingual_en):
     """
     Generator-based streaming transcription with accumulation using Faster-Whisper on CPU.
     Yields (accumulated_text, diar_html) tuples for Gradio streaming.
@@ -172,6 +172,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
         language=None if language == "auto" else language,
         vad_filter=True,
         batch_size=16,
+        multilingual=whisper_multilingual_en,
     )
     os.unlink(tmp.name)
     text = converter.convert("".join(s.text for s in segments).strip())
@@ -182,7 +183,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
 
 
 @spaces.GPU
-def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
+def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_multilingual_en):
     """
     Generator-based streaming transcription with accumulation using Faster-Whisper on CUDA.
     Yields (accumulated_text, diar_html) tuples for Gradio streaming.
@@ -212,6 +213,7 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
         language=None if language == "auto" else language,
         vad_filter=True,
         batch_size=16,
+        multilingual=whisper_multilingual_en,
     )
     os.unlink(tmp.name)
     text = converter.convert("".join(s.text for s in segments).strip())
@@ -219,12 +221,12 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
     yield "", format_diarization_html(snippets)
     return
 
-def transcribe_fwhisper_stream(model_id, language, audio_path, device_sel):
+def transcribe_fwhisper_stream(model_id, language, audio_path, device_sel, whisper_multilingual_en):
     """Dispatch to CPU or GPU streaming generators, preserving two-value yields."""
     if device_sel == "GPU" and torch.cuda.is_available():
-        yield from _transcribe_fwhisper_gpu_stream(model_id, language, audio_path)
+        yield from _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_multilingual_en)
     else:
-        yield from _transcribe_fwhisper_cpu_stream(model_id, language, audio_path)
+        yield from _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_multilingual_en)
 
 # —————— SenseVoice Transcription ——————
 def _transcribe_sense_cpu_stream(model_id: str, language: str, audio_path: str,
@@ -324,6 +326,7 @@ with Demo:
         whisper_dd = gr.Dropdown(choices=WHISPER_MODELS, value=WHISPER_MODELS[0], label="Whisper Model")
         whisper_lang = gr.Dropdown(choices=WHISPER_LANGUAGES, value="auto", label="Whisper Language")
         device_radio = gr.Radio(choices=["GPU","CPU"], value="GPU", label="Device")
+        whisper_multilingual_en = gr.Checkbox(label="Multilingual", value=True)
         btn_w = gr.Button("Transcribe with Faster-Whisper")
 
     with gr.Column():
@@ -353,7 +356,7 @@ with Demo:
     # wire the callbacks into those shared boxes
     btn_w.click(
         fn=transcribe_fwhisper_stream,
-        inputs=[whisper_dd, whisper_lang, audio_input, device_radio],
+        inputs=[whisper_dd, whisper_lang, audio_input, device_radio, whisper_multilingual_en],
         outputs=[out_w, out_w_d]
     )
     btn_s.click(
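
For context, a minimal self-contained sketch of the pattern this commit wires up: a Gradio checkbox whose boolean value is forwarded to Faster-Whisper's transcribe() as the multilingual flag. The model choice, helper names, and call shape below are illustrative assumptions rather than code from this repo, and it presumes a faster-whisper release that exposes the multilingual parameter, as the diff above does.

# Hedged sketch (illustrative only): checkbox value -> multilingual flag on Faster-Whisper.
import gradio as gr
from faster_whisper import WhisperModel

# Assumption: a small CPU model for the example; the real app picks from WHISPER_MODELS.
model = WhisperModel("tiny", device="cpu", compute_type="int8")

def transcribe(audio_path, multilingual_enabled):
    # multilingual=True lets the decoder switch languages within one file
    # (code-switching); False keeps the initially detected or forced language.
    segments, _info = model.transcribe(
        audio_path,
        vad_filter=True,
        multilingual=multilingual_enabled,
    )
    return "".join(s.text for s in segments).strip()

with gr.Blocks() as demo:
    audio = gr.Audio(type="filepath", label="Audio")
    multilingual_cb = gr.Checkbox(label="Multilingual", value=True)
    out = gr.Textbox(label="Transcript")
    gr.Button("Transcribe").click(fn=transcribe,
                                  inputs=[audio, multilingual_cb],
                                  outputs=out)

demo.launch()

In the app itself, the same value travels from the whisper_multilingual_en checkbox through transcribe_fwhisper_stream into the batched CPU and GPU transcribe calls shown in the diff.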