allow user to enable / disable multilingual feature on whisper
app.py CHANGED
@@ -143,7 +143,7 @@ def get_diarization_pipe():
 
 
 # —————— Whisper Transcription ——————
-def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
+def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_multilingual_en):
     """
     Generator-based streaming transcription with accumulation using Faster-Whisper on CPU.
     Yields (accumulated_text, diar_html) tuples for Gradio streaming.
@@ -172,6 +172,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
         language=None if language == "auto" else language,
         vad_filter=True,
         batch_size=16,
+        multilingual=whisper_multilingual_en,
     )
     os.unlink(tmp.name)
     text = converter.convert("".join(s.text for s in segments).strip())
@@ -182,7 +183,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
 
 
 @spaces.GPU
-def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
+def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_multilingual_en):
     """
     Generator-based streaming transcription with accumulation using Faster-Whisper on CUDA.
     Yields (accumulated_text, diar_html) tuples for Gradio streaming.
@@ -212,6 +213,7 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
         language=None if language == "auto" else language,
         vad_filter=True,
         batch_size=16,
+        multilingual=whisper_multilingual_en,
     )
     os.unlink(tmp.name)
     text = converter.convert("".join(s.text for s in segments).strip())
@@ -219,12 +221,12 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
         yield "", format_diarization_html(snippets)
         return
 
-def transcribe_fwhisper_stream(model_id, language, audio_path, device_sel):
+def transcribe_fwhisper_stream(model_id, language, audio_path, device_sel, whisper_multilingual_en):
     """Dispatch to CPU or GPU streaming generators, preserving two-value yields."""
     if device_sel == "GPU" and torch.cuda.is_available():
-        yield from _transcribe_fwhisper_gpu_stream(model_id, language, audio_path)
+        yield from _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_multilingual_en)
     else:
-        yield from _transcribe_fwhisper_cpu_stream(model_id, language, audio_path)
+        yield from _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_multilingual_en)
 
 # —————— SenseVoice Transcription ——————
 def _transcribe_sense_cpu_stream(model_id: str, language: str, audio_path: str,
@@ -324,6 +326,7 @@ with Demo:
             whisper_dd = gr.Dropdown(choices=WHISPER_MODELS, value=WHISPER_MODELS[0], label="Whisper Model")
             whisper_lang = gr.Dropdown(choices=WHISPER_LANGUAGES, value="auto", label="Whisper Language")
             device_radio = gr.Radio(choices=["GPU","CPU"], value="GPU", label="Device")
+            whisper_multilingual_en = gr.Checkbox(label="Multilingual", value=True)
             btn_w = gr.Button("Transcribe with Faster-Whisper")
 
         with gr.Column():
@@ -353,7 +356,7 @@ with Demo:
     # wire the callbacks into those shared boxes
     btn_w.click(
         fn=transcribe_fwhisper_stream,
-        inputs=[whisper_dd, whisper_lang, audio_input, device_radio],
+        inputs=[whisper_dd, whisper_lang, audio_input, device_radio, whisper_multilingual_en],
         outputs=[out_w, out_w_d]
     )
     btn_s.click(