VoiceCraft_gradio

Running on Zero

alexnasa committed on May 26

Commit

73cae84

verified ·

1 Parent(s): ecb1630

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,7 +70,22 @@ class WhisperModel:
 class WhisperxModel:
     def __init__(self, model_name, align_model: WhisperxAlignModel):
         from whisperx import load_model
-        self.model = load_model(model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None})
         self.align_model = align_model
     def transcribe(self, audio_path):
@@ -142,7 +157,7 @@ def transcribe(seed, audio_path):
     # then load the ASR+alignment combo
     transcribe_model = WhisperxModel(
-        model_name="large-v3-turbo",   # or "base.en", "small.en", etc.
         align_model=aligner
     )

 class WhisperxModel:
     def __init__(self, model_name, align_model: WhisperxAlignModel):
         from whisperx import load_model
+        # Build the asr_options dict handed to whisperx.load_model, including the keys the newer TranscriptionOptions signature expects.
+        asr_opts = {
+            "suppress_numerals": True,
+            "max_new_tokens": None,
+            "clip_timestamps": None,
+            "hallucination_silence_threshold": None,
+            # Keys newly required by TranscriptionOptions:
+            "multilingual": False,
+            "hotwords": {}   # NOTE(review): faster-whisper documents hotwords as an optional string - confirm {} is accepted; None is the usual "no hotwords" value
+        }
+        # Load the ASR model with these options; `device` is not defined in this block (presumably a module-level global - verify).
+        self.model = load_model(
+            model_name,
+            device,
+            asr_options=asr_opts
+        )
         self.align_model = align_model
     def transcribe(self, audio_path):
     # then load the ASR+alignment combo
     transcribe_model = WhisperxModel(
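+        model_name="large.en",   # or "base.en", "small.en", etc. NOTE(review): Whisper publishes English-only checkpoints only up to "medium.en" - confirm "large.en" resolves to a real model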
         align_model=aligner
     )