Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -122,16 +122,13 @@ class AudioProcessor:
|
|
122 |
|
123 |
return mfcc_tensor
|
124 |
|
125 |
-
# Speech recognition function
|
126 |
def recognize_speech(audio_path):
|
127 |
if speech_recognizer is None or speech_processor is None:
|
128 |
return "Speech recognition model not available"
|
129 |
|
130 |
try:
|
131 |
-
# Read audio file
|
132 |
audio_data, sr = sf.read(audio_path)
|
133 |
|
134 |
-
# Resample to 16kHz if needed
|
135 |
if sr != 16000:
|
136 |
audio_data = np.interp(
|
137 |
np.linspace(0, len(audio_data), int(16000 * len(audio_data) / sr)),
|
@@ -140,15 +137,27 @@ def recognize_speech(audio_path):
|
|
140 |
)
|
141 |
sr = 16000
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
|
150 |
|
151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
except Exception as e:
|
153 |
return f"Speech recognition error: {str(e)}"
|
154 |
|
|
|
122 |
|
123 |
return mfcc_tensor
|
124 |
|
|
|
125 |
def recognize_speech(audio_path):
|
126 |
if speech_recognizer is None or speech_processor is None:
|
127 |
return "Speech recognition model not available"
|
128 |
|
129 |
try:
|
|
|
130 |
audio_data, sr = sf.read(audio_path)
|
131 |
|
|
|
132 |
if sr != 16000:
|
133 |
audio_data = np.interp(
|
134 |
np.linspace(0, len(audio_data), int(16000 * len(audio_data) / sr)),
|
|
|
137 |
)
|
138 |
sr = 16000
|
139 |
|
140 |
+
inputs = speech_processor(
|
141 |
+
audio_data,
|
142 |
+
sampling_rate=sr,
|
143 |
+
return_tensors="pt"
|
144 |
+
).to(device)
|
145 |
|
146 |
+
generated_ids = speech_recognizer.generate(
|
147 |
+
input_features=inputs["input_features"],
|
148 |
+
max_length=100,
|
149 |
+
num_beams=5, # Changed from 1 to 5 for better results
|
150 |
+
early_stopping=True,
|
151 |
+
no_repeat_ngram_size=2
|
152 |
+
)
|
153 |
|
154 |
+
transcription = speech_processor.batch_decode(
|
155 |
+
generated_ids,
|
156 |
+
skip_special_tokens=True
|
157 |
+
)[0]
|
158 |
+
|
159 |
+
return transcription.strip()
|
160 |
+
|
161 |
except Exception as e:
|
162 |
return f"Speech recognition error: {str(e)}"
|
163 |
|