Spaces:
Sleeping
Sleeping
# Earlier version of load_audio, commented out; the active pydub-based
# version follows.
#
# import whisper
# import numpy as np
# import soundfile as sf
# import io
# from tempfile import NamedTemporaryFile
# import os
#
# def load_audio(file_bytes):
#     # Load audio and convert to Whisper's required format
#     audio, sr = sf.read(io.BytesIO(file_bytes))
#     # Convert to mono if stereo
#     if len(audio.shape) > 1:
#         audio = np.mean(audio, axis=1)
#     # Resample to 16kHz if needed
#     if sr != 16000:
#         import librosa
#         audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
#     return audio.astype(np.float32)
from pydub import AudioSegment | |
import numpy as np | |
import soundfile as sf | |
import io | |
import librosa | |
def load_audio(file_bytes: bytes, *, target_sr: int = 16000) -> np.ndarray:
    """Decode audio bytes into a mono float32 waveform sampled at ``target_sr``.

    The bytes are decoded with pydub, re-exported as an in-memory WAV, read
    back with soundfile, averaged across channels when more than one is
    present, and resampled with librosa when the source rate differs from
    ``target_sr``.

    Args:
        file_bytes: Raw contents of an audio file.
        target_sr: Output sample rate in Hz. Defaults to 16000, the rate this
            function previously hard-coded.

    Returns:
        The samples as an ``np.float32`` array.

    Raises:
        ValueError: If ``target_sr`` is not positive, or if pydub cannot
            load ``file_bytes``.
    """
    # Fail fast on a nonsensical rate instead of decoding first and erroring
    # later inside the resampler.
    if target_sr <= 0:
        raise ValueError(
            f"target_sr must be a positive sample rate, got {target_sr!r}"
        )

    # Load through pydub first for format compatibility.
    try:
        segment = AudioSegment.from_file(io.BytesIO(file_bytes))
    except Exception as e:
        raise ValueError(
            "Could not read audio file. Format might be unsupported."
        ) from e

    # Export to WAV in memory so soundfile can read it back.
    wav_buffer = io.BytesIO()
    segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    samples, source_sr = sf.read(wav_buffer)

    # Collapse multi-channel audio to mono by averaging across channels.
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)

    # Resample only when the source rate differs from the requested one.
    if source_sr != target_sr:
        samples = librosa.resample(
            samples, orig_sr=source_sr, target_sr=target_sr
        )

    return samples.astype(np.float32)