# Earlier soundfile-only implementation, kept for reference:
#
# import whisper
# import numpy as np
# import soundfile as sf
# import io
# from tempfile import NamedTemporaryFile
# import os
#
# def load_audio(file_bytes):
#     # Load audio and convert to Whisper's required format
#     audio, sr = sf.read(io.BytesIO(file_bytes))
#     # Convert to mono if stereo
#     if len(audio.shape) > 1:
#         audio = np.mean(audio, axis=1)
#     # Resample to 16kHz if needed
#     if sr != 16000:
#         import librosa
#         audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
#     return audio.astype(np.float32)

import io

import librosa
import numpy as np
import soundfile as sf
from pydub import AudioSegment


def load_audio(file_bytes: bytes, target_sr: int = 16000) -> np.ndarray:
    """Decode an in-memory audio file into a mono float32 waveform.

    The bytes are decoded with pydub, re-exported as WAV into an in-memory
    buffer, read back with soundfile, averaged down to one channel and
    resampled to ``target_sr`` when the decoded sample rate differs.

    Args:
        file_bytes: Raw contents of the audio file (any bytes-like object
            accepted by ``io.BytesIO``).
        target_sr: Sample rate, in Hz, of the returned waveform. Defaults to
            16000, the rate this function previously hard-coded.

    Returns:
        A one-dimensional ``numpy.float32`` array of samples at ``target_sr``.

    Raises:
        ValueError: If ``file_bytes`` is ``None``/empty or pydub cannot decode
            it.
    """
    # io.BytesIO(None) silently yields an empty stream, so reject missing or
    # empty input up front instead of handing the decoder nothing to read.
    if file_bytes is None or len(file_bytes) == 0:
        raise ValueError(
            "Could not read audio file. Format might be unsupported."
        )

    # Decode via pydub for broad container/codec compatibility. The catch is
    # deliberately broad: any decoding failure is reported to callers as a
    # single ValueError, with the original exception chained as the cause.
    try:
        audio_segment = AudioSegment.from_file(io.BytesIO(file_bytes))
    except Exception as e:
        raise ValueError(
            "Could not read audio file. Format might be unsupported."
        ) from e

    # Round-trip through an in-memory WAV so soundfile can always parse it.
    wav_io = io.BytesIO()
    audio_segment.export(wav_io, format="wav")
    wav_io.seek(0)

    audio, sr = sf.read(wav_io)

    # Multi-channel data comes back as (frames, channels); average to mono.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    # Resample only when needed, and never pass a zero-length signal to the
    # resampler.
    if sr != target_sr and audio.size:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

    return audio.astype(np.float32)