File size: 550 Bytes
e019578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import whisper
import numpy as np
import soundfile as sf
import io
from tempfile import NamedTemporaryFile
import os



def load_audio(file_bytes):
    """Decode raw audio bytes into the array format Whisper expects.

    The bytes are read with ``soundfile`` from an in-memory buffer,
    multi-channel audio is collapsed to one channel by averaging along
    axis 1, and the signal is resampled to 16 kHz when the source rate
    differs.

    Args:
        file_bytes: Encoded audio file contents, as bytes.

    Returns:
        The audio samples as a ``numpy.float32`` array.
    """
    whisper_rate = 16000

    samples, source_rate = sf.read(io.BytesIO(file_bytes))

    # More than one dimension means several channels: average them to mono.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Already at the target rate: only the dtype conversion is left.
    if source_rate == whisper_rate:
        return samples.astype(np.float32)

    # librosa is imported lazily so it is only needed when resampling happens.
    import librosa

    resampled = librosa.resample(
        samples, orig_sr=source_rate, target_sr=whisper_rate
    )
    return resampled.astype(np.float32)