ai-agent / voice_util.py
abdibrahem's picture
Add voice to text feature and update the requirements file
e019578
raw
history blame
550 Bytes
import whisper
import numpy as np
import soundfile as sf
import io
from tempfile import NamedTemporaryFile
import os
def load_audio(file_bytes):
    """Decode in-memory audio bytes into a mono, 16 kHz, float32 array.

    The bytes are wrapped in an in-memory buffer and decoded with
    ``soundfile``. Multi-dimensional results are collapsed to one dimension
    by averaging over axis 1, and the signal is resampled with ``librosa``
    only when the decoded sample rate is not already 16000 Hz (the rate the
    original author targets for Whisper).

    Args:
        file_bytes: Raw contents of an audio file in a format that
            ``soundfile`` can decode.

    Returns:
        A ``numpy.float32`` array of samples at 16000 Hz.
    """
    whisper_rate = 16000

    buffer = io.BytesIO(file_bytes)
    samples, sample_rate = sf.read(buffer)

    # Downmix: a result with more than one dimension is averaged across
    # axis 1 (presumably the channel axis of soundfile's output).
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    if sample_rate != whisper_rate:
        # Imported lazily so librosa is only loaded when resampling is needed.
        import librosa

        samples = librosa.resample(
            samples, orig_sr=sample_rate, target_sr=whisper_rate
        )

    return samples.astype(np.float32)