Spaces:

abdibrahem
/

ai-agent

Sleeping

ai-agent / voice_util.py

Update voice installation and settings

1526f72 29 days ago

1.41 kB

	# import whisper
	# import numpy as np
	# import soundfile as sf
	# import io
	# from tempfile import NamedTemporaryFile
	# import os



	# def load_audio(file_bytes):
	# # Load audio and convert to Whisper's required format
	# audio, sr = sf.read(io.BytesIO(file_bytes))

	# # Convert to mono if stereo
	# if len(audio.shape) > 1:
	# audio = np.mean(audio, axis=1)

	# # Resample to 16kHz if needed
	# if sr != 16000:
	# import librosa
	# audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)

	# return audio.astype(np.float32)
	from pydub import AudioSegment
	import numpy as np
	import soundfile as sf
	import io
	import librosa

	def load_audio(file_bytes: bytes, target_sr: int = 16000) -> np.ndarray:
	    """Decode raw audio bytes into a mono float32 waveform.

	    The bytes are decoded with pydub (so any container/codec pydub can
	    open is accepted), re-encoded to WAV in memory, read back with
	    soundfile, averaged down to a single channel and resampled to
	    ``target_sr`` when the source rate differs.

	    Args:
	        file_bytes: Encoded audio file contents.
	        target_sr: Sample rate of the returned waveform in Hz. Defaults
	            to 16000, the rate this function previously hard-coded.

	    Returns:
	        A ``numpy.float32`` array holding the mono waveform.

	    Raises:
	        ValueError: If ``file_bytes`` is empty, ``target_sr`` is not
	            positive, or pydub cannot decode the data.
	    """
	    if target_sr <= 0:
	        raise ValueError("target_sr must be a positive sample rate.")

	    # Fail fast on empty input instead of handing it to the decoder.
	    if not file_bytes:
	        raise ValueError("Could not read audio file. Input is empty.")

	    # pydub handles format detection; any decoding failure is reported
	    # to the caller as a ValueError with the original error chained.
	    try:
	        audio_segment = AudioSegment.from_file(io.BytesIO(file_bytes))
	    except Exception as e:
	        raise ValueError("Could not read audio file. Format might be unsupported.") from e

	    # Round-trip through an in-memory WAV so soundfile can read it.
	    wav_io = io.BytesIO()
	    audio_segment.export(wav_io, format="wav")
	    wav_io.seek(0)

	    audio, sr = sf.read(wav_io)

	    # Multi-channel data comes back 2-D; average across axis 1 to get mono.
	    if audio.ndim > 1:
	        audio = audio.mean(axis=1)

	    # Resample only when the source rate differs from the requested one.
	    if sr != target_sr:
	        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

	    return audio.astype(np.float32)