"""Noise reduction and voice-activity trimming for recorded audio files."""

import os

import noisereduce as nr
import numpy as np
import webrtcvad
from pydub import AudioSegment

from ..config import settings


def clean_audio(input_path: str) -> str:
    """Denoise an audio file, drop non-speech frames and save the result as WAV.

    The audio is resampled to ``settings.SAMPLE_RATE``, passed through
    ``noisereduce``, trimmed with a WebRTC voice-activity detector and
    exported next to the input file.

    Args:
        input_path: Path of the audio file to clean.

    Returns:
        Path of the exported file: the input path with its extension replaced
        by ``_clean.wav`` (``foo.wav`` -> ``foo_clean.wav``).
    """
    # Resample rather than merely relabel: building a segment with a frame
    # rate that differs from the data's real rate changes pitch and duration.
    audio = AudioSegment.from_file(input_path).set_frame_rate(settings.SAMPLE_RATE)

    # The array's typecode follows the segment's sample width, so the numpy
    # dtype picked up here is the PCM dtype we must write back.
    samples = np.array(audio.get_array_of_samples())

    if samples.size:
        pcm_dtype = samples.dtype
        # pydub interleaves channels; noisereduce expects (channels, frames).
        per_channel = samples.reshape(-1, audio.channels).T
        reduced = np.asarray(
            nr.reduce_noise(y=per_channel, sr=settings.SAMPLE_RATE)
        )
        if reduced.dtype != pcm_dtype:
            # Guard against a floating-point result: clip and round so the
            # bytes handed to AudioSegment really are PCM of the right width.
            bounds = np.iinfo(pcm_dtype)
            reduced = np.clip(np.rint(reduced), bounds.min, bounds.max).astype(
                pcm_dtype
            )
        # Back to interleaved (frames, channels) order before serialising.
        cleaned_bytes = reduced.reshape(audio.channels, -1).T.tobytes()
    else:
        # Nothing to denoise; skip the library call on an empty signal.
        cleaned_bytes = b""

    cleaned = AudioSegment(
        data=cleaned_bytes,
        frame_rate=settings.SAMPLE_RATE,
        sample_width=audio.sample_width,
        channels=audio.channels,
    )

    vad = webrtcvad.Vad(2)
    trimmed = _apply_vad(cleaned, vad)

    # Derive the output name from the stem so that non-".wav" inputs are not
    # overwritten and ".wav" elsewhere in the path is left untouched.
    stem, _ = os.path.splitext(input_path)
    clean_path = f"{stem}_clean.wav"

    # export() hands back the open output file; close it explicitly.
    trimmed.export(clean_path, format='wav').close()
    return clean_path


def _apply_vad(audio: AudioSegment, vad: webrtcvad.Vad) -> AudioSegment:
    """Return *audio* with every frame the detector rejects removed.

    The speech/non-speech decision is taken on a 16-bit mono rendering of the
    signal, while the frames that are kept are copied from the original data,
    so the result has the channel count and sample width of the input.

    Args:
        audio: Segment to trim. It is resampled to ``settings.SAMPLE_RATE``
            if it is not already at that rate.
        vad: Configured WebRTC voice-activity detector.

    Returns:
        A segment at ``settings.SAMPLE_RATE`` containing only the 30 ms frames
        classified as speech (possibly empty).
    """
    frame_duration = 30  # milliseconds
    sample_rate = settings.SAMPLE_RATE
    samples_per_frame = int(sample_rate * frame_duration / 1000)

    audio = audio.set_frame_rate(sample_rate)
    # The detector is fed 16-bit mono; both conversions keep the frame count,
    # so frame N of the probe lines up with frame N of the original data.
    probe_pcm = audio.set_channels(1).set_sample_width(2).raw_data
    source_pcm = audio.raw_data

    probe_frame_bytes = samples_per_frame * 2
    source_frame_bytes = samples_per_frame * audio.frame_width

    voiced = []
    for index, start in enumerate(range(0, len(probe_pcm), probe_frame_bytes)):
        # Zero-pad a short trailing frame so the detector always receives a
        # full 30 ms window instead of failing on the last slice.
        window = probe_pcm[start:start + probe_frame_bytes].ljust(
            probe_frame_bytes, b"\x00"
        )
        if vad.is_speech(window, sample_rate=sample_rate):
            offset = index * source_frame_bytes
            voiced.append(source_pcm[offset:offset + source_frame_bytes])

    # Join raw PCM bytes; converting sample integers with bytes() would fail
    # for any value outside 0..255 and would lose the sample width.
    return AudioSegment(
        data=b"".join(voiced),
        sample_width=audio.sample_width,
        frame_rate=sample_rate,
        channels=audio.channels,
    )