Spaces:
Running
Running
import os

import noisereduce as nr
import numpy as np
import webrtcvad
from pydub import AudioSegment

from ..config import settings
def clean_audio(input_path: str) -> str:
    """Denoise an audio file, keep only its speech frames, and save it as WAV.

    The decoded audio is first normalised to ``settings.SAMPLE_RATE``, mono,
    16-bit PCM, then passed through ``noisereduce`` and finally through the
    WebRTC voice-activity detector (aggressiveness 2).

    Args:
        input_path: Path of the audio file to clean. Any format that
            ``AudioSegment.from_file`` can decode is accepted.

    Returns:
        Path of the written WAV file: ``input_path`` with its extension
        replaced by ``_clean.wav`` (``foo.wav`` -> ``foo_clean.wav``).
    """
    audio = AudioSegment.from_file(input_path)
    # Every later step labels the samples with settings.SAMPLE_RATE, and
    # webrtcvad only accepts 16-bit mono PCM, so convert the decoded audio to
    # that format instead of assuming the file already matches it.
    audio = (
        audio.set_frame_rate(settings.SAMPLE_RATE)
        .set_channels(1)
        .set_sample_width(2)
    )

    samples = np.array(audio.get_array_of_samples())
    reduced = nr.reduce_noise(
        y=samples.astype(np.float32), sr=settings.SAMPLE_RATE
    )
    # The denoised signal is floating point here; round, clip and cast it back
    # to the integer sample type so the raw bytes match ``sample_width``
    # (an unclipped cast could wrap around on overshoot).
    limits = np.iinfo(samples.dtype)
    pcm = np.clip(
        np.rint(np.asarray(reduced, dtype=np.float64)), limits.min, limits.max
    ).astype(samples.dtype)

    cleaned = AudioSegment(
        pcm.tobytes(),
        frame_rate=settings.SAMPLE_RATE,
        sample_width=audio.sample_width,
        channels=audio.channels,
    )
    trimmed = _apply_vad(cleaned, webrtcvad.Vad(2))

    # Build the output name from the extension-less path. A plain
    # ``str.replace('.wav', ...)`` leaves non-.wav paths unchanged (so the
    # export would overwrite the input) and also rewrites ".wav" occurring
    # inside directory names.
    root, _ext = os.path.splitext(input_path)
    clean_path = f"{root}_clean.wav"
    trimmed.export(clean_path, format='wav')
    return clean_path
def _apply_vad(audio: AudioSegment, vad: webrtcvad.Vad) -> AudioSegment:
    """Return a new segment holding only the frames ``vad`` classifies as speech.

    The audio is cut into consecutive 30 ms frames at ``settings.SAMPLE_RATE``;
    each frame is passed to ``vad.is_speech`` and kept only when that returns
    a truthy value. A trailing frame shorter than 30 ms is discarded.

    Args:
        audio: Segment to filter. NOTE(review): webrtcvad documents 16-bit
            mono PCM at 8/16/32/48 kHz as its only accepted input; this
            function does not convert, so the caller must supply that format.
        vad: Configured voice-activity detector.

    Returns:
        A segment with the same sample width and channel count as ``audio``,
        labelled with ``settings.SAMPLE_RATE``, containing the speech frames
        in their original order (empty when no frame is speech).
    """
    frame_duration = 30  # milliseconds
    # Loop-invariant: number of samples in one frame.
    frame_len = int(settings.SAMPLE_RATE * frame_duration / 1000)
    samples = audio.get_array_of_samples()

    voiced = bytearray()
    # Stop at the last complete frame: the detector rejects buffers whose
    # length is not exactly one valid frame duration.
    for start in range(0, len(samples) - frame_len + 1, frame_len):
        frame_bytes = samples[start:start + frame_len].tobytes()
        if vad.is_speech(frame_bytes, sample_rate=settings.SAMPLE_RATE):
            # Accumulate raw PCM bytes. Collecting the integer samples and
            # calling bytes() on them fails for any value outside 0..255 and
            # would store one byte per sample instead of ``sample_width``.
            voiced += frame_bytes

    return AudioSegment(
        data=bytes(voiced),
        sample_width=audio.sample_width,
        frame_rate=settings.SAMPLE_RATE,
        channels=audio.channels,
    )