# VoiceToWrite / preprocessing.py
# Seicas's picture
# Upload 10 files
# 41979e6 verified
# raw
# history blame
# 1.35 kB
import os
from pydub import AudioSegment
import noisereduce as nr
import webrtcvad
from ..config import settings
def clean_audio(input_path: str) -> str:
    """Denoise an audio file, drop non-speech frames and save it as WAV.

    The input is decoded with pydub, converted to mono 16-bit PCM at
    ``settings.SAMPLE_RATE``, passed through ``noisereduce``, trimmed to
    the frames WebRTC VAD classifies as speech, and exported next to the
    input file.

    Args:
        input_path: Path of the audio file to clean.

    Returns:
        Path of the exported file: ``input_path`` with its extension
        replaced by ``_clean.wav``.
    """
    audio = AudioSegment.from_file(input_path)

    # Actually resample/downmix instead of merely relabelling the data:
    # building a segment with frame_rate=settings.SAMPLE_RATE from samples
    # recorded at another rate changes playback speed and pitch, and
    # webrtcvad only accepts mono 16-bit PCM.
    audio = (
        audio.set_frame_rate(settings.SAMPLE_RATE)
        .set_channels(1)
        .set_sample_width(2)
    )

    samples = audio.get_array_of_samples()
    reduced = nr.reduce_noise(y=samples, sr=settings.SAMPLE_RATE)

    # Defensive: force the result back to the 16-bit sample type so the
    # byte layout matches ``sample_width`` whatever dtype noisereduce
    # returned. Clipping first avoids wrap-around on overshoot.
    pcm = reduced.clip(-32768, 32767).astype(samples.typecode)

    cleaned = AudioSegment(
        pcm.tobytes(),
        frame_rate=settings.SAMPLE_RATE,
        sample_width=audio.sample_width,
        channels=audio.channels,
    )

    vad = webrtcvad.Vad(2)  # aggressiveness mode 2
    trimmed = _apply_vad(cleaned, vad)

    # Derive the output name from the extension only. A plain
    # str.replace('.wav', ...) leaves non-.wav names unchanged (so the
    # export would overwrite the input) and also rewrites '.wav' inside
    # directory names.
    root, _ext = os.path.splitext(input_path)
    clean_path = root + '_clean.wav'
    trimmed.export(clean_path, format='wav')
    return clean_path
def _apply_vad(audio: AudioSegment, vad: webrtcvad.Vad) -> AudioSegment:
    """Return a copy of ``audio`` containing only frames classified as speech.

    The samples are cut into consecutive 30 ms frames at
    ``settings.SAMPLE_RATE``; each frame is passed to ``vad.is_speech`` and
    kept only when it is reported as speech.

    Args:
        audio: Segment to filter. Its data is interpreted at
            ``settings.SAMPLE_RATE``. NOTE(review): webrtcvad expects mono
            16-bit PCM; confirm callers provide that.
        vad: Configured WebRTC voice-activity detector.

    Returns:
        A new segment with the same sample width and channel count, made
        of the concatenated speech frames (empty if none were detected).
        A trailing remainder shorter than one frame is discarded.
    """
    frame_duration = 30  # milliseconds
    samples_per_frame = int(settings.SAMPLE_RATE * frame_duration / 1000)
    samples = audio.get_array_of_samples()

    voiced_chunks = []
    # Stop before a trailing partial frame: webrtcvad rejects buffers that
    # are not exactly one 10/20/30 ms frame long.
    last_start = len(samples) - samples_per_frame
    for start in range(0, last_start + 1, samples_per_frame):
        frame_bytes = samples[start:start + samples_per_frame].tobytes()
        if vad.is_speech(frame_bytes, sample_rate=settings.SAMPLE_RATE):
            # Keep the raw PCM bytes. Collecting the sample integers and
            # calling bytes() on them fails for values outside 0..255 and
            # would otherwise emit one byte per sample.
            voiced_chunks.append(frame_bytes)

    return AudioSegment(
        data=b"".join(voiced_chunks),
        sample_width=audio.sample_width,
        frame_rate=settings.SAMPLE_RATE,
        channels=audio.channels,
    )