Spaces:

Seicas
/

VoiceToWrite

Running

File size: 1,352 Bytes

41979e6

import os

import noisereduce as nr
import numpy as np
import webrtcvad
from pydub import AudioSegment

from ..config import settings

def clean_audio(input_path: str) -> str:
    """Denoise an audio file, drop non-speech frames and save the result as WAV.

    The clip is first converted to 16-bit mono PCM at ``settings.SAMPLE_RATE``
    so that the noise reducer, the VAD and the exported file all agree on the
    sample format. The output is therefore always mono 16-bit, regardless of
    the input layout.

    Args:
        input_path: Path to an audio file in any format pydub can decode.

    Returns:
        Path of the written file: ``input_path`` with its extension replaced
        by ``_clean.wav`` (``a/b.wav`` -> ``a/b_clean.wav``).
    """
    audio = AudioSegment.from_file(input_path)
    # Resample instead of merely relabelling the data with SAMPLE_RATE, which
    # would change the speed and pitch of clips recorded at another rate.
    # webrtcvad additionally requires 16-bit mono PCM.
    # NOTE(review): assumes settings.SAMPLE_RATE is a rate webrtcvad supports
    # (8000, 16000, 32000 or 48000 Hz) - confirm in the config module.
    audio = (
        audio.set_frame_rate(settings.SAMPLE_RATE)
        .set_channels(1)
        .set_sample_width(2)
    )

    # noisereduce operates on NumPy arrays, not on the array.array pydub returns.
    samples = np.array(audio.get_array_of_samples(), dtype=np.int16)
    reduced = nr.reduce_noise(y=samples, sr=settings.SAMPLE_RATE)
    # Saturate before casting so out-of-range values clip instead of wrapping,
    # and so the byte layout matches the 16-bit sample width declared below.
    pcm = np.clip(np.rint(reduced), -32768, 32767).astype(np.int16)

    cleaned = AudioSegment(
        pcm.tobytes(),
        frame_rate=settings.SAMPLE_RATE,
        sample_width=audio.sample_width,
        channels=audio.channels,
    )

    vad = webrtcvad.Vad(2)
    trimmed = _apply_vad(cleaned, vad)

    # Build the output name from the path stem: str.replace('.wav', ...) left
    # non-.wav inputs unchanged (so the export overwrote the source file) and
    # rewrote every '.wav' occurrence anywhere in the path.
    root, _ext = os.path.splitext(input_path)
    clean_path = f"{root}_clean.wav"
    trimmed.export(clean_path, format='wav')
    return clean_path

def _apply_vad(audio: AudioSegment, vad: webrtcvad.Vad) -> AudioSegment:
    """Return a new segment holding only the frames the VAD classifies as speech.

    The audio is cut into consecutive 30 ms frames (sized from
    ``settings.SAMPLE_RATE``); each complete frame is passed to
    ``vad.is_speech`` and kept only when it returns true. A trailing partial
    frame is dropped.

    Args:
        audio: Segment to filter. webrtcvad expects 16-bit mono PCM, so the
            caller should supply audio in that layout at
            ``settings.SAMPLE_RATE``.
        vad: Configured voice-activity detector.

    Returns:
        An ``AudioSegment`` with the same sample width and channel count as
        ``audio`` containing the concatenated speech frames (possibly empty).
    """
    frame_duration = 30  # milliseconds; webrtcvad accepts 10, 20 or 30 ms frames
    frame_len = int(settings.SAMPLE_RATE * frame_duration / 1000)
    samples = audio.get_array_of_samples()

    speech_chunks = []
    # Stop before any trailing partial frame: the VAD rejects frames that are
    # not exactly one of its supported durations.
    for start in range(0, len(samples) - frame_len + 1, frame_len):
        chunk = samples[start:start + frame_len].tobytes()
        if vad.is_speech(chunk, sample_rate=settings.SAMPLE_RATE):
            # Keep the raw PCM bytes. Collecting the sample integers and
            # calling bytes() on them fails for any value outside 0..255.
            speech_chunks.append(chunk)

    return AudioSegment(
        data=b"".join(speech_chunks),
        sample_width=audio.sample_width,
        frame_rate=settings.SAMPLE_RATE,
        channels=audio.channels,
    )