File size: 3,033 Bytes
de05d78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import torch
import torchaudio
import soundfile as sf
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from asteroid.models import BaseModel
import gradio as gr
import os
import uuid

# Load the pretrained ConvTasNet checkpoint once, at import time, so every
# request handled below reuses the same weights.
print("Loading model...")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Move the network to the selected device and switch it to inference mode.
model = (
    BaseModel.from_pretrained("JorisCos/ConvTasNet_Libri2Mix_sepnoisy_16k")
    .to(device)
    .eval()
)
print("Model loaded successfully ✅")

def _save_spectrogram_comparison(noisy, noisy_sr, denoised, denoised_sr, out_path):
    """Save side-by-side log-frequency spectrograms of two signals as an image.

    The figure and axes are handled as explicit objects instead of through
    pyplot's implicit "current figure", so concurrent requests cannot draw
    into each other's plot, and the figure is always closed, even when
    rendering or saving fails.

    Args:
        noisy: 1-D samples of the original recording.
        noisy_sr: Sample rate of ``noisy`` in Hz.
        denoised: 1-D samples of the model output.
        denoised_sr: Sample rate of ``denoised`` in Hz.
        out_path: Destination path of the image file.
    """
    fig, (ax_noisy, ax_denoised) = plt.subplots(1, 2, figsize=(12, 5))
    try:
        panels = (
            (ax_noisy, noisy, noisy_sr, "Original Noisy"),
            (ax_denoised, denoised, denoised_sr, "Denoised Output"),
        )
        for ax, signal, rate, title in panels:
            # Magnitude STFT in dB relative to each signal's own peak.
            spec_db = librosa.amplitude_to_db(
                np.abs(librosa.stft(signal)), ref=np.max
            )
            mesh = librosa.display.specshow(
                spec_db, sr=rate, y_axis='log', x_axis='time', ax=ax
            )
            ax.set_title(title)
            fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # Release the figure on every path; otherwise a failed render leaks
        # it for the lifetime of the server process.
        plt.close(fig)


def denoise_and_visualize(audio_path):
    """Denoise an audio file and produce a before/after spectrogram image.

    The input is loaded, resampled to 16 kHz if necessary, down-mixed to a
    single channel and passed through the module-level ``model``. The first
    estimated source is written to ``outputs/cleaned_<uuid>.wav`` and a
    spectrogram comparison to ``outputs/spectrogram_<uuid>.png``.

    Args:
        audio_path: Path of the uploaded audio file, or ``None`` when nothing
            was uploaded.

    Returns:
        A 4-tuple ``(status, wav_path, png_path, (16000, samples))``. When no
        file was given or processing failed, ``status`` carries the message
        and the remaining three items are ``None``.
    """
    if audio_path is None:
        return "Please upload an audio file.", None, None, None

    target_sr = 16000  # sample rate used for inference and for all outputs

    try:
        # Unique ID so outputs of different requests never overwrite each other.
        uid = str(uuid.uuid4())
        output_dir = "outputs"
        os.makedirs(output_dir, exist_ok=True)

        # Load, resample to the target rate and average the channels to mono.
        wav, sr = torchaudio.load(audio_path)
        if sr != target_sr:
            wav = torchaudio.functional.resample(wav, sr, target_sr)
        wav = wav.mean(dim=0, keepdim=True).to(device)

        # Model inference.
        with torch.no_grad():
            est_sources = model.separate(wav)
        # Keep the first estimated source of the single batch item. Indexing
        # (assuming a (batch, source, time) layout, as the original slicing
        # did) always yields a 1-D array, whereas squeeze() would collapse a
        # one-sample result to 0-d.
        clean_audio = est_sources[0, 0].cpu().numpy()

        # Save output audio.
        audio_output = os.path.join(output_dir, f"cleaned_{uid}.wav")
        sf.write(audio_output, clean_audio, target_sr)

        # Reload both signals for plotting: the original at its own sample
        # rate, the denoised one as it was written to disk.
        orig, _ = librosa.load(audio_path, sr=sr)
        den, _ = librosa.load(audio_output, sr=target_sr)

        spectrogram_output = os.path.join(output_dir, f"spectrogram_{uid}.png")
        _save_spectrogram_comparison(orig, sr, den, target_sr, spectrogram_output)

        return (
            "✅ Denoising complete!",
            audio_output,
            spectrogram_output,
            (target_sr, clean_audio),
        )

    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # crashing the request.
        return f"Error processing audio: {e}", None, None, None

# Gradio UI: one audio upload in; status text, denoised audio file,
# spectrogram image and the raw 16 kHz samples out (same order as the tuple
# returned by denoise_and_visualize).
noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
result_outputs = [
    gr.Textbox(label="Status"),
    gr.Audio(label="Denoised Audio"),
    gr.Image(label="Spectrogram Comparison"),
    gr.Audio(label="Denoised Audio (16kHz)"),
]

iface = gr.Interface(
    fn=denoise_and_visualize,
    inputs=noisy_input,
    outputs=result_outputs,
    title="ConvTasNet AI Audio Denoiser",
    description="Upload a noisy audio file. This app removes background noise using ConvTasNet. Spectrograms show before & after.",
)

# Start the web server.
iface.launch()