import torch
import torchaudio
import soundfile as sf
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from asteroid.models import BaseModel
import gradio as gr
import os
import uuid
# Load pretrained ConvTasNet model
print("Loading model...")
model = BaseModel.from_pretrained("JorisCos/ConvTasNet_Libri2Mix_sepnoisy_16k")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device).eval()
print("Model loaded successfully")
def denoise_and_visualize(audio_path):
    if audio_path is None:
        return "Please upload an audio file.", None, None, None

    try:
        # Unique ID to avoid overwriting files
        uid = str(uuid.uuid4())
        output_dir = "outputs"
        os.makedirs(output_dir, exist_ok=True)

        # Load & resample input to 16 kHz mono
        wav, sr = torchaudio.load(audio_path)
        if sr != 16000:
            wav = torchaudio.functional.resample(wav, sr, 16000)
        # Downmix (channels, time) -> (1, time) and move to the model device
        wav = wav.mean(dim=0, keepdim=True).to(device)
        # Model inference
        with torch.no_grad():
            est_sources = model.separate(wav)
        # est_sources is (batch, n_src, time); keep the first estimated source
        clean_audio = est_sources[:, 0, :].cpu().squeeze().numpy()

        # Save output audio
        audio_output = os.path.join(output_dir, f"cleaned_{uid}.wav")
        sf.write(audio_output, clean_audio, 16000)
        # Create spectrograms
        orig, _ = librosa.load(audio_path, sr=sr)
        den, _ = librosa.load(audio_output, sr=16000)

        plt.figure(figsize=(12, 5))

        plt.subplot(1, 2, 1)
        D_orig = librosa.amplitude_to_db(np.abs(librosa.stft(orig)), ref=np.max)
        librosa.display.specshow(D_orig, sr=sr, y_axis='log', x_axis='time')
        plt.title("Original Noisy")
        plt.colorbar(format='%+2.0f dB')

        plt.subplot(1, 2, 2)
        D_clean = librosa.amplitude_to_db(np.abs(librosa.stft(den)), ref=np.max)
        librosa.display.specshow(D_clean, sr=16000, y_axis='log', x_axis='time')
        plt.title("Denoised Output")
        plt.colorbar(format='%+2.0f dB')

        plt.tight_layout()
        spectrogram_output = os.path.join(output_dir, f"spectrogram_{uid}.png")
        plt.savefig(spectrogram_output)
        plt.close()
return "β Denoising complete!", audio_output, spectrogram_output, (16000, clean_audio) | |
except Exception as e: | |
return f"Error processing audio: {e}", None, None, None | |
# Gradio UI
iface = gr.Interface(
    fn=denoise_and_visualize,
    inputs=gr.Audio(type="filepath", label="Upload Noisy Audio"),
    outputs=[
        gr.Textbox(label="Status"),
        gr.Audio(label="Denoised Audio"),
        gr.Image(label="Spectrogram Comparison"),
        gr.Audio(label="Denoised Audio (16kHz)"),
    ],
    title="ConvTasNet AI Audio Denoiser",
    description="Upload a noisy audio file. This app removes background noise using ConvTasNet. Spectrograms show before & after.",
)
iface.launch()
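
# Note: launch() with defaults is enough on Hugging Face Spaces; when running locally,
# Gradio can also expose a temporary public URL via iface.launch(share=True).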