Spaces:

EdgarDataScientist
/

Accent_Classification

Runtime error

File size: 3,340 Bytes

9471255
fa15dd8
5cb7e51
fa15dd8
2685e79
5cb7e51
5ca9307
105a910
fa15dd8
5cb7e51
fa15dd8
5ca9307
 
 
 
5cb7e51
105a910
ba6451b
 
105a910
 
 
 
 
 
 
 
 
 
 
ba6451b
5ca9307
ba6451b
 
 
 
 
 
2685e79
ba6451b
 
 
5cb7e51
5ca9307
fa15dd8
5cb7e51
 
 
fa15dd8
 
 
5ca9307
 
5cb7e51
 
 
5ca9307
5cb7e51
fa15dd8
5ca9307
5cb7e51
 
 
9471255
5cb7e51
5ca9307
f6f6edc
 
5cb7e51
 
 
 
f6f6edc
 
 
5cb7e51
 
f6f6edc
5cb7e51
f6f6edc
5cb7e51
 
105a910
9471255
5cb7e51
 
9471255
5cb7e51
105a910
5cb7e51

import gradio as gr
from moviepy.editor import VideoFileClip
from speechbrain.pretrained import EncoderClassifier
import torchaudio
import requests
import os
import torch
import yt_dlp

# Identifier of the pretrained accent model; passed as `source` to
# EncoderClassifier.from_hparams in classify_accent.
# NOTE(review): looks like a Hugging Face Hub repo id — confirm.
CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"

def get_default_device():
    """Pick the torch device to run on: CUDA when available, otherwise CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

def download_video(url, timeout=30):
    """Download a video from YouTube or a direct file URL into the working directory.

    URLs containing "youtube.com" or "youtu.be" are fetched with yt_dlp;
    every other URL is streamed with requests and saved as "temp_video.mp4".

    Args:
        url: Public video URL.
        timeout: Seconds requests may wait on connect/read for a direct
            download before giving up (not applied to the yt_dlp branch).

    Returns:
        Path of the downloaded file.

    Raises:
        RuntimeError: If the URL is empty or the download fails. The
            underlying exception is chained as ``__cause__``.
    """
    try:
        url = (url or "").strip()
        if not url:
            raise ValueError("no URL provided")

        if "youtube.com" in url or "youtu.be" in url:
            ydl_opts = {
                'format': 'best[ext=mp4]/best',
                'outtmpl': "temp_video.%(ext)s",
                'quiet': True,
                'noplaylist': True,
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info_dict = ydl.extract_info(url, download=True)
                # Let yt_dlp expand the output template itself rather than
                # rebuilding the file name by string replacement.
                return ydl.prepare_filename(info_dict)

        # Direct file download
        local_filename = "temp_video.mp4"
        try:
            # Without a timeout a stalled server would block the request forever.
            with requests.get(url, stream=True, timeout=timeout) as r:
                r.raise_for_status()
                with open(local_filename, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
        except Exception:
            # The caller never learns this path on failure, so remove the
            # partial file here instead of leaving it behind.
            if os.path.exists(local_filename):
                os.remove(local_filename)
            raise
        return local_filename
    except Exception as e:
        raise RuntimeError(f"Failed to download video: {e}") from e

def extract_audio(video_path):
    """Extract the audio track of a video and save it as "temp_audio.wav".

    Args:
        video_path: Path of a video file that moviepy can open.

    Returns:
        Path of the written WAV file.

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_path = "temp_audio.wav"
    clip = VideoFileClip(video_path)
    try:
        # A silent video has no audio clip; fail with a clear message
        # instead of an AttributeError on None.
        if clip.audio is None:
            raise ValueError("The video has no audio track to analyze.")
        clip.audio.write_audiofile(audio_path, logger=None)
    finally:
        # Close the clip even when writing the audio fails.
        clip.close()
    return audio_path

# Loaded classifiers keyed by device string, so the model is built once per
# process instead of once per request.
_CLASSIFIER_CACHE = {}


def _get_classifier(device):
    """Return the SpeechBrain classifier for *device*, loading it on first use."""
    key = str(device)
    if key not in _CLASSIFIER_CACHE:
        # NOTE(review): confirm on the model card that this checkpoint is meant
        # to be loaded through EncoderClassifier rather than a custom interface.
        _CLASSIFIER_CACHE[key] = EncoderClassifier.from_hparams(
            source=CLASSIFIER,
            savedir="pretrained_models/accent_classifier",
            run_opts={"device": key},
        )
    return _CLASSIFIER_CACHE[key]


def classify_accent(audio_path, target_sample_rate=16000):
    """Classify the English accent spoken in an audio file.

    Args:
        audio_path: Path of an audio file readable by torchaudio.
        target_sample_rate: Sample rate the audio is converted to before
            classification. NOTE(review): 16 kHz is presumably what the
            model expects — confirm against the model card.

    Returns:
        A ``(label, confidence)`` tuple where ``confidence`` is a string
        such as ``"87.31%"``.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    device = get_default_device()
    classifier = _get_classifier(device)

    waveform, sample_rate = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError("The audio file contains no samples.")

    # torchaudio.load yields (channels, frames) while classify_batch reads the
    # first dimension as the batch, so a stereo file would be scored as two
    # separate utterances. Downmix to a single mono utterance instead.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # classify_batch consumes the tensor as-is, so convert the rate here.
    if sample_rate != target_sample_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, target_sample_rate
        )

    prediction = classifier.classify_batch(waveform.to(device))
    predicted_accent = prediction[3][0]
    confidence = prediction[1].exp().max().item() * 100
    return predicted_accent, f"{confidence:.2f}%"

def process_video(url):
    """Run the full pipeline for one URL: fetch video, pull audio, classify.

    Returns an ``(accent, confidence)`` pair on success. On any failure the
    first element is an "Error: ..." message and the second is an empty
    string. Temporary video/audio files are removed in every case.
    """
    video_path = audio_path = None
    try:
        video_path = download_video(url)
        audio_path = extract_audio(video_path)
        label, score = classify_accent(audio_path)
    except Exception as exc:
        return f"Error: {exc}", ""
    else:
        return label, score
    finally:
        # Clean up whichever temporary files were actually produced.
        for leftover in (video_path, audio_path):
            if not leftover or not os.path.exists(leftover):
                continue
            os.remove(leftover)

# Gradio front end: one URL text box in, two text boxes out (accent label and
# confidence), all wired to process_video.
iface = gr.Interface(
    title="English Accent Classifier",
    description="Paste a public video URL (YouTube or MP4) to detect the English accent and confidence score.",
    fn=process_video,
    inputs=gr.Textbox(label="Enter Public Video URL (YouTube or direct MP4 link)"),
    outputs=[
        gr.Textbox(label="Detected Accent"),
        gr.Textbox(label="Confidence Score"),
    ],
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()