Spaces:

EdgarDataScientist
/

Accent_Classification

Runtime error

File size: 2,927 Bytes

9471255
fa15dd8
5cb7e51
fa15dd8
5cb7e51
 
fa15dd8
5cb7e51
fa15dd8
5cb7e51
ba6451b
 
 
 
 
 
 
f6f6edc
 
ba6451b
 
 
 
 
 
 
 
 
f6f6edc
 
ba6451b
 
 
fa15dd8
f6f6edc
5cb7e51
fa15dd8
5cb7e51
 
 
fa15dd8
 
 
5cb7e51
 
 
 
 
fa15dd8
5cb7e51
 
 
 
 
9471255
5cb7e51
f6f6edc
 
5cb7e51
 
 
 
f6f6edc
 
 
5cb7e51
 
f6f6edc
5cb7e51
f6f6edc
fa15dd8
5cb7e51
 
 
 
9471255
5cb7e51
 
9471255
5cb7e51

import gradio as gr
from moviepy.editor import VideoFileClip
from speechbrain.pretrained import EncoderClassifier
import torchaudio
from pytube import YouTube
import os

# Model identifier handed to EncoderClassifier.from_hparams as `source`
# (presumably a Hugging Face Hub repo id — verify against the model card).
CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"

def _download_direct(url, timeout=30):
    """Stream a direct video link into a unique temporary .mp4 file and return its path."""
    import tempfile

    import requests

    # A timeout keeps a stalled server from hanging the request forever.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        # Unique name so concurrent requests never overwrite each other's file.
        fd, local_path = tempfile.mkstemp(suffix=".mp4")
        try:
            with os.fdopen(fd, "wb") as out:
                for chunk in response.iter_content(chunk_size=8192):
                    out.write(chunk)
        except Exception:
            # Do not leave a truncated download behind.
            if os.path.exists(local_path):
                os.remove(local_path)
            raise
    return local_path


def download_video(url):
    """Download the video behind *url* and return the local file path.

    URLs containing "youtube.com" or "youtu.be" are fetched through pytube
    (first progressive MP4 stream). Any other URL is treated as a direct
    link and streamed to a temporary file over HTTP.

    Args:
        url: Public video URL.

    Returns:
        Path of the downloaded file. The caller is responsible for deleting it.

    Raises:
        RuntimeError: If the download fails for any reason; the original
            exception is attached as ``__cause__``.
    """
    try:
        if "youtube.com" in url or "youtu.be" in url:
            yt = YouTube(url)
            stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
            if not stream:
                raise ValueError("No suitable video stream found.")
            return stream.download()
        return _download_direct(url)
    except Exception as e:
        # Chain the cause so the underlying traceback is preserved for debugging.
        raise RuntimeError(f"Failed to download video: {e}") from e



def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the written .wav file. The caller is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    import tempfile

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("The video does not contain an audio track.")
        # Unique name so concurrent requests never overwrite each other's file.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            clip.audio.write_audiofile(audio_path, logger=None)
        except Exception:
            # Do not leave a partial WAV behind when the export fails.
            if os.path.exists(audio_path):
                os.remove(audio_path)
            raise
    finally:
        # Always release the reader, even when the export fails.
        clip.close()
    return audio_path

# Classifier instance, loaded lazily and reused so the model is not
# re-initialised on every request.
_ACCENT_CLASSIFIER = None

# Input format the model is assumed to expect (16 kHz, single channel) —
# verify against the model card if CLASSIFIER is changed.
_MODEL_SAMPLE_RATE = 16000


def _get_accent_classifier():
    """Return the shared EncoderClassifier, loading it on first use."""
    global _ACCENT_CLASSIFIER
    if _ACCENT_CLASSIFIER is None:
        _ACCENT_CLASSIFIER = EncoderClassifier.from_hparams(
            source=CLASSIFIER,
            savedir="pretrained_models/accent_classifier",
            run_opts={"device": "cpu"},  # or "cuda" if GPU available
        )
    return _ACCENT_CLASSIFIER


def classify_accent(audio_path):
    """Predict the English accent spoken in an audio file.

    The audio is mixed down to mono and resampled to the model's sample rate
    before classification, so stereo or 44.1 kHz input is not fed to the
    model as-is.

    Args:
        audio_path: Path of an audio file readable by torchaudio.

    Returns:
        Tuple ``(label, confidence)`` where ``confidence`` is a percentage
        string such as ``"87.31%"``.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    classifier = _get_accent_classifier()

    waveform, sample_rate = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError("The audio track is empty.")

    # torchaudio returns (channels, time); classify_batch reads the first
    # dimension as the batch, so collapse channels into one mono signal.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != _MODEL_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, _MODEL_SAMPLE_RATE
        )

    # classify_batch returns (out_prob, score, index, text_lab).
    prediction = classifier.classify_batch(waveform)
    predicted_accent = prediction[3][0]
    # Assumes the score is a log-probability, hence exp() — TODO confirm for this model.
    confidence = prediction[1].exp().max().item() * 100
    return predicted_accent, f"{confidence:.2f}%"

def process_video(url):
    """Run the full pipeline for one URL: download, extract audio, classify.

    This is the UI boundary, so failures are reported as text instead of
    being raised.

    Args:
        url: Public video URL entered by the user.

    Returns:
        Tuple ``(accent, confidence)`` on success, or
        ``("Error: <message>", "")`` on failure.
    """
    # Reject blank input up front with a clear message instead of a
    # confusing downstream download error.
    if not isinstance(url, str) or not url.strip():
        return "Error: Please provide a video URL.", ""

    video_path = None
    audio_path = None
    try:
        # Pasted URLs often carry stray whitespace or newlines.
        video_path = download_video(url.strip())
        audio_path = extract_audio(video_path)
        accent, confidence = classify_accent(audio_path)
        return accent, confidence
    except Exception as e:
        return f"Error: {e}", ""
    finally:
        for path in (video_path, audio_path):
            if not path:
                continue
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: a missing or locked temp file must not
                # replace the result being returned to the UI.
                pass


# Gradio interface
# Exposes process_video through one text input (the video URL) and two text
# outputs, matching the (accent, confidence) pair that process_video returns.
iface = gr.Interface(
    fn=process_video,
    # NOTE(review): the label advertises Loom, but download_video only
    # special-cases YouTube hosts; every other URL is fetched as a plain
    # HTTP download. Confirm that Loom share links actually work.
    inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
    outputs=[
        gr.Textbox(label="Detected Accent"),
        gr.Textbox(label="Confidence Score")
    ],
    title="English Accent Classifier",
    description="Paste a public video URL to detect the English accent and confidence score."
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()