# File size: 2,270 Bytes
# 5ca170c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import subprocess
import sys

# Ensure yt_dlp is available. If the import fails, the "yt-dlp" package is
# installed with pip for the interpreter running this script and the import
# is retried under the same `youtube_dl` alias.
# NOTE(review): this runs pip as a side effect of importing the module and
# needs network access plus write permission to the environment — confirm
# this is acceptable wherever the app is deployed.
try:
    import yt_dlp as youtube_dl
except ImportError:
    # check_call raises CalledProcessError if pip exits with a non-zero
    # status, so a failed install aborts start-up here.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "yt-dlp"])
    import yt_dlp as youtube_dl

import gradio as gr
from transformers import pipeline

def download_video(video_url, filename="downloaded_video.mp4"):
    """Download the media behind ``video_url`` with yt-dlp and save it locally.

    Args:
        video_url: Address of the video page to fetch.
        filename: Path passed to yt-dlp as its output template (``outtmpl``).

    Returns:
        The ``filename`` argument, unchanged.
    """
    # Browser-like User-Agent string handed to yt-dlp for its requests.
    browser_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/115.0.0.0 Safari/537.36'
    )
    options = dict(
        format='bestaudio/best',  # yt-dlp format selector string
        outtmpl=filename,
        noplaylist=True,
        quiet=True,
        user_agent=browser_agent,
    )
    with youtube_dl.YoutubeDL(options) as downloader:
        # download() takes a list of URLs; only one is ever supplied here.
        downloader.download([video_url])
    return filename

def extract_audio(video_filename, audio_filename="extracted_audio.wav"):
    """Use the ``ffmpeg`` executable to write the input's audio as a WAV file.

    Args:
        video_filename: Path of the media file given to ffmpeg as input.
        audio_filename: Path of the WAV file ffmpeg should write.

    Returns:
        The ``audio_filename`` argument, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            (``check=True``).
    """
    ffmpeg_args = ["ffmpeg", "-y"]  # -y: overwrite the output without asking
    ffmpeg_args += ["-i", video_filename]
    ffmpeg_args += ["-vn"]  # drop any video stream
    # 16-bit PCM, 16 kHz sample rate, single channel.
    ffmpeg_args += ["-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1"]
    ffmpeg_args.append(audio_filename)
    subprocess.run(ffmpeg_args, check=True)
    return audio_filename

# Pipelines that have already been built, keyed by model name, so that each
# model is loaded once per process instead of once per request.
_CLASSIFIER_CACHE = {}

def classify_accent(audio_file, model_name="superb/wav2vec2-base-superb-sid"):
    """Classify an audio file and format the top prediction as text.

    The Hugging Face ``audio-classification`` pipeline for ``model_name`` is
    created on first use and then reused on later calls; constructing it on
    every call would reload the model each time.

    Args:
        audio_file: Audio input passed straight to the pipeline (the rest of
            this file passes a WAV file path).
        model_name: Model identifier passed to ``pipeline(model=...)``.

    Returns:
        A two-line string with the first prediction's label and its score as
        a percentage, or ``"No result."`` when the pipeline returns nothing.
    """
    classifier = _CLASSIFIER_CACHE.get(model_name)
    if classifier is None:
        classifier = pipeline("audio-classification", model=model_name)
        _CLASSIFIER_CACHE[model_name] = classifier

    results = classifier(audio_file)
    if not results:
        return "No result."

    # NOTE(review): assumes the pipeline lists predictions best-first, so the
    # first entry is the top prediction — confirm against the pipeline docs.
    top = results[0]
    return f"Speaker ID (as accent proxy): {top['label']}\nConfidence: {top['score'] * 100:.2f}%"

def accent_classifier(video_url):
    """Download a video, extract its audio and classify it, returning text.

    This is the callback wired into the Gradio interface, so it never raises:
    any failure is reported as an ``"Error occurred: ..."`` string.

    Intermediate files are written to a fresh temporary directory for each
    call. Fixed filenames in the working directory would let concurrent
    requests overwrite or delete each other's files.

    Args:
        video_url: URL typed by the user; may be empty or ``None``.

    Returns:
        The classification text from ``classify_accent``, or an error message.
    """
    import tempfile  # local import: only this function needs it

    # Reject blank input up front instead of handing it to the downloader.
    if not video_url or not video_url.strip():
        return "Error occurred: no video URL provided."

    try:
        # The directory and everything in it is removed when the block exits,
        # whether the pipeline succeeded or failed.
        with tempfile.TemporaryDirectory() as work_dir:
            video_path = os.path.join(work_dir, "downloaded_video.mp4")
            audio_path = os.path.join(work_dir, "extracted_audio.wav")
            video_file = download_video(video_url.strip(), filename=video_path)
            audio_file = extract_audio(video_file, audio_filename=audio_path)
            return classify_accent(audio_file)
    except Exception as e:
        # UI boundary: show the failure to the user rather than crashing.
        return f"Error occurred: {e}"

# Gradio front end: a single URL text box in, the classifier's text out.
iface = gr.Interface(
    title="Accent Classifier",
    description="Download a video, extract the audio, and classify the speaker (as an accent proxy) using a Hugging Face model.",
    fn=accent_classifier,
    inputs=gr.Textbox(
        placeholder="Paste a public YouTube or Vimeo video link here",
        label="Video URL",
    ),
    outputs="text",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()