# Spaces: Runtime error
import functools
import os
import tempfile
from urllib.parse import urlparse

import gradio as gr
import requests
import torchaudio
from moviepy.editor import VideoFileClip
from pytube import YouTube
from speechbrain.pretrained import EncoderClassifier
# Model identifier handed to `EncoderClassifier.from_hparams(source=...)`
# when the accent classifier is loaded.
CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
# Hosts that are routed through pytube instead of a plain HTTP download.
_YOUTUBE_HOSTS = ("youtube.com", "youtu.be")

# Seconds to wait for the remote server (connect / between bytes) before
# giving up on a direct download.
_DOWNLOAD_TIMEOUT_S = 30


def _is_youtube_url(url):
    """Return True when the URL's host is YouTube (including youtu.be)."""
    # Scheme-less input such as "youtube.com/watch?v=..." has no netloc unless
    # it is parsed as a network-path reference.
    parsed = urlparse(url if "://" in url else "//" + url)
    host = (parsed.hostname or "").lower()
    return any(
        host == known or host.endswith("." + known) for known in _YOUTUBE_HOSTS
    )


def download_video(url: str) -> str:
    """Download the video behind *url* and return the local file path.

    YouTube links (``youtube.com`` and ``youtu.be`` hosts) are fetched with
    pytube; any other URL is assumed to point directly at an MP4 file and is
    streamed to a uniquely named temporary file.

    Args:
        url: Public video URL.

    Returns:
        Path of the downloaded video file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If *url* is empty, or the YouTube video exposes no
            progressive MP4 stream.
        requests.RequestException: If the direct download fails (including
            non-2xx responses and timeouts).
    """
    if not url or not url.strip():
        raise ValueError("A video URL is required.")
    url = url.strip()

    if _is_youtube_url(url):
        stream = (
            YouTube(url)
            .streams.filter(progressive=True, file_extension="mp4")
            .first()
        )
        # `.first()` yields None when the filter matches nothing.
        if stream is None:
            raise ValueError(
                "No progressive MP4 stream is available for this YouTube video."
            )
        return stream.download()

    # Direct download (assumes URL is a direct mp4 link). A unique temp file
    # keeps concurrent requests from overwriting each other's download.
    fd, local_filename = tempfile.mkstemp(prefix="temp_video_", suffix=".mp4")
    try:
        with os.fdopen(fd, "wb") as f, requests.get(
            url, stream=True, timeout=_DOWNLOAD_TIMEOUT_S
        ) as r:
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    except Exception:
        # Do not leave a partial file behind: the caller never receives the
        # path on failure and therefore cannot clean it up.
        os.remove(local_filename)
        raise
    return local_filename
def extract_audio(video_path: str) -> str:
    """Write the audio track of *video_path* to a WAV file and return its path.

    Args:
        video_path: Path of a local video file readable by moviepy.

    Returns:
        Path of a newly created temporary ``.wav`` file. The caller owns the
        file and is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = VideoFileClip(video_path)
    try:
        # moviepy sets `audio` to None for clips without an audio stream.
        if clip.audio is None:
            raise ValueError("The video has no audio track to analyse.")

        # Unique name so concurrent requests do not clobber each other's audio.
        fd, audio_path = tempfile.mkstemp(prefix="temp_audio_", suffix=".wav")
        os.close(fd)  # moviepy opens the path itself; only the name is needed.
        try:
            clip.audio.write_audiofile(audio_path, logger=None)
        except Exception:
            # The caller never sees the path on failure, so remove it here.
            os.remove(audio_path)
            raise
    finally:
        # Always release the reader, even when extraction fails.
        clip.close()
    return audio_path
# Sample rate the audio is converted to before classification.
# NOTE(review): 16 kHz mono is the usual input for XLSR/wav2vec2-based
# SpeechBrain models — confirm against the model card for CLASSIFIER.
_MODEL_SAMPLE_RATE = 16000


@functools.lru_cache(maxsize=1)
def _load_classifier():
    """Load the pretrained accent classifier once and reuse it afterwards."""
    # NOTE(review): the model card for this checkpoint may call for a custom
    # interface (speechbrain `foreign_class`) instead of EncoderClassifier —
    # verify if loading fails at runtime.
    return EncoderClassifier.from_hparams(
        source=CLASSIFIER,
        savedir="pretrained_models/accent_classifier",
        run_opts={"device": "cpu"},  # or "cuda" if GPU available
    )


def classify_accent(audio_path: str):
    """Classify the English accent spoken in the audio file at *audio_path*.

    The audio is mixed down to a single channel and resampled to
    ``_MODEL_SAMPLE_RATE`` before it is passed to the classifier, so the
    result does not depend on how the file was encoded.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        A ``(predicted_accent, confidence)`` tuple where ``confidence`` is a
        percentage string formatted with two decimals, e.g. ``"87.31%"``.
    """
    classifier = _load_classifier()

    # torchaudio returns (channels, samples). Passing several channels to
    # classify_batch would treat each channel as a separate batch item.
    waveform, sample_rate = torchaudio.load(audio_path)
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != _MODEL_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, _MODEL_SAMPLE_RATE
        )

    # classify_batch returns (out_prob, score, index, text_lab).
    _out_prob, score, _index, text_lab = classifier.classify_batch(waveform)
    predicted_accent = text_lab[0]
    # `score` is exponentiated here, i.e. it is treated as a log-probability.
    confidence = score.exp().max().item() * 100
    return predicted_accent, f"{confidence:.2f}%"
def process_video(url):
    """Run the full pipeline for *url*: download, extract audio, classify.

    Args:
        url: Public video URL entered by the user.

    Returns:
        The ``(accent, confidence)`` pair produced by ``classify_accent``.

    Raises:
        Whatever ``download_video``, ``extract_audio`` or ``classify_accent``
        raise; temporary files are removed before the error propagates.
    """
    # Bind both names up front: if an early step raises, the cleanup below
    # must not fail with UnboundLocalError and hide the real error.
    video_path = None
    audio_path = None
    try:
        video_path = download_video(url)
        audio_path = extract_audio(video_path)
        accent, confidence = classify_accent(audio_path)
    finally:
        # Cleanup temp files if they exist
        for path in (video_path, audio_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                pass  # Already gone; nothing to clean up.
    return accent, confidence
# Gradio interface: one URL textbox in, two textboxes out (accent label and
# confidence string), wired to `process_video`.
# NOTE(review): the input label advertises Loom links, but nothing in this
# file handles Loom specifically — confirm such URLs resolve to a direct MP4.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
    outputs=[
        gr.Textbox(label="Detected Accent"),
        gr.Textbox(label="Confidence Score"),
    ],
    title="English Accent Classifier",
    description="Paste a public video URL to detect the English accent and confidence score.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()