Spaces:
Runtime error
Runtime error
File size: 2,509 Bytes
9471255 fa15dd8 9471255 fa15dd8 9471255 fa15dd8 9471255 fa15dd8 9471255 fa15dd8 9471255 fa15dd8 9471255 9cb62ce 9471255 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
from moviepy.editor import VideoFileClip
from speechbrain.pretrained import EncoderClassifier
import torchaudio
from pytube import YouTube
import os
# Model identifier handed to EncoderClassifier.from_hparams as `source`.
CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
def download_video(url):
    """Download the video behind *url* and return the local file path.

    YouTube links (``youtube.com`` or ``youtu.be``) are fetched through
    pytube. Any other URL is assumed to point directly at a video file and is
    streamed to ``temp_video.mp4``; share-page URLs from other hosts are not
    resolved to their media file.

    Args:
        url: Public video URL.

    Returns:
        Path of the downloaded file.

    Raises:
        ValueError: If *url* is not a non-empty string.
        RuntimeError: If the YouTube video has no progressive MP4 stream.
        requests.HTTPError: If a direct download returns an error status.
    """
    if not isinstance(url, str) or not url.strip():
        raise ValueError("A non-empty video URL is required")
    url = url.strip()

    # Short links (youtu.be) must go through pytube as well; treating them as
    # a direct MP4 link would just save the HTML page.
    if any(marker in url for marker in ("youtube.com", "youtu.be")):
        yt = YouTube(url)
        stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
        if stream is None:
            # .first() yields None when the filter matches nothing.
            raise RuntimeError("No progressive MP4 stream is available for this video")
        return stream.download()

    # Direct download (assumes URL is a direct mp4 link).
    import requests

    # NOTE: fixed file name, so overlapping calls overwrite each other's file.
    local_filename = "temp_video.mp4"
    # The timeout keeps a stalled server from blocking the request forever.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(local_filename, 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    out_file.write(chunk)
    return local_filename
def extract_audio(video_path):
    """Write the audio track of *video_path* to ``temp_audio.wav``.

    Args:
        video_path: Path of a local video file readable by moviepy.

    Returns:
        Path of the written WAV file.

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = VideoFileClip(video_path)
    audio_path = "temp_audio.wav"
    try:
        # clip.audio is None for silent videos; fail with a clear message
        # instead of an AttributeError on None.
        if clip.audio is None:
            raise ValueError("The video has no audio track to analyse")
        clip.audio.write_audiofile(audio_path, logger=None)
    finally:
        # Release the reader even when extraction fails.
        clip.close()
    return audio_path
# Sample rate the waveform is converted to before classification.
_TARGET_SAMPLE_RATE = 16000
# Lazily created classifier, reused by every call to classify_accent.
_classifier = None


def _get_classifier():
    """Return the shared classifier, loading it on first use."""
    global _classifier
    if _classifier is None:
        # NOTE(review): confirm against the model card that this checkpoint
        # loads through EncoderClassifier rather than a custom interface.
        _classifier = EncoderClassifier.from_hparams(
            source=CLASSIFIER,
            savedir="pretrained_models/accent_classifier",
            run_opts={"device": "cpu"},  # or "cuda" if GPU available
        )
    return _classifier


def classify_accent(audio_path):
    """Classify the English accent spoken in the audio file at *audio_path*.

    The audio is mixed down to one channel and resampled to
    ``_TARGET_SAMPLE_RATE`` before it is passed to the classifier, because
    ``classify_batch`` takes the tensor as-is and reads its first dimension
    as the batch.

    Args:
        audio_path: Path of an audio file readable by torchaudio.

    Returns:
        Tuple ``(label, confidence)`` where *confidence* is a string such as
        ``"87.31%"``.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    waveform, sample_rate = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError("The audio track is empty")

    # torchaudio.load returns (channels, samples); without the mix-down a
    # stereo file would be scored as two separate utterances.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != _TARGET_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, _TARGET_SAMPLE_RATE
        )

    prediction = _get_classifier().classify_batch(waveform)
    # prediction format: (out_prob, score, index, text_lab)
    predicted_accent = prediction[3][0]
    # assumes score is a log-probability, hence exp() — TODO confirm
    confidence = prediction[1].exp().max().item() * 100
    return predicted_accent, f"{confidence:.2f}%"
def process_video(url):
    """Download a video, classify the speaker's accent and clean up.

    Args:
        url: Public video URL, passed unchanged to ``download_video``.

    Returns:
        Tuple ``(accent, confidence)`` as produced by ``classify_accent``.

    Raises:
        Whatever the download, extraction or classification step raises;
        temporary files created so far are removed first.
    """
    # Bind both paths up front so the cleanup below never hits an unbound
    # name (and masks the real error) when an early step fails.
    video_path = None
    audio_path = None
    try:
        video_path = download_video(url)
        audio_path = extract_audio(video_path)
        accent, confidence = classify_accent(audio_path)
    finally:
        # Cleanup temp files if they exist
        for temp_path in (video_path, audio_path):
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
    return accent, confidence
# Gradio interface: one URL text box in, two text boxes (label, score) out.
_url_box = gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)")
_result_boxes = [
    gr.Textbox(label="Detected Accent"),
    gr.Textbox(label="Confidence Score"),
]

iface = gr.Interface(
    fn=process_video,
    inputs=_url_box,
    outputs=_result_boxes,
    title="English Accent Classifier",
    description="Paste a public video URL to detect the English accent and confidence score.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|