Spaces:
Runtime error
Runtime error
File size: 2,927 Bytes
9471255 fa15dd8 5cb7e51 fa15dd8 5cb7e51 fa15dd8 5cb7e51 fa15dd8 5cb7e51 ba6451b f6f6edc ba6451b f6f6edc ba6451b fa15dd8 f6f6edc 5cb7e51 fa15dd8 5cb7e51 fa15dd8 5cb7e51 fa15dd8 5cb7e51 9471255 5cb7e51 f6f6edc 5cb7e51 f6f6edc 5cb7e51 f6f6edc 5cb7e51 f6f6edc fa15dd8 5cb7e51 9471255 5cb7e51 9471255 5cb7e51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
from moviepy.editor import VideoFileClip
from speechbrain.pretrained import EncoderClassifier
import torchaudio
from pytube import YouTube
import os
# Identifier of the pretrained accent model; classify_accent passes it as the
# `source` argument of EncoderClassifier.from_hparams.
# NOTE(review): looks like a Hugging Face Hub repo id -- confirm.
CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
def download_video(url):
    """Download the video behind ``url`` to a local file and return its path.

    YouTube links (``youtube.com`` / ``youtu.be``) are fetched with pytube;
    every other URL is treated as a direct link and streamed with ``requests``.

    Each call writes to a uniquely named file in the system temp directory so
    that concurrent requests cannot overwrite or delete each other's download.
    The caller owns the returned file and is responsible for deleting it.

    Args:
        url: Public video URL.

    Returns:
        Path of the downloaded video file.

    Raises:
        RuntimeError: If the URL is empty or the download fails for any
            reason. The underlying exception is chained as ``__cause__``.
    """
    import tempfile
    import uuid

    local_filename = None
    try:
        if not isinstance(url, str) or not url.strip():
            raise ValueError("No video URL provided.")
        url = url.strip()

        if "youtube.com" in url or "youtu.be" in url:
            yt = YouTube(url)
            stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
            if not stream:
                raise ValueError("No suitable video stream found.")
            # A random prefix keeps two requests for the same video from
            # sharing (and later deleting) one file.
            return stream.download(
                output_path=tempfile.gettempdir(),
                filename_prefix=f"{uuid.uuid4().hex}_",
            )

        # For direct MP4 links, download file
        import requests

        fd, local_filename = tempfile.mkstemp(suffix=".mp4")
        with os.fdopen(fd, 'wb') as f:
            # The timeout stops a stalled server from hanging the request
            # forever.
            with requests.get(url, stream=True, timeout=30) as r:
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return local_filename
    except Exception as e:
        # Do not leave a partially written download behind.
        if local_filename and os.path.exists(local_filename):
            try:
                os.remove(local_filename)
            except OSError:
                pass
        raise RuntimeError(f"Failed to download video: {e}") from e
def extract_audio(video_path):
    """Extract the audio track of ``video_path`` into a WAV file.

    The WAV file gets a unique name in the system temp directory so that
    concurrent requests do not overwrite each other's audio. The caller owns
    the returned file and is responsible for deleting it.

    Args:
        video_path: Path of a local video file.

    Returns:
        Path of the written WAV file.

    Raises:
        ValueError: If the video has no audio track.
    """
    import tempfile

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("The video does not contain an audio track.")

        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        # Only the reserved name is needed; the writer opens the file itself.
        os.close(fd)
        try:
            clip.audio.write_audiofile(audio_path, logger=None)
        except Exception:
            # Do not leave an empty or partial WAV behind on failure.
            if os.path.exists(audio_path):
                os.remove(audio_path)
            raise
    finally:
        # Always release the reader, even when writing fails.
        clip.close()
    return audio_path
# NOTE(review): 16 kHz is the rate wav2vec2/XLSR checkpoints are normally
# trained on -- confirm against the model's hyperparameters.
_TARGET_SAMPLE_RATE = 16000

# Holds the loaded model so it is built once per process, not once per call.
_classifier_cache = {}


def _get_classifier():
    """Return the pretrained classifier, loading it on first use only."""
    if "model" not in _classifier_cache:
        _classifier_cache["model"] = EncoderClassifier.from_hparams(
            source=CLASSIFIER,
            savedir="pretrained_models/accent_classifier",
            run_opts={"device": "cpu"}  # or "cuda" if GPU available
        )
    return _classifier_cache["model"]


def classify_accent(audio_path):
    """Predict the English accent spoken in the audio file at ``audio_path``.

    The audio is averaged down to one channel and resampled before it is
    handed to the model as a batch containing a single utterance.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        Tuple ``(predicted_accent, confidence)`` where ``confidence`` is a
        string such as ``"87.31%"``.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    classifier = _get_classifier()
    waveform, sample_rate = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError("The audio track is empty.")

    # torchaudio.load returns (channels, frames) while classify_batch reads
    # the first dimension as the batch, so stereo audio would otherwise be
    # scored as two separate utterances.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != _TARGET_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, _TARGET_SAMPLE_RATE
        )

    prediction = classifier.classify_batch(waveform)
    # prediction format: (out_prob, score, index, text_lab)
    predicted_accent = prediction[3][0]
    # NOTE(review): exp() assumes the score is a log-probability -- confirm
    # against the model's output layer.
    confidence = prediction[1].exp().max().item() * 100
    return predicted_accent, f"{confidence:.2f}%"
def process_video(url):
    """Run the download -> audio extraction -> classification pipeline.

    Args:
        url: Public video URL entered by the user.

    Returns:
        ``(accent, confidence)`` on success, or ``("Error: <message>", "")``
        when any step raises. Files produced by the download and extraction
        steps are removed before returning.
    """
    produced = []
    try:
        video_file = download_video(url)
        produced.append(video_file)
        audio_file = extract_audio(video_file)
        produced.append(audio_file)
        accent, confidence = classify_accent(audio_file)
    except Exception as e:
        return f"Error: {e}", ""
    else:
        return accent, confidence
    finally:
        # Runs on both the success and the error path.
        for leftover in produced:
            if leftover and os.path.exists(leftover):
                os.remove(leftover)
# Gradio UI: one URL text box in, two text boxes (accent, confidence) out.
_url_input = gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)")
_accent_output = gr.Textbox(label="Detected Accent")
_confidence_output = gr.Textbox(label="Confidence Score")

iface = gr.Interface(
    fn=process_video,
    inputs=_url_input,
    outputs=[_accent_output, _confidence_output],
    title="English Accent Classifier",
    description="Paste a public video URL to detect the English accent and confidence score.",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|