EdgarDataScientist's picture
Update app.py
9471255 verified
raw
history blame
2.51 kB
import gradio as gr
from moviepy.editor import VideoFileClip
from speechbrain.pretrained import EncoderClassifier
import torchaudio
from pytube import YouTube
import os
# Pretrained model source passed to EncoderClassifier.from_hparams in
# classify_accent (written as a Hugging Face Hub repository id).
CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
# Seconds to wait for the remote server before giving up on a direct download.
_DOWNLOAD_TIMEOUT_SECONDS = 30


def download_video(url):
    """Download the video behind *url* and return the path of the local file.

    YouTube links (``youtube.com`` or ``youtu.be``) are fetched with pytube.
    Any other URL is assumed to point directly at an MP4 file and is streamed
    into a uniquely named temporary file, so concurrent requests never
    overwrite each other's download.

    Args:
        url: Public video URL.

    Returns:
        Path of the downloaded video file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If *url* is empty or not a string, or if a YouTube link
            offers no progressive MP4 stream.
        requests.RequestException: If the direct download fails (connection
            error, timeout, or non-2xx status).
    """
    if not isinstance(url, str) or not url.strip():
        raise ValueError("A non-empty video URL is required.")
    url = url.strip()

    lowered = url.lower()
    if "youtube.com" in lowered or "youtu.be" in lowered:
        yt = YouTube(url)
        stream = yt.streams.filter(progressive=True, file_extension="mp4").first()
        if stream is None:
            # .first() yields None when the filter matches nothing; fail with
            # a clear message instead of an AttributeError on .download().
            raise ValueError("No progressive MP4 stream is available for this video.")
        return stream.download()

    # Direct download (assumes URL is a direct mp4 link).
    import tempfile

    import requests

    tmp = tempfile.NamedTemporaryFile(
        prefix="temp_video_", suffix=".mp4", delete=False
    )
    try:
        with tmp, requests.get(
            url, stream=True, timeout=_DOWNLOAD_TIMEOUT_SECONDS
        ) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    tmp.write(chunk)
    except BaseException:
        # Do not leave a partial download behind; the file is already closed
        # here because the ``with`` block has exited.
        if os.path.exists(tmp.name):
            os.remove(tmp.name)
        raise
    return tmp.name
def extract_audio(video_path):
    """Write the audio track of *video_path* to a WAV file and return its path.

    The audio is written to a uniquely named temporary file so that
    concurrent requests do not overwrite each other's output.

    Args:
        video_path: Path of a local video file readable by moviepy.

    Returns:
        Path of the generated ``.wav`` file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If the video contains no audio track.
    """
    import tempfile

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("The video has no audio track to analyse.")

        fd, audio_path = tempfile.mkstemp(prefix="temp_audio_", suffix=".wav")
        # Only the unique path is needed; the writer opens the file itself.
        os.close(fd)
        try:
            clip.audio.write_audiofile(audio_path, logger=None)
        except BaseException:
            # Remove the empty/partial file if writing failed.
            if os.path.exists(audio_path):
                os.remove(audio_path)
            raise
    finally:
        # Always release the clip's reader, even when extraction fails.
        clip.close()
    return audio_path
# Sample rate (Hz) the audio is converted to before classification. The XLSR
# accent model is documented as trained on 16 kHz single-channel audio.
_TARGET_SAMPLE_RATE = 16000

# Lazily created, process-wide classifier so the model is loaded only once.
_classifier = None


def _get_classifier():
    """Return the shared EncoderClassifier, loading it on first use."""
    global _classifier
    if _classifier is None:
        _classifier = EncoderClassifier.from_hparams(
            source=CLASSIFIER,
            savedir="pretrained_models/accent_classifier",
            run_opts={"device": "cpu"},  # or "cuda" if GPU available
        )
    return _classifier


def classify_accent(audio_path):
    """Classify the English accent spoken in the audio file at *audio_path*.

    The audio is mixed down to one channel and resampled to
    ``_TARGET_SAMPLE_RATE`` before being passed to the model, because
    ``classify_batch`` does not normalise its input tensor.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        A ``(accent, confidence)`` tuple: the predicted label and the
        confidence formatted as a percentage string such as ``"87.31%"``.
    """
    classifier = _get_classifier()

    waveform, sample_rate = torchaudio.load(audio_path)

    # torchaudio returns (channels, samples) while classify_batch treats the
    # first dimension as the batch; without the mix-down a stereo file would
    # be scored as two separate utterances.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sample_rate != _TARGET_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, _TARGET_SAMPLE_RATE
        )

    prediction = classifier.classify_batch(waveform)
    # SpeechBrain documents the return value as
    # (out_prob, score, index, text_lab).
    best_score = prediction[1]
    labels = prediction[3]

    predicted_accent = labels[0]
    confidence = best_score.exp().max().item() * 100
    return predicted_accent, f"{confidence:.2f}%"
def process_video(url):
    """Download a video, extract its audio and classify the speaker's accent.

    Args:
        url: Public video URL, forwarded to ``download_video``.

    Returns:
        The ``(accent, confidence)`` tuple produced by ``classify_accent``.

    Raises:
        Any exception raised by the download, extraction or classification
        step is propagated unchanged; temporary files are removed first.
    """
    # Pre-bind both paths so the cleanup below never touches an unbound name
    # when an early step fails (which would otherwise mask the real error).
    video_path = None
    audio_path = None
    try:
        video_path = download_video(url)
        audio_path = extract_audio(video_path)
        accent, confidence = classify_accent(audio_path)
    finally:
        # Cleanup temp files if they exist
        for path in (video_path, audio_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                pass  # already gone, nothing to clean up
    return accent, confidence
# Gradio interface
# One text box collects the video URL; the two values returned by
# process_video are shown, in order, in the two result boxes.
# NOTE(review): the input label advertises Loom links - confirm that
# download_video actually handles them.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in ("Detected Accent", "Confidence Score")
    ],
    description="Paste a public video URL to detect the English accent and confidence score.",
    title="English Accent Classifier",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()