Spaces:

EdgarDataScientist
/

Accent_Classification

Runtime error

App Files Files Community

Accent_Classification / app.py

EdgarDataScientist

Update app.py

5ca9307 verified 3 months ago

raw

history blame

3.25 kB

	import gradio as gr
	from moviepy.editor import VideoFileClip
	from speechbrain.pretrained import EncoderClassifier
	import torchaudio
	from pytubefix import YouTube
	from pytubefix.cli import on_progress
	import requests
	import os
	import torch

	# Model identifier passed as `source` to EncoderClassifier.from_hparams in
	# classify_accent (presumably a Hugging Face Hub repo id -- confirm).
	CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"

	def get_default_device():
	    """Pick the torch device to run on: CUDA when it is available, CPU otherwise."""
	    if torch.cuda.is_available():
	        return torch.device("cuda")
	    return torch.device("cpu")

	def download_video(url, timeout=30):
	    """Download a video to local disk and return the path of the saved file.

	    URLs containing "youtube.com" or "youtu.be" are fetched with pytubefix
	    (first progressive MP4 stream). Any other URL is streamed with
	    ``requests`` into ``temp_video.mp4`` in the current working directory.

	    Args:
	        url: Address of the video. Surrounding whitespace is ignored.
	        timeout: Seconds ``requests`` waits for the server to connect or to
	            send data before giving up (direct downloads only).

	    Returns:
	        Path of the downloaded video file.

	    Raises:
	        RuntimeError: If the URL is empty or the download fails for any
	            reason; the underlying exception is attached as ``__cause__``.
	    """
	    try:
	        url = (url or "").strip()
	        if not url:
	            raise ValueError("No video URL provided.")

	        if "youtube.com" in url or "youtu.be" in url:
	            yt = YouTube(url, on_progress_callback=on_progress)
	            stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
	            if not stream:
	                raise ValueError("No suitable video stream found.")
	            return stream.download()

	        # Direct MP4 file download
	        local_filename = "temp_video.mp4"
	        try:
	            # Without a timeout an unresponsive server would block forever.
	            with requests.get(url, stream=True, timeout=timeout) as r:
	                r.raise_for_status()
	                with open(local_filename, 'wb') as f:
	                    for chunk in r.iter_content(chunk_size=8192):
	                        f.write(chunk)
	        except Exception:
	            # The caller only deletes paths that were returned, so a
	            # truncated file must be removed here before re-raising.
	            try:
	                os.remove(local_filename)
	            except OSError:
	                pass
	            raise
	        return local_filename
	    except Exception as e:
	        raise RuntimeError(f"Failed to download video: {e}") from e

	def extract_audio(video_path):
	    """Extract the audio track of a video and save it as ``temp_audio.wav``.

	    Args:
	        video_path: Path of the video file to read.

	    Returns:
	        Path of the written WAV file (``"temp_audio.wav"``).

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    audio_path = "temp_audio.wav"
	    clip = VideoFileClip(video_path)
	    try:
	        # A clip without an audio track exposes ``audio`` as None; fail with
	        # a readable message instead of an AttributeError.
	        if clip.audio is None:
	            raise ValueError("The video has no audio track.")
	        clip.audio.write_audiofile(audio_path, logger=None)
	    finally:
	        # Always release the clip, even when extraction fails.
	        clip.close()
	    return audio_path

	# Sampling rate the audio is converted to before classification.
	# NOTE(review): 16 kHz mono is what the model card states for this XLSR
	# model when calling classify_batch directly -- confirm against the model's
	# hyperparameters if the model is swapped.
	_MODEL_SAMPLE_RATE = 16000

	# Loaded classifiers keyed by device string, so the model is built once per
	# process instead of on every request.
	_CLASSIFIER_CACHE = {}


	def _get_classifier(device):
	    """Return the SpeechBrain classifier for *device*, loading it on first use."""
	    key = str(device)
	    if key not in _CLASSIFIER_CACHE:
	        _CLASSIFIER_CACHE[key] = EncoderClassifier.from_hparams(
	            source=CLASSIFIER,
	            savedir="pretrained_models/accent_classifier",
	            run_opts={"device": key},
	        )
	    return _CLASSIFIER_CACHE[key]


	def classify_accent(audio_path):
	    """Classify the English accent spoken in an audio file.

	    The audio is downmixed to a single channel and resampled to
	    ``_MODEL_SAMPLE_RATE`` before being passed to the SpeechBrain model, so
	    the model sees exactly one batch item.

	    Args:
	        audio_path: Path of the audio file to classify.

	    Returns:
	        A ``(label, confidence)`` tuple where ``confidence`` is a string
	        formatted as a percentage with two decimals, e.g. ``"87.50%"``.
	    """
	    device = get_default_device()
	    classifier = _get_classifier(device)

	    # torchaudio.load returns (channels, samples); classify_batch would treat
	    # each channel as a separate batch item, so collapse to mono first.
	    waveform, sample_rate = torchaudio.load(audio_path)
	    if waveform.size(0) > 1:
	        waveform = waveform.mean(dim=0, keepdim=True)
	    if sample_rate != _MODEL_SAMPLE_RATE:
	        waveform = torchaudio.functional.resample(
	            waveform, sample_rate, _MODEL_SAMPLE_RATE
	        )

	    with torch.no_grad():
	        _, score, _, labels = classifier.classify_batch(waveform.to(device))

	    predicted_accent = labels[0]
	    # assumes the returned score is a log-probability, hence exp() -- TODO confirm
	    confidence = score.exp().max().item() * 100
	    return predicted_accent, f"{confidence:.2f}%"

	def process_video(url):
	    """Run the full pipeline: download the video, extract audio, classify accent.

	    Args:
	        url: Video URL entered by the user.

	    Returns:
	        ``(accent, confidence)`` on success, or ``("Error: <message>", "")``
	        when any step raises. Exceptions are never propagated to the caller.
	    """
	    video_path = None
	    audio_path = None
	    try:
	        video_path = download_video(url)
	        audio_path = extract_audio(video_path)
	        accent, confidence = classify_accent(audio_path)
	        return accent, confidence
	    except Exception as e:
	        return f"Error: {e}", ""
	    finally:
	        # Clean up temporary files. Cleanup is best-effort: an exception
	        # raised here would replace the result being returned above.
	        for path in (video_path, audio_path):
	            if not path:
	                continue
	            try:
	                os.remove(path)
	            except OSError:
	                pass

	# Gradio UI: one text input for the video URL, two text outputs that receive
	# the (accent, confidence) pair returned by process_video.
	iface = gr.Interface(
	    title="English Accent Classifier",
	    description="Paste a public video URL to detect the English accent and confidence score.",
	    fn=process_video,
	    inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
	    outputs=[
	        gr.Textbox(label="Detected Accent"),
	        gr.Textbox(label="Confidence Score"),
	    ],
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()