Spaces:

EdgarDataScientist
/

Accent_Classification

Runtime error

App Files Files Community

Accent_Classification / app.py

EdgarDataScientist

Update app.py

f6f6edc verified 3 months ago

raw

history blame

2.93 kB

	import gradio as gr
	from moviepy.editor import VideoFileClip
	from speechbrain.pretrained import EncoderClassifier
	import torchaudio
	from pytube import YouTube
	import os

	# Model source handed to EncoderClassifier.from_hparams() in classify_accent
	# (looks like a Hugging Face Hub repo id -- TODO confirm).
	CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"

	def download_video(url):
	    """Download the video behind *url* to a local file and return its path.

	    URLs containing ``youtube.com`` or ``youtu.be`` are fetched with pytube
	    (saved under whatever name pytube picks in the working directory). Any
	    other URL is treated as a direct link and streamed to ``temp_video.mp4``.

	    Args:
	        url: Public video URL; surrounding whitespace is ignored.

	    Returns:
	        Path of the downloaded file.

	    Raises:
	        RuntimeError: If the URL is empty or the download fails for any
	            reason. The underlying exception is attached as ``__cause__``.
	    """
	    local_filename = "temp_video.mp4"
	    try:
	        if not url or not url.strip():
	            raise ValueError("No URL provided.")
	        url = url.strip()

	        if "youtube.com" in url or "youtu.be" in url:
	            yt = YouTube(url)
	            stream = yt.streams.filter(
	                progressive=True, file_extension='mp4'
	            ).first()
	            if not stream:
	                raise ValueError("No suitable video stream found.")
	            return stream.download()

	        # For direct links, stream the response body to disk in chunks.
	        import requests

	        try:
	            # The timeout bounds connection setup and each read, so a
	            # stalled server cannot hang the request forever.
	            with requests.get(url, stream=True, timeout=30) as r:
	                r.raise_for_status()
	                with open(local_filename, 'wb') as f:
	                    for chunk in r.iter_content(chunk_size=8192):
	                        f.write(chunk)
	        except Exception:
	            # The caller never learns the path on failure, so remove any
	            # partially written file here instead of leaking it.
	            try:
	                os.remove(local_filename)
	            except OSError:
	                pass
	            raise
	        return local_filename
	    except Exception as e:
	        raise RuntimeError(f"Failed to download video: {e}") from e



	def extract_audio(video_path):
	    """Write the audio track of a video to ``temp_audio.wav``.

	    Args:
	        video_path: Path of the video file to open with moviepy.

	    Returns:
	        Path of the written WAV file (``"temp_audio.wav"``).

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    audio_path = "temp_audio.wav"
	    clip = VideoFileClip(video_path)
	    try:
	        if clip.audio is None:
	            raise ValueError("The video does not contain an audio track.")
	        clip.audio.write_audiofile(audio_path, logger=None)
	    finally:
	        # Release the clip even when extraction fails.
	        clip.close()
	    return audio_path

	# Shared classifier instance, created on first use by _get_accent_classifier().
	_ACCENT_CLASSIFIER = None

	# NOTE(review): 16 kHz mono is the usual input format for XLSR/wav2vec2
	# models -- confirm against the model card for CLASSIFIER.
	_TARGET_SAMPLE_RATE = 16000


	def _get_accent_classifier():
	    """Return the shared classifier, loading it on the first call only."""
	    global _ACCENT_CLASSIFIER
	    if _ACCENT_CLASSIFIER is None:
	        # NOTE(review): the model card for this checkpoint may require
	        # loading through a custom interface (foreign_class) -- confirm
	        # that EncoderClassifier.from_hparams works for it.
	        _ACCENT_CLASSIFIER = EncoderClassifier.from_hparams(
	            source=CLASSIFIER,
	            savedir="pretrained_models/accent_classifier",
	            run_opts={"device": "cpu"},  # or "cuda" if GPU available
	        )
	    return _ACCENT_CLASSIFIER


	def classify_accent(audio_path):
	    """Classify the English accent spoken in an audio file.

	    The audio is mixed down to one channel and resampled to
	    ``_TARGET_SAMPLE_RATE`` before being passed to the classifier, so that
	    stereo input is not treated as a batch of two utterances.

	    Args:
	        audio_path: Path of an audio file readable by ``torchaudio.load``.

	    Returns:
	        Tuple ``(predicted_accent, confidence)`` where *confidence* is a
	        percentage string such as ``"87.31%"``.
	    """
	    classifier = _get_accent_classifier()

	    waveform, sample_rate = torchaudio.load(audio_path)
	    # torchaudio returns (channels, samples); average channels to mono while
	    # keeping a leading dimension of 1, which classify_batch sees as batch.
	    if waveform.shape[0] > 1:
	        waveform = waveform.mean(dim=0, keepdim=True)
	    if sample_rate != _TARGET_SAMPLE_RATE:
	        resampler = torchaudio.transforms.Resample(
	            orig_freq=sample_rate, new_freq=_TARGET_SAMPLE_RATE
	        )
	        waveform = resampler(waveform)

	    prediction = classifier.classify_batch(waveform)
	    # SpeechBrain documents the return as (out_prob, score, index, text_lab);
	    # `score` is presumably the best log-probability, hence the exp() below.
	    predicted_accent = prediction[3][0]
	    confidence = prediction[1].exp().max().item() * 100
	    return predicted_accent, f"{confidence:.2f}%"

	def process_video(url):
	    """Run the full pipeline for one URL: download, extract audio, classify.

	    Intended as the Gradio callback, so it never raises: any failure is
	    reported through the first return value instead.

	    Args:
	        url: Video URL entered by the user; surrounding whitespace is ignored.

	    Returns:
	        ``(accent, confidence)`` on success, or ``("Error: <message>", "")``
	        when any step fails.
	    """
	    video_path = None
	    audio_path = None
	    try:
	        # Pasted URLs often carry stray whitespace or a trailing newline.
	        video_path = download_video((url or "").strip())
	        audio_path = extract_audio(video_path)
	        accent, confidence = classify_accent(audio_path)
	        return accent, confidence
	    except Exception as e:
	        return f"Error: {e}", ""
	    finally:
	        # Best-effort cleanup: a file that is already gone or cannot be
	        # removed must not replace the result returned above with a crash.
	        for f in (video_path, audio_path):
	            if not f:
	                continue
	            try:
	                os.remove(f)
	            except OSError:
	                pass


	# Gradio UI: one URL textbox in, two textboxes (accent, confidence) out.
	_url_input = gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)")
	_accent_output = gr.Textbox(label="Detected Accent")
	_confidence_output = gr.Textbox(label="Confidence Score")

	iface = gr.Interface(
	    fn=process_video,
	    inputs=_url_input,
	    outputs=[_accent_output, _confidence_output],
	    title="English Accent Classifier",
	    description="Paste a public video URL to detect the English accent and confidence score.",
	)

	if __name__ == "__main__":
	    # Launch the app only when this file is executed as a script.
	    iface.launch()