"""Gradio app that classifies the English accent spoken in a public video.

Pipeline: download the video (YouTube via pytubefix, anything else as a
direct HTTP download), extract its audio track to a WAV file, run a
SpeechBrain accent classifier on it, and report the label and confidence.
"""

import contextlib
import functools
import os
import tempfile

import gradio as gr
import requests
import torch
import torchaudio
from moviepy.editor import VideoFileClip
from pytubefix import YouTube
from pytubefix.cli import on_progress
from speechbrain.pretrained import EncoderClassifier

CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"

# SpeechBrain model cards state that tensors handed to classify_batch must
# already be at the training sample rate (16 kHz, single channel); only
# classify_file normalises audio automatically.
MODEL_SAMPLE_RATE = 16000

# (connect, read) timeouts in seconds so a stalled server cannot hang a
# request forever.
DOWNLOAD_TIMEOUT = (10, 60)


def get_default_device():
    """Return the default device (cuda if available, else cpu)."""
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _new_temp_path(suffix):
    """Reserve a unique temporary file and return its path.

    Unique names keep concurrent requests from overwriting each other's
    intermediate files.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _remove_file(path):
    """Delete *path* if it is set and still exists (best-effort cleanup)."""
    if path:
        with contextlib.suppress(FileNotFoundError):
            os.remove(path)


@functools.lru_cache(maxsize=1)
def _load_classifier(device):
    """Load the accent classifier once per device and reuse it afterwards.

    NOTE(review): confirm against the model card that this checkpoint is
    meant to be loaded through ``EncoderClassifier``; some wav2vec2-based
    checkpoints ship a custom interface instead.
    """
    return EncoderClassifier.from_hparams(
        source=CLASSIFIER,
        savedir="pretrained_models/accent_classifier",
        run_opts={"device": device},
    )


def download_video(url):
    """Download a video from YouTube or a direct MP4 URL.

    Args:
        url: A YouTube URL (handled with pytubefix) or any other URL, which
            is fetched as-is over HTTP.

    Returns:
        Path of the downloaded file. The caller owns it and must delete it.

    Raises:
        RuntimeError: If the download fails for any reason; the original
            exception is chained as ``__cause__``.
    """
    video_path = _new_temp_path(".mp4")
    try:
        if "youtube.com" in url or "youtu.be" in url:
            yt = YouTube(url, on_progress_callback=on_progress)
            stream = yt.streams.filter(
                progressive=True, file_extension="mp4"
            ).first()
            if not stream:
                raise ValueError("No suitable video stream found.")
            # Download over the reserved placeholder instead of a
            # title-derived name in the working directory.
            return stream.download(
                output_path=os.path.dirname(video_path),
                filename=os.path.basename(video_path),
                skip_existing=False,
            )

        # Anything else is treated as a direct file download.
        with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as r:
            r.raise_for_status()
            with open(video_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return video_path
    except Exception as e:
        # Do not leave a partial download behind; the caller never learns
        # this path when we raise.
        _remove_file(video_path)
        raise RuntimeError(f"Failed to download video: {e}") from e


def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the WAV file, written at ``MODEL_SAMPLE_RATE``. The caller
        owns it and must delete it.

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_path = _new_temp_path(".wav")
    try:
        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise ValueError("The video has no audio track.")
            clip.audio.write_audiofile(
                audio_path, fps=MODEL_SAMPLE_RATE, logger=None
            )
        finally:
            # Always release the underlying reader, even on failure.
            clip.close()
    except Exception:
        _remove_file(audio_path)
        raise
    return audio_path


def classify_accent(audio_path):
    """Classify the English accent in an audio file.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        A ``(label, confidence)`` tuple, where ``confidence`` is a string
        formatted as a percentage with two decimals.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    device = get_default_device()
    classifier = _load_classifier(str(device))

    waveform, sample_rate = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError("The audio track is empty.")

    # torchaudio returns (channels, samples) and classify_batch treats the
    # first dimension as the batch, so a stereo file would otherwise be
    # scored as two separate utterances. Downmix to a single channel.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != MODEL_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, MODEL_SAMPLE_RATE
        )

    with torch.no_grad():
        prediction = classifier.classify_batch(waveform.to(device))

    # Index 3 holds the decoded labels and index 1 the score of the best
    # class. NOTE(review): ``.exp()`` assumes the score is a log-probability
    # -- confirm for this checkpoint.
    predicted_accent = prediction[3][0]
    confidence = prediction[1].exp().max().item() * 100
    return predicted_accent, f"{confidence:.2f}%"


def process_video(url):
    """Run the full pipeline: download video, extract audio, classify accent.

    Args:
        url: Public video URL entered by the user.

    Returns:
        ``(accent, confidence)`` on success, or ``("Error: ...", "")`` if any
        step fails. Temporary files are removed in both cases.
    """
    if not url or not url.strip():
        return "Error: Please enter a video URL.", ""

    video_path = None
    audio_path = None
    try:
        video_path = download_video(url.strip())
        audio_path = extract_audio(video_path)
        accent, confidence = classify_accent(audio_path)
        return accent, confidence
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of
        # crashing the Gradio worker.
        return f"Error: {e}", ""
    finally:
        # Clean up temporary files.
        for path in (video_path, audio_path):
            _remove_file(path)


iface = gr.Interface(
    fn=process_video,
    inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
    outputs=[
        gr.Textbox(label="Detected Accent"),
        gr.Textbox(label="Confidence Score"),
    ],
    title="English Accent Classifier",
    description="Paste a public video URL to detect the English accent and confidence score.",
)

if __name__ == "__main__":
    iface.launch()