"""Gradio app: download a video, extract its audio, and classify the speaker.

The pipeline is: yt-dlp download -> ffmpeg conversion to 16 kHz mono WAV ->
Hugging Face ``audio-classification`` pipeline. The top prediction is reported
as a speaker ID, which the app presents as a proxy for accent.
"""

import functools
import os
import subprocess
import sys
import tempfile

# Ensure yt_dlp is available: fall back to installing it with pip for the
# running interpreter, then retry the import.
try:
    import yt_dlp as youtube_dl
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "yt-dlp"])
    import yt_dlp as youtube_dl

import gradio as gr
from transformers import pipeline


def download_video(video_url, filename="downloaded_video.mp4"):
    """Download the media at *video_url* with yt-dlp and return *filename*.

    Args:
        video_url: URL handed to yt-dlp.
        filename: Output path used verbatim as yt-dlp's output template.

    Returns:
        The *filename* argument, unchanged.

    Raises:
        Whatever yt-dlp raises when the download fails.
    """
    ydl_opts = {
        # Only the audio is needed downstream, so prefer an audio-only stream.
        'format': 'bestaudio/best',
        'outtmpl': filename,
        'noplaylist': True,
        'quiet': True,
        'user_agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/115.0.0.0 Safari/537.36'
        ),
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])
    return filename


def extract_audio(video_filename, audio_filename="extracted_audio.wav"):
    """Convert *video_filename* to a 16 kHz mono 16-bit PCM WAV via ffmpeg.

    Args:
        video_filename: Path of the input media file.
        audio_filename: Path of the WAV file to write (overwritten if present).

    Returns:
        The *audio_filename* argument, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    command = [
        "ffmpeg", "-y",              # overwrite the output without prompting
        "-i", video_filename,
        "-vn",                       # drop any video stream
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        audio_filename,
    ]
    subprocess.run(command, check=True)
    return audio_filename


@functools.lru_cache(maxsize=2)
def _load_classifier(model_name):
    """Build the audio-classification pipeline for *model_name* (cached).

    Caching avoids re-loading the model on every request.
    """
    return pipeline("audio-classification", model=model_name)


def classify_accent(audio_file, model_name="superb/wav2vec2-base-superb-sid"):
    """Classify *audio_file* and format the first prediction as text.

    Args:
        audio_file: Path of the audio file passed to the pipeline.
        model_name: Model identifier for the ``audio-classification`` pipeline.

    Returns:
        A two-line string with the label and confidence of the first result,
        or ``"No result."`` when the pipeline returns nothing.
    """
    classifier = _load_classifier(model_name)
    results = classifier(audio_file)
    if not results:
        return "No result."
    top = results[0]
    return (
        f"Speaker ID (as accent proxy): {top['label']}\n"
        f"Confidence: {top['score'] * 100:.2f}%"
    )


def accent_classifier(video_url):
    """Run download -> audio extraction -> classification for *video_url*.

    Intermediate files live in a per-call temporary directory, so overlapping
    requests cannot overwrite or delete each other's files, and everything is
    removed when the call finishes.

    Args:
        video_url: URL of the video to analyse.

    Returns:
        The classification text, or an ``"Error occurred: ..."`` message if
        any step fails. Exceptions are reported as text rather than raised so
        the UI always has something to display.
    """
    if not video_url or not video_url.strip():
        return "Error occurred: no video URL provided."

    try:
        with tempfile.TemporaryDirectory() as workdir:
            video_file = download_video(
                video_url.strip(),
                os.path.join(workdir, "downloaded_video.mp4"),
            )
            audio_file = extract_audio(
                video_file,
                os.path.join(workdir, "extracted_audio.wav"),
            )
            return classify_accent(audio_file)
    except Exception as e:  # top-level UI boundary: surface the error as text
        return f"Error occurred: {e}"


iface = gr.Interface(
    fn=accent_classifier,
    inputs=gr.Textbox(
        label="Video URL",
        placeholder="Paste a public YouTube or Vimeo video link here",
    ),
    outputs="text",
    title="Accent Classifier",
    description="Download a video, extract the audio, and classify the speaker (as an accent proxy) using a Hugging Face model.",
)

if __name__ == "__main__":
    iface.launch()