# leeksang's picture
# Upload 6 files
# 5ca170c verified
import os
import subprocess
import sys
# Ensure yt_dlp is available
try:
import yt_dlp as youtube_dl
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "yt-dlp"])
import yt_dlp as youtube_dl
import gradio as gr
from transformers import pipeline
def download_video(video_url, filename="downloaded_video.mp4"):
    """Download the audio (or best available) stream of a video with yt-dlp.

    Args:
        video_url: URL of the video page to download from.
        filename: Path the downloaded media is written to. It is handed to
            yt-dlp as the output template, so the file keeps this exact name
            regardless of the container that was actually downloaded.

    Returns:
        ``filename``, unchanged, so the call can be chained into the next
        processing step.

    Raises:
        Any exception raised by yt-dlp while resolving or downloading the URL
        propagates to the caller.
    """
    browser_user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/115.0.0.0 Safari/537.36'
    )
    ydl_opts = {
        # Prefer an audio-only stream; fall back to the best combined format.
        'format': 'bestaudio/best',
        'outtmpl': filename,
        'noplaylist': True,
        'quiet': True,
        # A leftover file with the same name would otherwise be treated as
        # "already downloaded" and silently reused for a different URL.
        'overwrites': True,
        # YoutubeDL has no 'user_agent' parameter (that key is ignored);
        # request headers are configured through 'http_headers'.
        'http_headers': {'User-Agent': browser_user_agent},
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])
    return filename
def extract_audio(video_filename, audio_filename="extracted_audio.wav"):
    """Convert a media file to a mono 16 kHz 16-bit PCM WAV file via ffmpeg.

    Args:
        video_filename: Path of the input media file.
        audio_filename: Path of the WAV file to write; an existing file is
            overwritten (``-y``).

    Returns:
        ``audio_filename``, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Drop the video stream and encode as signed 16-bit PCM, 16 kHz, 1 channel.
    output_format = ["-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1"]
    ffmpeg_args = ["ffmpeg", "-y", "-i", video_filename, *output_format, audio_filename]
    subprocess.run(ffmpeg_args, check=True)
    return audio_filename
# Pipelines already built in this process, keyed by model name.
_CLASSIFIER_CACHE = {}


def _get_classifier(model_name):
    """Return the audio-classification pipeline for *model_name*, building it on first use."""
    classifier = _CLASSIFIER_CACHE.get(model_name)
    if classifier is None:
        classifier = pipeline("audio-classification", model=model_name)
        _CLASSIFIER_CACHE[model_name] = classifier
    return classifier


def classify_accent(audio_file, model_name="superb/wav2vec2-base-superb-sid"):
    """Classify an audio file and format the highest-scoring prediction.

    The pipeline for ``model_name`` is created once and reused on later
    calls, instead of being rebuilt for every request.

    Args:
        audio_file: Path of the audio file passed to the pipeline.
        model_name: Model identifier given to ``pipeline``.

    Returns:
        A two-line string with the top label and its confidence as a
        percentage, or ``"No result."`` when the pipeline returns nothing.
    """
    results = _get_classifier(model_name)(audio_file)
    if not results:
        return "No result."
    # Select by score explicitly rather than relying on the result order.
    top = max(results, key=lambda prediction: prediction['score'])
    return f"Speaker ID (as accent proxy): {top['label']}\nConfidence: {top['score'] * 100:.2f}%"
def accent_classifier(video_url):
    """Run the download -> audio extraction -> classification chain for one URL.

    Every call works inside its own temporary directory, so overlapping calls
    cannot overwrite or delete each other's intermediate files, and the files
    are removed when the call finishes whether it succeeded or failed.

    Args:
        video_url: URL of the video to analyse.

    Returns:
        The classification text on success, otherwise a string starting with
        ``"Error occurred: "`` that describes the failure. Exceptions are not
        propagated because the return value is displayed directly to the user.
    """
    import tempfile  # local import: only this function needs it

    # Reject blank or non-text input before starting any network work.
    if not isinstance(video_url, str) or not video_url.strip():
        return "Error occurred: no video URL was provided."

    try:
        with tempfile.TemporaryDirectory() as workdir:
            video_file = download_video(
                video_url.strip(),
                filename=os.path.join(workdir, "downloaded_video.mp4"),
            )
            audio_file = extract_audio(
                video_file,
                audio_filename=os.path.join(workdir, "extracted_audio.wav"),
            )
            return classify_accent(audio_file)
    except Exception as e:
        # Boundary of the UI callback: report the failure as text.
        return f"Error occurred: {e}"
# Text field that receives the video URL from the user.
_url_textbox = gr.Textbox(
    label="Video URL",
    placeholder="Paste a public YouTube or Vimeo video link here",
)

# Gradio interface: one URL in, one block of text out.
iface = gr.Interface(
    fn=accent_classifier,
    inputs=_url_textbox,
    outputs="text",
    title="Accent Classifier",
    description="Download a video, extract the audio, and classify the speaker (as an accent proxy) using a Hugging Face model.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()