Spaces:
Runtime error
Runtime error
File size: 4,080 Bytes
9471255 5cb7e51 37fbc84 5a4c42c fa15dd8 5a4c42c fa15dd8 5a4c42c 37fbc84 5a4c42c 37fbc84 5a4c42c 37fbc84 5a4c42c 37fbc84 5a4c42c 37fbc84 5a4c42c 37fbc84 5a4c42c 37fbc84 5a4c42c 37fbc84 5a4c42c ba6451b 37fbc84 fa15dd8 5cb7e51 fa15dd8 5a4c42c 9471255 37fbc84 5cb7e51 37fbc84 5a4c42c 37fbc84 5a4c42c 37fbc84 f6f6edc 5a4c42c f6f6edc 5cb7e51 37fbc84 9471255 5cb7e51 37fbc84 9471255 5a4c42c 5cb7e51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import gradio as gr
import os
import tempfile
import requests
from moviepy.editor import VideoFileClip
from speechbrain.pretrained import EncoderClassifier
import torchaudio
import torch
# --- Real Accent Analyzer using SpeechBrain embeddings ---
class RealAccentAnalyzer:
    """Rank a recording against a fixed list of English accent labels.

    A SpeechBrain speaker-embedding model is used as a proxy for accent.
    NOTE: the per-accent reference vectors are random placeholders, so the
    accent that is reported has no real meaning until they are replaced with
    embeddings computed from labelled speech.
    """

    ACCENTS = ("American", "British", "Indian", "Australian", "Canadian")
    EMBEDDING_DIM = 192   # size of the placeholder reference vectors
    SAMPLE_RATE = 16000   # audio is resampled to this rate before encoding

    def __init__(self):
        # Pre-trained speaker embedding model (used as a proxy for accent).
        self.classifier = EncoderClassifier.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb"
        )
        self.reference_embeddings = self._load_reference_embeddings()

    def _load_reference_embeddings(self):
        """Return a dict mapping accent name to a (1, EMBEDDING_DIM) tensor.

        The vectors are dummies. They are drawn from a fixed-seed generator so
        that every analyzer instance ranks the same audio the same way; the
        global torch RNG state is left untouched.
        """
        generator = torch.Generator().manual_seed(0)
        return {
            accent: torch.randn(1, self.EMBEDDING_DIM, generator=generator)
            for accent in self.ACCENTS
        }

    def _extract_embedding(self, audio_path):
        """Load an audio file and return its embedding as a detached tensor.

        Multi-channel audio is averaged down to one channel and the signal is
        resampled to SAMPLE_RATE when the file uses a different rate.

        Raises:
            ValueError: if the file contains no samples.
        """
        signal, fs = torchaudio.load(audio_path)
        if signal.numel() == 0:
            raise ValueError("Audio file contains no samples.")
        if signal.shape[0] > 1:
            # Down-mix to mono while keeping the channel dimension.
            signal = torch.mean(signal, dim=0, keepdim=True)
        if fs != self.SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(
                orig_freq=fs, new_freq=self.SAMPLE_RATE
            )
            signal = resampler(signal)
        with torch.no_grad():  # inference only, no autograd graph needed
            embedding = self.classifier.encode_batch(signal)
        return embedding.squeeze().detach()

    def _compare_embeddings(self, emb):
        """Return a dict of accent name -> cosine similarity (a Python float).

        Both vectors are flattened to 1-D before comparison. The query arrives
        squeezed while the references are stored as (1, dim); comparing those
        shapes directly along dim=0 yields one value per feature instead of a
        single score, which cannot be converted with ``.item()``.
        """
        query = emb.reshape(-1)
        return {
            accent: torch.nn.functional.cosine_similarity(
                query, ref_emb.reshape(-1), dim=0
            ).item()
            for accent, ref_emb in self.reference_embeddings.items()
        }

    def analyze(self, audio_path):
        """Score ``audio_path`` against every reference accent.

        Returns:
            dict with keys ``accent`` (best-scoring label), ``score`` (its
            cosine similarity), ``explanation`` (human-readable sentence) and
            ``all_scores`` (similarity for every accent).
        """
        emb = self._extract_embedding(audio_path)
        similarities = self._compare_embeddings(emb)
        top_accent = max(similarities, key=similarities.get)
        confidence = similarities[top_accent]
        explanation = f"The speaker most likely has a {top_accent} English accent with similarity score {confidence:.2f}."
        return {
            "accent": top_accent,
            "score": confidence,
            "explanation": explanation,
            "all_scores": similarities,
        }
# --- Download and Extract Audio ---
def download_and_extract_audio(url):
    """Download a video and write its audio track to a WAV file.

    YouTube links (``youtube.com`` / ``youtu.be``) are fetched with pytubefix;
    any other URL is streamed to disk with ``requests``.

    Args:
        url: Public video URL.

    Returns:
        Path to ``audio.wav`` inside a freshly created temporary directory.
        The caller is responsible for deleting it.

    Raises:
        RuntimeError: if no progressive MP4 stream is found for a YouTube
            link, or if the downloaded video has no audio track.
        Exception: download or decoding errors from the underlying libraries
            are propagated (e.g. from ``raise_for_status()``).
    """
    import shutil  # function-scope import keeps this block self-contained

    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")
    try:
        if "youtube.com" in url or "youtu.be" in url:
            from pytubefix import YouTube

            yt = YouTube(url)
            stream = yt.streams.filter(progressive=True, file_extension="mp4").first()
            if not stream:
                raise RuntimeError("No suitable video stream found.")
            stream.download(output_path=temp_dir, filename="video.mp4")
        else:
            # A timeout stops a stalled server from hanging the request
            # forever; the context manager releases the connection.
            with requests.get(url, stream=True, timeout=30) as r:
                r.raise_for_status()
                with open(video_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise RuntimeError("The video has no audio track.")
            clip.audio.write_audiofile(audio_path, logger=None)
        finally:
            # Close the clip even when audio extraction fails.
            clip.close()
    except BaseException:
        # Nothing usable was produced, so drop the whole temp directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Only the audio file is returned; the video is no longer needed.
    try:
        os.remove(video_path)
    except OSError:
        pass  # best-effort cleanup; a leftover video must not fail the call
    return audio_path
# --- Gradio Interface ---
# Lazily created analyzer shared across requests (see _get_analyzer).
_ANALYZER = None


def _get_analyzer():
    """Return the shared RealAccentAnalyzer, creating it on first use."""
    global _ANALYZER
    if _ANALYZER is None:
        # Building the analyzer loads the pre-trained model, so do it once.
        _ANALYZER = RealAccentAnalyzer()
    return _ANALYZER


def _discard_audio(audio_path):
    """Best-effort removal of the temp audio file and its (empty) folder."""
    try:
        os.remove(audio_path)
    except OSError:
        pass
    try:
        # os.rmdir only succeeds on an empty directory, so this never
        # deletes anything other than the leftover temp folder.
        os.rmdir(os.path.dirname(audio_path))
    except OSError:
        pass


def analyze_from_url(url):
    """Gradio callback: analyze the accent of the speaker in a video URL.

    Args:
        url: Public video URL (YouTube or direct MP4).

    Returns:
        Tuple ``(accent, confidence, explanation)`` of strings. Any failure is
        reported as ``("Error", "0%", <message>)`` instead of raising, so the
        UI always receives three values.
    """
    audio_path = None
    try:
        audio_path = download_and_extract_audio(url)
        results = _get_analyzer().analyze(audio_path)
        return (
            results["accent"],
            f"{results['score']*100:.1f}%",
            results["explanation"],
        )
    except Exception as e:
        return ("Error", "0%", f"Error processing video/audio: {e}")
    finally:
        # Clean up on failure as well as on success.
        if audio_path is not None:
            _discard_audio(audio_path)
# Input widget: a single text field for the video link.
_url_box = gr.Textbox(label="Enter Public Video URL (YouTube or direct MP4)")

# Output widgets, in the same order as the tuple returned by analyze_from_url.
_result_boxes = [
    gr.Textbox(label=caption)
    for caption in ("Detected Accent", "Confidence Score", "Explanation")
]

# Assemble the Gradio app around the URL-analysis callback.
iface = gr.Interface(
    fn=analyze_from_url,
    inputs=_url_box,
    outputs=_result_boxes,
    title="Accent Analyzer (Real Embeddings with SpeechBrain)",
    description="Paste a public video URL. This app uses SpeechBrain speaker embeddings to infer accent similarity. It's experimental!",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
|