# English Accent Analyzer — Gradio demo app.
# (Scraped page chrome — Spaces status lines, commit hashes, and the
# line-number gutter — removed; only the program source follows.)
import gradio as gr
import os
import tempfile
import requests
from moviepy.editor import VideoFileClip
import random
import json
# --- Lightweight rule-based AccentAnalyzer (simulated features, no real audio ML) ---
class AccentAnalyzer:
def __init__(self):
self.accent_profiles = {
"American": {
"features": ["rhotic", "flapped_t", "cot_caught_merger"],
"description": "American English accent with rhotic pronunciation and typical North American features."
},
"British": {
"features": ["non_rhotic", "t_glottalization", "trap_bath_split"],
"description": "British English accent with non-rhotic pronunciation and typical UK features."
},
"Australian": {
"features": ["non_rhotic", "flat_a", "high_rising_terminal"],
"description": "Australian English accent with distinctive vowel sounds and intonation patterns."
},
"Canadian": {
"features": ["rhotic", "canadian_raising", "eh_tag"],
"description": "Canadian English accent with features of both American and British English."
},
"Indian": {
"features": ["retroflex_consonants", "monophthongization", "syllable_timing"],
"description": "Indian English accent influenced by native Indian languages."
},
"Irish": {
"features": ["dental_fricatives", "alveolar_l", "soft_consonants"],
"description": "Irish English accent with distinctive rhythm and consonant patterns."
},
"Scottish": {
"features": ["rolled_r", "monophthongs", "glottal_stops"],
"description": "Scottish English accent with strong consonants and distinctive vowel patterns."
},
"South African": {
"features": ["non_rhotic", "kit_split", "kw_hw_distinction"],
"description": "South African English accent with influences from Afrikaans and other local languages."
}
}
self._load_or_create_accent_data()
def _load_or_create_accent_data(self):
# For demo: just create simulated data in-memory
self.accent_data = self._create_simulated_accent_data()
def _create_simulated_accent_data(self):
accent_data = {}
for accent, profile in self.accent_profiles.items():
accent_data[accent] = {
"primary_features": profile["features"],
"feature_probabilities": {}
}
for feature in profile["features"]:
accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.7, 0.9)
all_features = set()
for a, p in self.accent_profiles.items():
all_features.update(p["features"])
for feature in all_features:
if feature not in profile["features"]:
accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.1, 0.4)
return accent_data
def _extract_features(self, audio_path):
all_features = set()
for accent, profile in self.accent_profiles.items():
all_features.update(profile["features"])
detected_features = {}
for feature in all_features:
detected_features[feature] = random.uniform(0.1, 0.9)
return detected_features
def _calculate_accent_scores(self, detected_features):
accent_scores = {}
for accent, data in self.accent_data.items():
score = 0
total_weight = 0
for feature, probability in detected_features.items():
expected_prob = data["feature_probabilities"].get(feature, 0.1)
weight = 3.0 if feature in data["primary_features"] else 1.0
feature_score = probability * expected_prob * weight
score += feature_score
total_weight += weight
if total_weight > 0:
accent_scores[accent] = (score / total_weight) * 100
else:
accent_scores[accent] = 0
return accent_scores
def _generate_explanation(self, accent_type, confidence):
if confidence >= 70:
confidence_level = "high confidence"
certainty = "is very clear"
elif confidence >= 50:
confidence_level = "moderate confidence"
certainty = "is present"
else:
confidence_level = "low confidence"
certainty = "may be present"
description = self.accent_profiles[accent_type]["description"]
second_accent = self._get_second_most_likely_accent(accent_type)
explanation = f"The speaker has a {confidence_level} {accent_type} English accent. The {accent_type} accent {certainty}, with features of both {accent_type} and {second_accent} English present."
return explanation
def _get_second_most_likely_accent(self, primary_accent):
accent_similarities = {
"American": ["Canadian", "British"],
"British": ["Australian", "Irish"],
"Australian": ["British", "South African"],
"Canadian": ["American", "British"],
"Indian": ["British", "South African"],
"Irish": ["Scottish", "British"],
"Scottish": ["Irish", "British"],
"South African": ["Australian", "British"]
}
return random.choice(accent_similarities[primary_accent])
def analyze_accent(self, audio_path):
detected_features = self._extract_features(audio_path)
accent_scores = self._calculate_accent_scores(detected_features)
accent_type = max(accent_scores, key=accent_scores.get)
confidence = accent_scores[accent_type]
explanation = self._generate_explanation(accent_type, confidence)
return {
"accent_type": accent_type,
"confidence": confidence,
"explanation": explanation,
"all_scores": accent_scores
}
# --- Utility: Download video and extract audio ---
def download_and_extract_audio(url):
    """Download the video at ``url`` and extract its audio track to WAV.

    Supports YouTube links (via pytubefix) and direct MP4 URLs.

    Returns the path of the extracted WAV file. The file lives in a fresh
    temp directory; the caller removes the WAV, but NOTE(review): the temp
    directory itself (and the downloaded MP4) are never cleaned up.

    Raises RuntimeError when no suitable YouTube stream exists, and
    requests.HTTPError / requests.Timeout for failed direct downloads.
    """
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")
    if "youtube.com" in url or "youtu.be" in url:
        # Lazy import: pytubefix is only needed for YouTube URLs.
        from pytubefix import YouTube
        yt = YouTube(url)
        stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
        if not stream:
            raise RuntimeError("No suitable video stream found.")
        stream.download(output_path=temp_dir, filename="video.mp4")
    else:
        # Direct MP4 download, streamed to disk in chunks. The `with`
        # releases the connection (was leaked before), and the timeout
        # prevents an unresponsive server from hanging the app forever.
        with requests.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(video_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
    # Extract the audio track; close the clip even if extraction fails
    # (was leaked on error before).
    clip = VideoFileClip(video_path)
    try:
        clip.audio.write_audiofile(audio_path, logger=None)
    finally:
        clip.close()
    return audio_path
# --- Gradio interface callback ---
def analyze_from_url(url):
    """Full pipeline for the Gradio UI: download, extract, classify.

    Returns a 3-tuple of strings: (accent type, confidence like "73.4%",
    explanation). This is a UI boundary, so any failure is caught and
    reported as an ("Error", "0%", message) tuple instead of raising.
    """
    audio_path = None
    try:
        audio_path = download_and_extract_audio(url)
        analyzer = AccentAnalyzer()
        results = analyzer.analyze_accent(audio_path)
        return (
            results["accent_type"],
            f"{results['confidence']:.1f}%",
            results["explanation"]
        )
    except Exception as e:
        return (
            "Error",
            "0%",
            f"Error processing video/audio: {e}"
        )
    finally:
        # Fix: the WAV previously leaked whenever analyze_accent raised,
        # because cleanup was not in a finally block.
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
# Wire the analyzer into a simple three-output Gradio form.
_url_input = gr.Textbox(label="Enter Public Video URL (YouTube or direct MP4)")
_result_outputs = [
    gr.Textbox(label="Detected Accent"),
    gr.Textbox(label="Confidence Score"),
    gr.Textbox(label="Explanation"),
]
iface = gr.Interface(
    fn=analyze_from_url,
    inputs=_url_input,
    outputs=_result_outputs,
    title="English Accent Analyzer (Rule-Based Demo)",
    description="Paste a public video URL to detect the English accent and confidence score. (Demo: uses simulated accent features, not real ML audio analysis.)",
)

# Launch the web UI only when run as a script (not on import).
if __name__ == "__main__":
    iface.launch()