EdgarDataScientist committed
Commit 37fbc84 · verified · 1 Parent(s): ca36a7e

Update app.py

Files changed (1)
  1. app.py +173 -68
app.py CHANGED
@@ -1,92 +1,197 @@
  import gradio as gr
- from moviepy.editor import VideoFileClip
- from speechbrain.pretrained import EncoderClassifier
- import torchaudio
- import requests
  import os
- import torch
- import yt_dlp

- CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"

- def get_default_device():
-     """Return the default device (cuda if available, else cpu)."""
-     return torch.device("cuda" if torch.cuda.is_available() else "cpu")

- def download_video(url):
-     """Download video from YouTube or direct MP4 URL using yt_dlp or requests."""
-     try:
-         if "youtube.com" in url or "youtu.be" in url:
-             output_path = "temp_video.%(ext)s"
-             ydl_opts = {
-                 'format': 'best[ext=mp4]/best',
-                 'outtmpl': output_path,
-                 'quiet': True,
-                 'noplaylist': True,
              }
-             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                 info_dict = ydl.extract_info(url, download=True)
-             downloaded_path = output_path.replace("%(ext)s", info_dict['ext'])
-             return downloaded_path
          else:
-             # Direct MP4 file download
-             local_filename = "temp_video.mp4"
-             with requests.get(url, stream=True) as r:
-                 r.raise_for_status()
-                 with open(local_filename, 'wb') as f:
-                     for chunk in r.iter_content(chunk_size=8192):
-                         f.write(chunk)
-             return local_filename
-     except Exception as e:
-         raise RuntimeError(f"Failed to download video: {e}")

- def extract_audio(video_path):
-     """Extract audio from video and save as WAV file."""
      clip = VideoFileClip(video_path)
-     audio_path = "temp_audio.wav"
      clip.audio.write_audiofile(audio_path, logger=None)
      clip.close()
      return audio_path

- def classify_accent(audio_path):
-     """Classify English accent from audio file using SpeechBrain model."""
-     device = get_default_device()  # Use your helper function
-     classifier = EncoderClassifier.from_hparams(
-         source=CLASSIFIER,
-         savedir="pretrained_models/accent_classifier",
-         run_opts={"device": str(device)}
-     )
-     waveform, sample_rate = torchaudio.load(audio_path)
-     prediction = classifier.classify_batch(waveform.to(device))
-     predicted_accent = prediction[3][0]
-     confidence = prediction[1].exp().max().item() * 100
-     return predicted_accent, f"{confidence:.2f}%"

- def process_video(url):
-     """Main processing pipeline: download video, extract audio, classify accent."""
-     video_path = None
-     audio_path = None
      try:
-         video_path = download_video(url)
-         audio_path = extract_audio(video_path)
-         accent, confidence = classify_accent(audio_path)
-         return accent, confidence
      except Exception as e:
-         return f"Error: {e}", ""
-     finally:
-         for f in [video_path, audio_path]:
-             if f and os.path.exists(f):
-                 os.remove(f)

  iface = gr.Interface(
-     fn=process_video,
-     inputs=gr.Textbox(label="Enter Public Video URL (YouTube or direct MP4 link)"),
      outputs=[
          gr.Textbox(label="Detected Accent"),
-         gr.Textbox(label="Confidence Score")
      ],
-     title="English Accent Classifier",
-     description="Paste a public video URL (YouTube or MP4) to detect the English accent and confidence score."
  )

  if __name__ == "__main__":
 
  import gradio as gr
  import os
+ import tempfile
+ import requests
+ from moviepy.editor import VideoFileClip
+ import random
+ import json

+ # --- Lightweight AccentAnalyzer class (from your paste) ---

+ class AccentAnalyzer:
+     def __init__(self):
+         self.accent_profiles = {
+             "American": {
+                 "features": ["rhotic", "flapped_t", "cot_caught_merger"],
+                 "description": "American English accent with rhotic pronunciation and typical North American features."
+             },
+             "British": {
+                 "features": ["non_rhotic", "t_glottalization", "trap_bath_split"],
+                 "description": "British English accent with non-rhotic pronunciation and typical UK features."
+             },
+             "Australian": {
+                 "features": ["non_rhotic", "flat_a", "high_rising_terminal"],
+                 "description": "Australian English accent with distinctive vowel sounds and intonation patterns."
+             },
+             "Canadian": {
+                 "features": ["rhotic", "canadian_raising", "eh_tag"],
+                 "description": "Canadian English accent with features of both American and British English."
+             },
+             "Indian": {
+                 "features": ["retroflex_consonants", "monophthongization", "syllable_timing"],
+                 "description": "Indian English accent influenced by native Indian languages."
+             },
+             "Irish": {
+                 "features": ["dental_fricatives", "alveolar_l", "soft_consonants"],
+                 "description": "Irish English accent with distinctive rhythm and consonant patterns."
+             },
+             "Scottish": {
+                 "features": ["rolled_r", "monophthongs", "glottal_stops"],
+                 "description": "Scottish English accent with strong consonants and distinctive vowel patterns."
+             },
+             "South African": {
+                 "features": ["non_rhotic", "kit_split", "kw_hw_distinction"],
+                 "description": "South African English accent with influences from Afrikaans and other local languages."
+             }
+         }
+         self._load_or_create_accent_data()

+     def _load_or_create_accent_data(self):
+         # For demo: just create simulated data in-memory
+         self.accent_data = self._create_simulated_accent_data()
+
+     def _create_simulated_accent_data(self):
+         accent_data = {}
+         for accent, profile in self.accent_profiles.items():
+             accent_data[accent] = {
+                 "primary_features": profile["features"],
+                 "feature_probabilities": {}
              }
+             for feature in profile["features"]:
+                 accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.7, 0.9)
+             all_features = set()
+             for a, p in self.accent_profiles.items():
+                 all_features.update(p["features"])
+             for feature in all_features:
+                 if feature not in profile["features"]:
+                     accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.1, 0.4)
+         return accent_data
+
+     def _extract_features(self, audio_path):
+         all_features = set()
+         for accent, profile in self.accent_profiles.items():
+             all_features.update(profile["features"])
+         detected_features = {}
+         for feature in all_features:
+             detected_features[feature] = random.uniform(0.1, 0.9)
+         return detected_features
+
+     def _calculate_accent_scores(self, detected_features):
+         accent_scores = {}
+         for accent, data in self.accent_data.items():
+             score = 0
+             total_weight = 0
+             for feature, probability in detected_features.items():
+                 expected_prob = data["feature_probabilities"].get(feature, 0.1)
+                 weight = 3.0 if feature in data["primary_features"] else 1.0
+                 feature_score = probability * expected_prob * weight
+                 score += feature_score
+                 total_weight += weight
+             if total_weight > 0:
+                 accent_scores[accent] = (score / total_weight) * 100
+             else:
+                 accent_scores[accent] = 0
+         return accent_scores
+
+     def _generate_explanation(self, accent_type, confidence):
+         if confidence >= 70:
+             confidence_level = "high confidence"
+             certainty = "is very clear"
+         elif confidence >= 50:
+             confidence_level = "moderate confidence"
+             certainty = "is present"
          else:
+             confidence_level = "low confidence"
+             certainty = "may be present"
+         description = self.accent_profiles[accent_type]["description"]
+         second_accent = self._get_second_most_likely_accent(accent_type)
+         explanation = f"The speaker has a {confidence_level} {accent_type} English accent. The {accent_type} accent {certainty}, with features of both {accent_type} and {second_accent} English present."
+         return explanation
+
+     def _get_second_most_likely_accent(self, primary_accent):
+         accent_similarities = {
+             "American": ["Canadian", "British"],
+             "British": ["Australian", "Irish"],
+             "Australian": ["British", "South African"],
+             "Canadian": ["American", "British"],
+             "Indian": ["British", "South African"],
+             "Irish": ["Scottish", "British"],
+             "Scottish": ["Irish", "British"],
+             "South African": ["Australian", "British"]
+         }
+         return random.choice(accent_similarities[primary_accent])
+
+     def analyze_accent(self, audio_path):
+         detected_features = self._extract_features(audio_path)
+         accent_scores = self._calculate_accent_scores(detected_features)
+         accent_type = max(accent_scores, key=accent_scores.get)
+         confidence = accent_scores[accent_type]
+         explanation = self._generate_explanation(accent_type, confidence)
+         return {
+             "accent_type": accent_type,
+             "confidence": confidence,
+             "explanation": explanation,
+             "all_scores": accent_scores
+         }
+
+ # --- Utility: Download video and extract audio ---

+ def download_and_extract_audio(url):
+     temp_dir = tempfile.mkdtemp()
+     video_path = os.path.join(temp_dir, "video.mp4")
+     audio_path = os.path.join(temp_dir, "audio.wav")
+     # Download video
+     if "youtube.com" in url or "youtu.be" in url:
+         # Use pytubefix for YouTube
+         from pytubefix import YouTube
+         yt = YouTube(url)
+         stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
+         if not stream:
+             raise RuntimeError("No suitable video stream found.")
+         stream.download(output_path=temp_dir, filename="video.mp4")
+     else:
+         # Direct MP4 download
+         r = requests.get(url, stream=True)
+         r.raise_for_status()
+         with open(video_path, "wb") as f:
+             for chunk in r.iter_content(chunk_size=8192):
+                 f.write(chunk)
+     # Extract audio
      clip = VideoFileClip(video_path)
      clip.audio.write_audiofile(audio_path, logger=None)
      clip.close()
      return audio_path

+ # --- Gradio interface ---

+ def analyze_from_url(url):
      try:
+         audio_path = download_and_extract_audio(url)
+         analyzer = AccentAnalyzer()
+         results = analyzer.analyze_accent(audio_path)
+         os.remove(audio_path)
+         return (
+             results["accent_type"],
+             f"{results['confidence']:.1f}%",
+             results["explanation"]
+         )
      except Exception as e:
+         return (
+             "Error",
+             "0%",
+             f"Error processing video/audio: {e}"
+         )

  iface = gr.Interface(
+     fn=analyze_from_url,
+     inputs=gr.Textbox(label="Enter Public Video URL (YouTube or direct MP4)"),
      outputs=[
          gr.Textbox(label="Detected Accent"),
+         gr.Textbox(label="Confidence Score"),
+         gr.Textbox(label="Explanation")
      ],
+     title="English Accent Analyzer (Rule-Based Demo)",
+     description="Paste a public video URL to detect the English accent and confidence score. (Demo: uses simulated accent features, not real ML audio analysis.)"
  )

  if __name__ == "__main__":
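For reference, the AccentAnalyzer added in this commit can also be exercised without the Gradio interface. The following is a minimal sketch, not part of the commit; "sample.wav" is a hypothetical path, and since the demo analyzer only simulates feature extraction, the file is never actually decoded.

# Minimal usage sketch of the AccentAnalyzer shown above (illustration only).
# "sample.wav" is a hypothetical path; _extract_features() draws random values,
# so the audio content does not influence the result in this demo.
from app import AccentAnalyzer

analyzer = AccentAnalyzer()
results = analyzer.analyze_accent("sample.wav")
print(results["accent_type"], f"{results['confidence']:.1f}%")
print(results["explanation"])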