EdgarDataScientist committed on
Commit
8cdbd03
·
verified ·
1 Parent(s): d752d17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -124
app.py CHANGED
@@ -8,150 +8,100 @@ import torchaudio
8
  import torch
9
  import ffmpeg
10
 
11
- # Load SpeechBrain
12
  try:
13
  from speechbrain.inference import EncoderClassifier
14
- speechbrain_classifier = EncoderClassifier.from_hparams(
15
  source="speechbrain/lang-id-commonlanguage_ecapa",
16
  savedir="pretrained_models/lang-id-commonlanguage_ecapa"
17
  )
18
- SPEECHBRAIN_LOADED = True
19
  except Exception as e:
20
- st.warning(f"Could not load SpeechBrain model: {e}. Using simulation.")
21
- SPEECHBRAIN_LOADED = False
22
-
23
- class AccentAnalyzer:
24
- def __init__(self):
25
- self.accent_profiles = {
26
- "American": {"features": ["rhotic", "flapped_t", "cot_caught_merger"]},
27
- "British": {"features": ["non_rhotic", "t_glottalization", "trap_bath_split"]},
28
- "Australian": {"features": ["non_rhotic", "flat_a", "high_rising_terminal"]},
29
- "Canadian": {"features": ["rhotic", "canadian_raising", "eh_tag"]},
30
- "Indian": {"features": ["retroflex_consonants", "monophthongization", "syllable_timing"]},
31
- "Irish": {"features": ["dental_fricatives", "alveolar_l", "soft_consonants"]},
32
- "Scottish": {"features": ["rolled_r", "monophthongs", "glottal_stops"]},
33
- "South African": {"features": ["non_rhotic", "kit_split", "kw_hw_distinction"]}
34
- }
35
- self.accent_data = self._simulate_profiles()
36
-
37
- def _simulate_profiles(self):
38
- all_features = set(f for p in self.accent_profiles.values() for f in p["features"])
39
- data = {}
40
- for name, profile in self.accent_profiles.items():
41
- data[name] = {
42
- "primary_features": profile["features"],
43
- "feature_probabilities": {
44
- f: random.uniform(0.7, 0.9) if f in profile["features"] else random.uniform(0.1, 0.4)
45
- for f in all_features
46
- }
47
- }
48
- return data
49
-
50
- def _simulate_accent_classification(self, audio_path):
51
- all_features = {f for p in self.accent_profiles.values() for f in p["features"]}
52
- detected = {f: random.uniform(0.1, 0.9) for f in all_features}
53
- scores = {}
54
- for accent, data in self.accent_data.items():
55
- score = sum(
56
- detected[f] * data["feature_probabilities"][f] * (3.0 if f in data["primary_features"] else 1.0)
57
- for f in all_features
58
- )
59
- scores[accent] = score
60
- top = max(scores, key=scores.get)
61
- conf = (scores[top] / max(scores.values())) * 100
62
- return {
63
- "accent_type": top,
64
- "confidence": conf,
65
- "explanation": f"Detected **{top}** accent with {conf:.1f}% confidence.",
66
- "all_scores": scores
67
- }
68
-
69
- def analyze_accent(self, audio_path):
70
- if not SPEECHBRAIN_LOADED:
71
- return self._simulate_accent_classification(audio_path)
72
-
73
- try:
74
- signal, sr = torchaudio.load(audio_path)
75
- duration = signal.shape[1] / sr
76
- if duration < 1.0:
77
- raise ValueError("Audio too short to analyze.")
78
-
79
- if signal.shape[0] > 1:
80
- signal = signal.mean(dim=0, keepdim=True)
81
- if sr != 16000:
82
- signal = torchaudio.transforms.Resample(sr, 16000)(signal)
83
- signal = signal.unsqueeze(0) # [1, 1, time]
84
-
85
- pred = speechbrain_classifier.classify_batch(signal)
86
- probs = pred[0].squeeze(0).tolist()
87
- labels = pred[1][0]
88
- scores = {speechbrain_classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
89
-
90
- if labels[0] == 'en':
91
- result = self._simulate_accent_classification(audio_path)
92
- result["all_scores"] = scores
93
- return result
94
- return {
95
- "accent_type": labels[0],
96
- "confidence": max(probs) * 100,
97
- "explanation": f"Detected language: **{labels[0]}** ({max(probs)*100:.1f}%)",
98
- "all_scores": scores
99
- }
100
- except Exception as e:
101
- st.warning(f"Fallback to simulation: {e}")
102
- return self._simulate_accent_classification(audio_path)
103
-
104
- def download_and_extract_audio(url_or_path, is_upload=False):
105
  temp_dir = tempfile.mkdtemp()
106
- video_path = os.path.join(temp_dir, "video.mp4")
107
  audio_path = os.path.join(temp_dir, "audio.wav")
108
 
109
  if is_upload:
110
  with open(video_path, "wb") as f:
111
- f.write(url_or_path.read())
112
  else:
113
- with requests.get(url_or_path, stream=True) as r:
114
  r.raise_for_status()
115
  with open(video_path, 'wb') as f:
116
  for chunk in r.iter_content(chunk_size=8192):
117
  f.write(chunk)
118
 
119
- (
120
- ffmpeg
121
- .input(video_path)
122
- .output(audio_path, ar=16000, ac=1, format='wav')
123
- .run(quiet=True, overwrite_output=True)
124
- )
125
  return audio_path
126
 
127
- # --- Streamlit App ---
128
- st.set_page_config(page_title="Accent Analyzer", layout="wide")
129
- st.title("🗣️ English Accent or Language Analyzer")
130
 
131
- st.markdown("Upload a video/audio file or provide a direct `.mp4` or `.wav` URL:")
132
 
133
- url = st.text_input("🔗 Enter Direct MP4/WAV URL:")
134
- uploaded_file = st.file_uploader("📁 Or upload a file (MP4/WAV)", type=["mp4", "wav"])
135
 
136
- if st.button("Analyze"):
137
- if not url and not uploaded_file:
138
- st.error("Please enter a valid URL or upload a file.")
139
  else:
140
- try:
141
- with st.spinner("Processing audio..."):
142
- audio_path = download_and_extract_audio(uploaded_file if uploaded_file else url, is_upload=bool(uploaded_file))
143
- analyzer = AccentAnalyzer()
144
- results = analyzer.analyze_accent(audio_path)
145
-
146
- st.success(results["explanation"])
147
-
148
- labels, values = zip(*results["all_scores"].items())
149
- fig, ax = plt.subplots()
150
- ax.bar(labels, values, color='skyblue')
151
- ax.set_ylabel('Confidence (%)')
152
- ax.set_title('Accent/Language Confidence')
153
- plt.xticks(rotation=45)
154
- st.pyplot(fig)
155
-
156
- except Exception as e:
157
- st.error(f"Failed to analyze: {e}")
 
8
  import torch
9
  import ffmpeg
10
 
11
+ # Try loading SpeechBrain
12
  try:
13
  from speechbrain.inference import EncoderClassifier
14
+ classifier = EncoderClassifier.from_hparams(
15
  source="speechbrain/lang-id-commonlanguage_ecapa",
16
  savedir="pretrained_models/lang-id-commonlanguage_ecapa"
17
  )
18
+ SB_READY = True
19
  except Exception as e:
20
+ st.warning(" SpeechBrain model load failed. Falling back to simulation.")
21
+ SB_READY = False
22
+
23
+ # Accent Profiles for English detection
24
+ accent_profiles = {
25
+ "American": ["rhotic", "flapped_t", "cot_caught_merger"],
26
+ "British": ["non_rhotic", "t_glottalization", "trap_bath_split"],
27
+ "Australian": ["non_rhotic", "flat_a", "high_rising_terminal"],
28
+ "Canadian": ["rhotic", "canadian_raising", "eh_tag"],
29
+ "Indian": ["retroflex_consonants", "monophthongization", "syllable_timing"]
30
+ }
31
+
32
+ def simulate_accent_classification():
33
+ accent = random.choice(list(accent_profiles.keys()))
34
+ confidence = random.uniform(75, 98)
35
+ return {
36
+ "accent": accent,
37
+ "confidence": round(confidence, 2),
38
+ "summary": f"Simulated detection: {accent} accent with {confidence:.2f}% confidence."
39
+ }
40
+
41
+ def real_accent_classification(audio_path):
42
+ try:
43
+ signal, sr = torchaudio.load(audio_path)
44
+ if signal.shape[0] > 1:
45
+ signal = signal.mean(dim=0, keepdim=True)
46
+ if sr != 16000:
47
+ signal = torchaudio.transforms.Resample(sr, 16000)(signal)
48
+ signal = signal.unsqueeze(0)
49
+
50
+ pred = classifier.classify_batch(signal)
51
+ probs = pred[0].squeeze(0).tolist()
52
+ labels = pred[1][0]
53
+
54
+ lang_scores = {classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
55
+ top_lang = max(lang_scores, key=lang_scores.get)
56
+
57
+ if top_lang != "en":
58
+ return {"accent": "Non-English", "confidence": lang_scores[top_lang], "summary": f"Detected language: {top_lang}"}
59
+
60
+ # Simulate accent if English
61
+ result = simulate_accent_classification()
62
+ result["summary"] += f" (Base language: English)"
63
+ return result
64
+ except Exception as e:
65
+ return simulate_accent_classification()
66
+
67
+ def extract_audio(url_or_file, is_upload=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  temp_dir = tempfile.mkdtemp()
69
+ video_path = os.path.join(temp_dir, "input_video.mp4")
70
  audio_path = os.path.join(temp_dir, "audio.wav")
71
 
72
  if is_upload:
73
  with open(video_path, "wb") as f:
74
+ f.write(url_or_file.read())
75
  else:
76
+ with requests.get(url_or_file, stream=True) as r:
77
  r.raise_for_status()
78
  with open(video_path, 'wb') as f:
79
  for chunk in r.iter_content(chunk_size=8192):
80
  f.write(chunk)
81
 
82
+ ffmpeg.input(video_path).output(audio_path, ar=16000, ac=1).run(overwrite_output=True, quiet=True)
 
 
 
 
 
83
  return audio_path
84
 
85
+ # --- Streamlit UI ---
86
+ st.set_page_config(page_title="English Accent Analyzer", layout="centered")
87
+ st.title("🗣️ English Accent Analyzer")
88
 
89
+ st.markdown("### 🎯 Objective:\nUpload or link a video/audio of a speaker. We’ll detect if they're speaking English and simulate the accent.")
90
 
91
+ url_input = st.text_input("🔗 Paste public Loom or direct MP4/WAV link:")
92
+ uploaded_file = st.file_uploader("📁 Or upload a video/audio file", type=["mp4", "wav"])
93
 
94
+ if st.button(" Analyze"):
95
+ if not url_input and not uploaded_file:
96
+ st.error("Please provide a valid URL or upload a file.")
97
  else:
98
+ with st.spinner("Analyzing..."):
99
+ try:
100
+ audio_path = extract_audio(uploaded_file if uploaded_file else url_input, is_upload=bool(uploaded_file))
101
+ result = real_accent_classification(audio_path) if SB_READY else simulate_accent_classification()
102
+
103
+ st.success(f"🎧 Detected Accent: **{result['accent']}**")
104
+ st.metric("Confidence", f"{result['confidence']}%")
105
+ st.markdown(f"📝 {result['summary']}")
106
+ except Exception as e:
107
+ st.error(f"❌ Error during analysis: {e}")