Spaces:
Sleeping
Sleeping
Refactor README and enhance Streamlit app with accent detection features, audio processing, and improved deployment instructions
Browse files- README.md +48 -10
- requirements.txt +7 -7
- src/streamlit_app.py +368 -25
README.md
CHANGED
@@ -1,26 +1,64 @@
|
|
1 |
---
|
2 |
-
title: Accent Detector
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
app_port: 8501
|
8 |
tags:
|
9 |
- streamlit
|
|
|
|
|
|
|
10 |
pinned: false
|
11 |
-
short_description:
|
12 |
license: mit
|
13 |
---
|
14 |
|
15 |
-
# 🎤 Accent Detection Tool
|
16 |
|
17 |
-
This app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
## Usage
|
20 |
-
|
21 |
-
-
|
22 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
## Powered By
|
25 |
- [SpeechBrain](https://huggingface.co/speechbrain/lang-id-commonlanguage_ecapa)
|
|
|
26 |
- [Streamlit](https://streamlit.io)
|
|
|
|
1 |
---
|
2 |
+
title: English Accent Detector
|
3 |
+
emoji: 🎤
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
sdk: docker
|
7 |
app_port: 8501
|
8 |
tags:
|
9 |
- streamlit
|
10 |
+
- audio
|
11 |
+
- accent-detection
|
12 |
+
- hiring
|
13 |
pinned: false
|
14 |
+
short_description: Detect and analyze English accents from videos
|
15 |
license: mit
|
16 |
---
|
17 |
|
18 |
+
# 🎤 English Accent Detection Tool
|
19 |
|
20 |
+
This app analyzes a speaker's English accent from video URLs or audio uploads, providing detailed insights for hiring evaluation purposes.
|
21 |
+
|
22 |
+
## Features
|
23 |
+
- **Video URL Processing**: Accept and analyze videos from Loom, YouTube, or direct MP4 links
|
24 |
+
- **Audio Upload Support**: Directly upload audio files for analysis
|
25 |
+
- **English Accent Classification**: Identify specific English accents (American, British, Australian, etc.)
|
26 |
+
- **Confidence Scoring**: Get detailed confidence scores for English proficiency
|
27 |
+
- **Detailed Analysis**: Receive expert-like explanations about accent characteristics
|
28 |
+
- **Visual Feedback**: View audio waveforms and listen to the processed audio
|
29 |
|
30 |
## Usage
|
31 |
+
1. **Via Video URL**:
|
32 |
+
- Enter a public video URL (Loom, YouTube, direct MP4, etc.)
|
33 |
+
- Click "Analyze Video"
|
34 |
+
- View the accent classification, confidence scores, and analysis
|
35 |
+
|
36 |
+
2. **Via Audio Upload**:
|
37 |
+
- Upload an audio file (WAV, MP3, M4A, OGG)
|
38 |
+
- Click "Analyze Audio"
|
39 |
+
- View the results
|
40 |
+
|
41 |
+
## Technology Stack
|
42 |
+
- **Audio Processing**: FFmpeg, Librosa
|
43 |
+
- **ML Models**: SpeechBrain, Transformers
|
44 |
+
- **UI**: Streamlit
|
45 |
+
- **Deployment**: Docker
|
46 |
+
|
47 |
+
## Requirements
|
48 |
+
- Python 3.9+
|
49 |
+
- FFmpeg
|
50 |
+
- See requirements.txt for Python dependencies
|
51 |
+
|
52 |
+
## Deployment
|
53 |
+
The app is containerized with Docker for easy deployment. Use the included Dockerfile to build and run:
|
54 |
+
|
55 |
+
```bash
|
56 |
+
docker build -t accent-detector .
|
57 |
+
docker run -p 8501:8501 accent-detector
|
58 |
+
```
|
59 |
|
60 |
## Powered By
|
61 |
- [SpeechBrain](https://huggingface.co/speechbrain/lang-id-commonlanguage_ecapa)
|
62 |
+
- [Hugging Face Transformers](https://huggingface.co/speechbrain/lang-id-voxlingua107-ecapa)
|
63 |
- [Streamlit](https://streamlit.io)
|
64 |
+
- [FFmpeg](https://ffmpeg.org/)
|
requirements.txt
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
streamlit>=1.25.0
|
2 |
yt_dlp>=2023.7.6
|
3 |
-
moviepy>=1.0.3
|
4 |
-
numpy>=1.22.0
|
5 |
-
decorator>=4.4.2
|
6 |
-
imageio>=2.9.0
|
7 |
-
imageio-ffmpeg>=0.4.5
|
8 |
-
proglog>=0.1.10
|
9 |
speechbrain>=0.5.15
|
10 |
torch>=2.0.0
|
11 |
-
torchaudio>=2.0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
streamlit>=1.25.0
|
2 |
yt_dlp>=2023.7.6
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
speechbrain>=0.5.15
|
4 |
torch>=2.0.0
|
5 |
+
torchaudio>=2.0.0
|
6 |
+
transformers>=4.30.0
|
7 |
+
librosa>=0.10.0
|
8 |
+
matplotlib>=3.7.0
|
9 |
+
scikit-learn>=1.3.0
|
10 |
+
openai>=1.0.0
|
11 |
+
python-dotenv>=1.0.0
|
src/streamlit_app.py
CHANGED
@@ -1,44 +1,387 @@
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
import yt_dlp
|
4 |
-
|
|
|
|
|
|
|
5 |
from speechbrain.pretrained import LanguageIdentification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def download_video(url, video_path="video.mp4"):
|
|
|
8 |
ydl_opts = {"outtmpl": video_path}
|
9 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
10 |
ydl.download([url])
|
|
|
11 |
|
12 |
def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
|
13 |
-
video
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
def
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# --- Streamlit App ---
|
24 |
-
st.
|
25 |
-
|
|
|
|
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
st.caption("Note: This is based on SpeechBrain's language model and may group accents by broader language class.")
|
42 |
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
import yt_dlp
|
4 |
+
import subprocess
|
5 |
+
import librosa
|
6 |
+
import numpy as np
|
7 |
+
import torch
|
8 |
from speechbrain.pretrained import LanguageIdentification
|
9 |
+
from transformers import AutoProcessor, AutoModelForAudioClassification
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
import tempfile
|
13 |
+
import time
|
14 |
+
|
15 |
+
# Comment for deployment instructions:
|
16 |
+
# To deploy this app:
|
17 |
+
# 1. Make sure Docker is installed
|
18 |
+
# 2. Build the Docker image: docker build -t accent-detector .
|
19 |
+
# 3. Run the container: docker run -p 8501:8501 accent-detector
|
20 |
+
# 4. Access the app at http://localhost:8501
|
21 |
+
#
|
22 |
+
# For cloud deployment:
|
23 |
+
# - Streamlit Cloud: Connect your GitHub repository to Streamlit Cloud
|
24 |
+
# - Hugging Face Spaces: Use the Docker deployment option
|
25 |
+
# - Azure/AWS/GCP: Deploy the container using their container services
|
26 |
+
|
27 |
+
# Load environment variables (if .env file exists)
|
28 |
+
try:
    # Best effort: pull settings such as OPENAI_API_KEY from a local .env
    # file into the process environment.
    # NOTE(review): python-dotenv is expected to return False rather than
    # raise when no .env file is present - confirm against the pinned version.
    load_dotenv()
except (OSError, ValueError):
    # An unreadable or undecodable .env must not stop the app from starting;
    # anything else (including KeyboardInterrupt) is allowed to propagate
    # instead of being silently swallowed by a bare ``except``.
    pass

# Check for OpenAI API access - optional for enhanced explanations
try:
    import openai

    openai.api_key = os.getenv("OPENAI_API_KEY")
    # An empty OPENAI_API_KEY is as unusable as a missing one, so test
    # truthiness rather than ``is not None``.
    have_openai = bool(openai.api_key)
except (ImportError, AttributeError):
    # The openai package is optional; without it the app falls back to
    # templated explanations.
    have_openai = False
|
40 |
+
|
41 |
+
# English accent categories
|
42 |
+
# Display names for the accent codes the classifier may emit.  Codes that are
# missing from this table are rendered by the caller as "English (<code>)".
ENGLISH_ACCENTS = {
    # North America
    "en-us": "American English",
    # British Isles and Oceania
    "en-gb": "British English",
    "en-au": "Australian English",
    "en-ca": "Canadian English",
    "en-ie": "Irish English",
    "en-scotland": "Scottish English",
    # Asia and Africa
    "en-in": "Indian English",
    "en-za": "South African English",
    "en-ng": "Nigerian English",
    # Caribbean
    "en-caribbean": "Caribbean English",
}
|
54 |
|
55 |
def download_video(url, video_path="video.mp4"):
    """Fetch the media behind ``url`` with yt-dlp and store it at ``video_path``.

    Args:
        url: Address of the video to download.
        video_path: Output template handed to yt-dlp (``outtmpl``).

    Returns:
        True when a file exists at ``video_path`` once yt-dlp has finished,
        False otherwise.
    """
    downloader = yt_dlp.YoutubeDL({"outtmpl": video_path})
    with downloader:
        downloader.download([url])

    # yt-dlp reports failures through exceptions; the existence check guards
    # against the output landing somewhere other than ``video_path``.
    return os.path.exists(video_path)
|
61 |
|
62 |
def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
    """Extract the audio track of ``video_path`` into a WAV file using ffmpeg.

    The output is written as 16-bit PCM, 16 kHz, mono.

    Args:
        video_path: Path of the media file to read.
        audio_path: Path of the WAV file to write; overwritten if it exists.

    Returns:
        True when ``audio_path`` exists after ffmpeg has finished.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
            (the error and ffmpeg's stderr are shown in the UI first).
    """
    command = [
        "ffmpeg",
        # Never read from stdin and always overwrite: without these flags an
        # already-existing output file makes ffmpeg wait for an interactive
        # "Overwrite? [y/N]" answer that a server process can never give.
        "-nostdin",
        "-y",
        "-i", video_path,
        "-vn",
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        audio_path,
    ]

    try:
        subprocess.run(command, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # ffmpeg's stderr is not guaranteed to be valid UTF-8 (file names,
        # metadata), so decode leniently instead of failing inside the handler.
        stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
        st.error(f"Error extracting audio: {e}")
        st.error(f"ffmpeg output: {stderr_text}")
        raise

    return os.path.exists(audio_path)
|
75 |
+
|
76 |
+
class AccentDetector:
    """Two-stage speech analyser: language identification, then accent labelling.

    Stage one uses a SpeechBrain language-ID model to decide whether the
    speech is English.  Stage two, when the optional classifier could be
    loaded, assigns an accent label.  Results are packaged for the Streamlit
    UI by :meth:`analyze_audio`.
    """

    def __init__(self):
        """Load both models; the accent classifier is optional."""
        # Initialize the language identification model
        # NOTE(review): SpeechBrain's model card for this checkpoint uses the
        # EncoderClassifier interface - confirm that LanguageIdentification
        # exists in the pinned speechbrain version.
        self.lang_id = LanguageIdentification.from_hparams(
            source="speechbrain/lang-id-commonlanguage_ecapa",
            savedir="tmp_model"
        )

        # Initialize the English accent classifier - using VoxLingua107 for now
        # In production, you'd use a more specialized accent model
        # NOTE(review): this Hub id is a SpeechBrain checkpoint; confirm it can
        # be loaded through the transformers Auto* classes.  When loading
        # fails, the detector degrades to "Unknown English Accent".
        try:
            self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
            self.processor = AutoProcessor.from_pretrained(self.model_name)
            self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
            self.have_accent_model = True
        except Exception as e:
            st.warning(f"Could not load accent model: {str(e)}")
            self.have_accent_model = False

    def is_english(self, audio_path, threshold=0.7):
        """Decide whether the speech in ``audio_path`` is English.

        Args:
            audio_path: Path of the audio file to classify.
            threshold: Accepted for backward compatibility; it is not applied
                to the result.

        Returns:
            Tuple ``(is_english, lang, score)``: the boolean verdict, the
            predicted label as a string and the model's score for that label
            as a float.
        """
        prediction = self.lang_id.classify_file(audio_path)

        # SpeechBrain classifiers return (out_prob, score, index, text_lab),
        # each with a leading batch dimension.  The label is therefore element
        # 3 and the best score element 1; element 0 holds one value per class
        # and cannot be converted to a single float.
        lang = str(prediction[3][0])
        # NOTE(review): the scale of this score (probability, log-likelihood
        # or similarity) depends on the checkpoint - confirm before treating
        # it as a percentage.
        score = float(prediction[1][0])

        # Match on the start of the label ("English", "en", "en-us",
        # "en: English").  A plain substring test also accepts labels that
        # merely contain "eng", such as "Bengali".
        normalized = lang.strip().lower()
        is_english = normalized == "en" or normalized.startswith(("eng", "en-", "en:"))

        return is_english, lang, score

    def classify_accent(self, audio_path):
        """Classify the specific English accent of ``audio_path``.

        Returns:
            Tuple ``(accent, confidence)``.  ``("Unknown English Accent", 0.0)``
            is returned when the accent model is unavailable or classification
            fails (the error is shown in the UI).
        """
        if not self.have_accent_model:
            return "Unknown English Accent", 0.0

        try:
            # Load and preprocess audio
            audio, sr = librosa.load(audio_path, sr=16000)
            inputs = self.processor(audio, sampling_rate=sr, return_tensors="pt")

            # Get predictions
            with torch.no_grad():
                outputs = self.model(**inputs)

            # Get probabilities
            probs = outputs.logits.softmax(dim=-1)[0]
            prediction_id = probs.argmax().item()
            confidence = probs[prediction_id].item()

            # Get predicted label
            accent_code = self.model.config.id2label[prediction_id]

            # Map to English accent if possible
            if accent_code.startswith('en-'):
                # Confidence is kept as-is for specific English accent codes.
                accent = ENGLISH_ACCENTS.get(accent_code, f"English ({accent_code})")
            else:
                # If it's not an English accent code, use our pre-classification
                is_english, _, _ = self.is_english(audio_path)
                if is_english:
                    accent = "General English"
                else:
                    accent = f"Non-English ({accent_code})"
                confidence *= 0.7  # Reduce confidence for non-specific matches

            return accent, confidence
        except Exception as e:
            st.error(f"Error in accent classification: {str(e)}")
            return "Unknown English Accent", 0.0

    @staticmethod
    def _fallback_explanation(accent, confidence, is_english, language):
        """Build the templated explanation used when OpenAI is unavailable."""
        if is_english:
            return f"The speaker has a {accent} accent with {confidence*100:.1f}% confidence. The speech was identified as English."
        return f"The speech was identified as {language}, not English. English confidence is low."

    def generate_explanation(self, audio_path, accent, confidence, is_english, language):
        """Explain the detection result, using the OpenAI API when available.

        Args:
            audio_path: Path of the analysed audio (kept for interface
                compatibility; the audio is not re-analysed here).
            accent: Accent label to describe.
            confidence: Accent confidence in the range 0-1.
            is_english: Whether the speech was identified as English.
            language: Label produced by language identification.

        Returns:
            A short explanation string.  A templated sentence is returned when
            OpenAI is not configured or the request fails.
        """
        if not have_openai:
            return self._fallback_explanation(accent, confidence, is_english, language)

        try:
            import openai

            # The caller already ran language identification; reuse its result
            # rather than paying for a second model inference here.
            prompt = f"""
            Audio analysis detected a speaker with the following characteristics:
            - Primary accent/language: {accent}
            - Confidence score: {confidence*100:.1f}%
            - Detected language category: {language}
            - Is English: {is_english}

            Based on this information, provide a 2-3 sentence summary about the speaker's accent.
            Focus on how clear their English is and any notable accent characteristics.
            This is for hiring purposes to evaluate English speaking abilities.
            """

            response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an accent analysis specialist providing factual assessments."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=150
            )

            return response.choices[0].message.content.strip()
        except Exception as e:
            st.error(f"Error generating explanation: {str(e)}")
            return self._fallback_explanation(accent, confidence, is_english, language)

    def analyze_audio(self, audio_path):
        """Run the complete analysis pipeline on ``audio_path``.

        Returns:
            Dict with the keys ``is_english``, ``accent``,
            ``accent_confidence`` (scaled by 100), ``english_confidence``,
            ``language_detected``, ``explanation`` and ``audio_viz`` (a
            matplotlib figure, or None when plotting failed).  The caller owns
            the figure and should close it after rendering.
        """
        # Check if it's English
        is_english, lang, lang_score = self.is_english(audio_path)

        # Classify accent if it's English
        if is_english:
            accent, accent_confidence = self.classify_accent(audio_path)
            english_confidence = lang_score * 100  # Scale to percentage
        else:
            accent = f"Non-English ({lang})"
            accent_confidence = lang_score
            english_confidence = max(0, min(30, lang_score * 50))  # Cap at 30% if non-English

        # Generate explanation
        explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)

        # Create visualization of the audio waveform
        try:
            y, sr = librosa.load(audio_path, sr=None)
            fig, ax = plt.subplots(figsize=(10, 2))
            ax.plot(y)
            ax.set_xlabel('Sample')
            ax.set_ylabel('Amplitude')
            ax.set_title('Audio Waveform')
            plt.tight_layout()
            audio_viz = fig
        except Exception as e:
            st.warning(f"Could not generate audio visualization: {str(e)}")
            audio_viz = None

        return {
            "is_english": is_english,
            "accent": accent,
            "accent_confidence": accent_confidence * 100,  # Scale to percentage
            "english_confidence": english_confidence,
            "language_detected": lang,
            "explanation": explanation,
            "audio_viz": audio_viz
        }
|
236 |
+
|
237 |
+
def process_uploaded_audio(uploaded_file):
    """Analyse an uploaded audio file and return the result dict.

    The upload is spooled to a temporary file because the analysis pipeline
    works on file paths.

    Args:
        uploaded_file: Upload object exposing ``getvalue()`` and, optionally,
            ``name`` (the original file name).

    Returns:
        The dict produced by ``AccentDetector.analyze_audio``.
    """
    # Keep the upload's real extension (mp3, m4a, ogg, ...) so that audio
    # loaders which pick a decoder from the file name are not misled by a
    # hard-coded ".wav"; fall back to ".wav" when no name is available.
    original_name = getattr(uploaded_file, "name", "") or ""
    suffix = os.path.splitext(original_name)[1] or ".wav"

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.getvalue())
        audio_path = temp_file.name

    try:
        detector = AccentDetector()
        return detector.analyze_audio(audio_path)
    finally:
        # Clean up even when model loading or analysis raises, so failed
        # requests do not leave temp files behind.
        os.unlink(audio_path)
|
249 |
|
250 |
# --- Streamlit App ---
|
251 |
+
st.set_page_config(
    page_title="🎤 English Accent Detector",
    page_icon="🎤",
    layout="wide"
)


@st.cache_resource
def _get_detector():
    """Build the AccentDetector once and reuse it across Streamlit reruns."""
    return AccentDetector()


def _render_results(results, audio_path=None):
    """Show one analysis result dict; optionally add playback of ``audio_path``."""
    st.success("✅ Analysis Complete!")

    # Create columns for results
    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader("Accent Analysis Results")
        st.markdown(f"**Detected Accent:** {results['accent']}")
        st.markdown(f"**English Proficiency:** {results['english_confidence']:.1f}%")
        st.markdown(f"**Accent Confidence:** {results['accent_confidence']:.1f}%")

        # Show explanation in a box
        st.markdown("### Expert Analysis")
        st.info(results['explanation'])

    with col2:
        figure = results['audio_viz']
        if figure is not None:
            st.pyplot(figure)
            # pyplot keeps every figure alive until it is closed explicitly;
            # close it here so figures do not accumulate across reruns.
            plt.close(figure)

        # Show audio playback
        if audio_path is not None:
            st.audio(audio_path)


def _remove_temp_files(*paths):
    """Delete the given files if present; warn in the UI instead of raising."""
    try:
        for path in paths:
            if os.path.exists(path):
                os.remove(path)
    except Exception as e:
        st.warning(f"Couldn't clean up temporary files: {str(e)}")


st.title("🎤 English Accent Detection Tool")
st.markdown("""
This app analyzes a speaker's English accent from a video or audio source.
It provides:
- Classification of the accent (British, American, etc.)
- Confidence score for English proficiency
- Explanation of accent characteristics
""")

# Create tabs for different input methods
tab1, tab2 = st.tabs(["Video URL", "Upload Audio"])

with tab1:
    url = st.text_input("Enter a public video URL (e.g. Loom, YouTube, or direct MP4 link)")

    if st.button("Analyze Video"):
        if not url or not url.strip():
            st.warning("Please enter a valid URL")
        else:
            # Create a placeholder for status updates
            status = st.empty()

            # Nanosecond timestamps keep file names unique even when two
            # analyses start within the same second.
            timestamp = str(time.time_ns())
            video_path = f"video_{timestamp}.mp4"
            audio_path = f"audio_{timestamp}.wav"

            try:
                # Download and process the video
                status.text("Downloading video...")
                if not download_video(url, video_path):
                    st.error("Failed to download video")
                else:
                    status.text("Extracting audio...")
                    if not extract_audio(video_path, audio_path):
                        st.error("Failed to extract audio")
                    else:
                        status.text("Analyzing accent... (this may take a moment)")
                        results = _get_detector().analyze_audio(audio_path)

                        # Display results
                        _render_results(results, audio_path=audio_path)
            except Exception as e:
                st.error(f"Error during analysis: {str(e)}")
            finally:
                # Runs on success and on failure alike, so an aborted download
                # or extraction no longer leaves files on disk.
                status.empty()
                _remove_temp_files(video_path, audio_path)

with tab2:
    uploaded_file = st.file_uploader("Upload an audio file (WAV, MP3, etc.)", type=["wav", "mp3", "m4a", "ogg"])

    if uploaded_file is not None:
        st.audio(uploaded_file)

        if st.button("Analyze Audio"):
            with st.spinner("Analyzing audio... (this may take a moment)"):
                try:
                    results = process_uploaded_audio(uploaded_file)

                    # Display results
                    _render_results(results)
                except Exception as e:
                    st.error(f"Error during analysis: {str(e)}")

# Add footer with deployment info
st.markdown("---")
st.markdown("Deployed using Streamlit • Built with SpeechBrain and Transformers")

# Add a section for how it works
with st.expander("ℹ️ How It Works"):
    st.markdown("""
    This app uses a multi-stage process to analyze a speaker's accent:

    1. **Audio Extraction**: The audio track is extracted from the input video or directly processed from uploaded audio.

    2. **Language Identification**: First, we determine if the speech is English using SpeechBrain's language identification model.

    3. **Accent Classification**: For English speech, we analyze the specific accent using a transformer-based model trained on diverse accent data.

    4. **English Proficiency Score**: A confidence score is calculated based on both language identification and accent clarity.

    5. **Analysis Summary**: An explanation is generated describing accent characteristics relevant for hiring evaluations.
    """)
|