import streamlit as st
import os
import tempfile
import requests
import random
import matplotlib.pyplot as plt
import torchaudio
import torch
import ffmpeg
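# Note: `ffmpeg` here is the ffmpeg-python wrapper; the ffmpeg binary itself must also be installed on the system.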
# Try loading SpeechBrain
try:
from speechbrain.inference import EncoderClassifier
classifier = EncoderClassifier.from_hparams(
source="speechbrain/lang-id-commonlanguage_ecapa",
savedir="pretrained_models/lang-id-commonlanguage_ecapa"
)
SB_READY = True
except Exception as e:
    st.warning(f"SpeechBrain model load failed ({e}). Falling back to simulation.")
SB_READY = False
# Accent Profiles for English detection
accent_profiles = {
"American": ["rhotic", "flapped_t", "cot_caught_merger"],
"British": ["non_rhotic", "t_glottalization", "trap_bath_split"],
"Australian": ["non_rhotic", "flat_a", "high_rising_terminal"],
"Canadian": ["rhotic", "canadian_raising", "eh_tag"],
"Indian": ["retroflex_consonants", "monophthongization", "syllable_timing"]
}
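# Fallback used when the SpeechBrain model is unavailable: pick a random accent
# from the profiles above with a plausible confidence score.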
def simulate_accent_classification():
accent = random.choice(list(accent_profiles.keys()))
confidence = random.uniform(75, 98)
return {
"accent": accent,
"confidence": round(confidence, 2),
"summary": f"Simulated detection: {accent} accent with {confidence:.2f}% confidence."
}
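# Real path: run SpeechBrain language ID on the audio; if the top language is
# English, the specific accent is still simulated from the profiles above.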
def real_accent_classification(audio_path):
    try:
        signal, sr = torchaudio.load(audio_path)
        # Downmix to mono and resample to the 16 kHz rate the model expects
        if signal.shape[0] > 1:
            signal = signal.mean(dim=0, keepdim=True)
        if sr != 16000:
            signal = torchaudio.transforms.Resample(sr, 16000)(signal)
        # classify_batch expects (batch, time); the signal is already (1, time)
        pred = classifier.classify_batch(signal)
        # pred[0] holds log posterior probabilities per language; exponentiate to get probabilities
        probs = pred[0].exp().squeeze(0).tolist()
        lang_scores = {classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
        top_lang = max(lang_scores, key=lang_scores.get)
        if top_lang.lower() not in ("en", "english"):
            return {
                "accent": "Non-English",
                "confidence": round(lang_scores[top_lang], 2),
                "summary": f"Detected language: {top_lang}"
            }
        # Language ID only confirms English; the specific accent is simulated
        result = simulate_accent_classification()
        result["summary"] += " (Base language: English)"
        return result
    except Exception:
        return simulate_accent_classification()
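# Save the uploaded file (or download the URL) to a temp directory, then extract
# 16 kHz mono WAV audio with ffmpeg for the classifier.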
def extract_audio(url_or_file, is_upload=False):
temp_dir = tempfile.mkdtemp()
video_path = os.path.join(temp_dir, "input_video.mp4")
audio_path = os.path.join(temp_dir, "audio.wav")
if is_upload:
with open(video_path, "wb") as f:
f.write(url_or_file.read())
else:
with requests.get(url_or_file, stream=True) as r:
r.raise_for_status()
with open(video_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
ffmpeg.input(video_path).output(audio_path, ar=16000, ac=1).run(overwrite_output=True, quiet=True)
return audio_path
# --- Streamlit UI ---
st.set_page_config(page_title="English Accent Analyzer", layout="centered")
st.title("🗣️ English Accent Analyzer")
st.markdown("### 🎯 Objective:\nUpload or link a video/audio of a speaker. We'll detect if they're speaking English and simulate the accent.")
url_input = st.text_input("🔗 Paste public Loom or direct MP4/WAV link:")
uploaded_file = st.file_uploader("📁 Or upload a video/audio file", type=["mp4", "wav"])
if st.button("Analyze"):
    if not url_input and not uploaded_file:
        st.error("Please provide a valid URL or upload a file.")
    else:
        with st.spinner("Analyzing..."):
            try:
                audio_path = extract_audio(uploaded_file if uploaded_file else url_input, is_upload=bool(uploaded_file))
                result = real_accent_classification(audio_path) if SB_READY else simulate_accent_classification()
                st.success(f"🎧 Detected Accent: **{result['accent']}**")
                st.metric("Confidence", f"{result['confidence']}%")
                st.markdown(f"📝 {result['summary']}")
            except Exception as e:
                st.error(f"❌ Error during analysis: {e}")