Spaces:
Sleeping
Sleeping
File size: 6,304 Bytes
98c4440 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
# -*- coding: utf-8 -*-
"""Accent.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1yprWdRUXGqD4QIFAZuMwdyTuwA2Hhdvj
"""
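# Install the required libraries (run this cell first).
# NOTE: the leading "!" below is notebook shell syntax; it works in Colab/Jupyter
# but is a syntax error when this file is executed as a plain Python script.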
!pip install --quiet yt-dlp ffmpeg-python torch torchaudio transformers streamlit speechbrain
import os
import subprocess
import torchaudio
import torch
from speechbrain.pretrained import EncoderClassifier
import yt_dlp
# Public URL of the video to analyse: either a YouTube link or a direct MP4 link.
# This value is passed to download_video() further down.
VIDEO_URL = "https://youtu.be/DDjWTWHHkpk?si=oIj6Fuy8Hg2E8U_l" # Example: Replace with your actual link!
def download_video(url, out_path="input_video.mp4"):
    """Download a video from YouTube or a direct link and return its path.

    Links containing ``youtube.com`` or ``youtu.be`` are fetched with yt-dlp;
    every other URL is fetched with ``wget``.

    Args:
        url: Public video URL.
        out_path: Desired filename for the downloaded video.

    Returns:
        The path of the downloaded file. This is ``out_path`` when that file
        exists; otherwise, if the downloader wrote ``out_path`` plus an extra
        extension (for example ``input_video.mp4.webm``), that file is
        returned instead. Falls back to ``out_path`` when nothing matches.

    Raises:
        ValueError: If ``url`` is empty or only whitespace.
        RuntimeError: If ``wget`` is not installed or exits with an error.
    """
    import glob

    if not url or not url.strip():
        raise ValueError("url must be a non-empty string")
    url = url.strip()

    if "youtube.com" in url or "youtu.be" in url:
        ydl_opts = {'outtmpl': out_path}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    else:
        # Pass an argument list (no shell) so characters such as "&", ";" or
        # spaces in the URL cannot be interpreted as shell syntax.
        try:
            result = subprocess.run(["wget", "-O", out_path, url])
        except FileNotFoundError as err:
            raise RuntimeError("wget is not installed or not on PATH") from err
        if result.returncode != 0:
            raise RuntimeError(
                f"wget failed with exit code {result.returncode} for {url!r}"
            )

    if os.path.exists(out_path):
        return out_path

    # The downloader may have appended a container extension to the requested
    # name; pick that file up, ignoring partial-download leftovers.
    candidates = sorted(
        path
        for path in glob.glob(glob.escape(out_path) + ".*")
        if not path.endswith((".part", ".ytdl"))
    )
    return candidates[0] if candidates else out_path
# Download the video at VIDEO_URL; download_video returns the local filename,
# which is kept in video_file for the audio-extraction step.
video_file = download_video(VIDEO_URL)
print(f"Downloaded video: {video_file}")
def extract_audio(video_path, audio_path="audio.wav"):
    """Extract the audio track of a video into a 16 kHz mono file via ffmpeg.

    NOTE: this file defines ``extract_audio`` a second time further down;
    calls made after that point use the later definition.

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to write. An existing file at this
            path is removed first.

    Returns:
        ``audio_path``.

    Raises:
        RuntimeError: If ffmpeg is not installed or exits with a non-zero
            status.
    """
    # Remove a stale output so a failed run cannot leave an old file behind.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg verbatim.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",  # resample to 16 kHz
        "-ac", "1",      # down-mix to one channel
        "-vn",           # drop the video stream
        audio_path,
    ]
    try:
        returncode = subprocess.call(cmd)
    except FileNotFoundError as err:
        raise RuntimeError("ffmpeg is not installed or not on PATH") from err
    if returncode != 0:
        raise RuntimeError(
            f"ffmpeg failed with exit code {returncode} for {video_path!r}"
        )
    return audio_path
# Extract the audio track of the downloaded video; the returned filename is
# kept in audio_file and loaded later for classification.
audio_file = extract_audio(video_file)
print(f"Extracted audio file: {audio_file}")
def extract_audio(video_path, audio_path="/content/audio.wav"):
    """Extract the audio track of a video into a 16 kHz mono file via ffmpeg.

    This verbose variant captures ffmpeg's output and prints it, which makes
    extraction failures easier to diagnose.

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to write. An existing file at this
            path is removed first.

    Returns:
        ``audio_path``.

    Raises:
        RuntimeError: If ffmpeg is not installed or exits with a non-zero
            status.
        FileNotFoundError: If ffmpeg reported success but ``audio_path`` was
            not created.
    """
    # Remove a stale output so a failed run cannot leave an old file behind.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg verbatim.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",  # resample to 16 kHz
        "-ac", "1",      # down-mix to one channel
        "-vn",           # drop the video stream
        audio_path,
    ]
    try:
        # errors="replace" keeps undecodable bytes in ffmpeg's output from
        # raising UnicodeDecodeError while the output is captured as text.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace"
        )
    except FileNotFoundError as err:
        raise RuntimeError("ffmpeg is not installed or not on PATH") from err

    if result.returncode != 0:
        print(f"FFmpeg command failed with error code {result.returncode}")
        print("FFmpeg stderr:")
        print(result.stderr)
        raise RuntimeError("Failed to extract audio using FFmpeg. See stderr above.")

    print("FFmpeg stdout:")
    print(result.stdout)
    print("FFmpeg stderr:")
    print(result.stderr)  # ffmpeg often outputs info/warnings to stderr

    # Check that the audio file was actually created.
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file '{audio_path}' was not created after FFmpeg execution.")
    return audio_path
# Load the pre-trained SpeechBrain classifier, with savedir set to
# "tmp_accent_model".
# NOTE(review): the source name "lang-id-commonlanguage_ecapa" suggests a
# language-identification checkpoint rather than an English-accent model;
# confirm this is the intended model before presenting results as accents.
accent_model = EncoderClassifier.from_hparams(
source="speechbrain/lang-id-commonlanguage_ecapa",
savedir="tmp_accent_model"
)
# --- Debugging aids ---------------------------------------------------------
# These cells only help diagnose a failed download or audio extraction.
import os
import subprocess

# List the working directory to see whether input_video.mp4 is present.
print(os.listdir('.'))

# Check the debug file path: try extracting audio again so that ffmpeg's own
# output is visible.
# NOTE: video_path is hard-coded to a Colab location; adjust it to the file
# that was actually downloaded.
video_path = "/content/input_video.mp4.webm"  # or whatever your filename is!
audio_path = "audio.wav"
try:
    # Argument list (no shell), so the paths are passed to ffmpeg verbatim.
    debug_result = subprocess.run(
        ["ffmpeg", "-y", "-i", video_path, "-ar", "16000", "-ac", "1", "-vn", audio_path]
    )
except FileNotFoundError:
    # Best-effort debug step: report the problem and keep going.
    print("ffmpeg executable not found")
else:
    if debug_result.returncode != 0:
        print(f"ffmpeg exited with code {debug_result.returncode}")

# See whether audio.wav was created.
print(os.listdir('.'))

# Check that the file now exists and report its size.
print("audio.wav exists:", os.path.exists(audio_path))
if os.path.exists(audio_path):
    print("audio.wav size (bytes):", os.path.getsize(audio_path))
# Read the extracted waveform from disk (the extraction step writes 16 kHz mono).
signal, fs = torchaudio.load(audio_file)

# With more than one channel, keep the first and restore the channel axis.
if signal.size(0) > 1:
    signal = signal[0].unsqueeze(0)

# Classify the waveform and pick out the first item's score and label.
# NOTE(review): assumes prediction[1] is a score that can be read as a 0-1
# probability and prediction[3] the text label — confirm against the model's
# documentation; a log-likelihood score would make the percentage misleading.
prediction = accent_model.classify_batch(signal)
pred_scores = prediction[1][0]
pred_label = prediction[3][0]

# Express the score as a percentage.
confidence = float(pred_scores.max()) * 100

# Report the top label, its score, and the label set the model knows.
print(f"Predicted Accent: {pred_label}")
print(f"Confidence: {confidence:.1f}%")
print("Possible accent labels:", accent_model.hparams.label_encoder.lab2ind.keys())

# One-sentence, human-readable summary of the result.
explanation = (
    f"The speaker's English accent was classified as '{pred_label}' "
    f"with a confidence score of {confidence:.1f}%. "
    f"This means the model is {confidence:.0f}% sure "
    f"the person sounds most similar to this accent group."
)
print(explanation)
# Save a small Streamlit web UI as app.py so it can be launched from Colab.
# The generated app runs external tools through subprocess with argument
# lists, so the URL typed by the user is never interpreted by a shell.
_APP_SOURCE = '''
import os
import subprocess

import streamlit as st
import torchaudio
from speechbrain.pretrained import EncoderClassifier


def run_tool(args):
    """Run an external command without a shell; report failures in the UI."""
    try:
        result = subprocess.run(args, capture_output=True, text=True, errors="replace")
    except FileNotFoundError:
        st.error(f"Required tool not found: {args[0]}")
        return False
    if result.returncode != 0:
        st.error(f"{args[0]} failed with exit code {result.returncode}.")
        st.code(result.stderr)
        return False
    return True


st.title("🗣️ English Accent Classifier (Proof of Concept)")
url = st.text_input("Enter public video URL (YouTube or direct MP4):").strip()

if st.button("Analyze"):
    # Only accept real web URLs; this also rules out option-like input.
    if not url.startswith(("http://", "https://")):
        st.error("Please enter a valid http(s) video URL.")
        st.stop()

    with st.spinner("Downloading video..."):
        if "youtube.com" in url or "youtu.be" in url:
            downloaded = run_tool(["yt-dlp", "-o", "input_video.mp4", url])
        else:
            downloaded = run_tool(["wget", "-O", "input_video.mp4", url])
    if not downloaded:
        st.stop()

    with st.spinner("Extracting audio..."):
        extracted = run_tool(
            ["ffmpeg", "-y", "-i", "input_video.mp4",
             "-ar", "16000", "-ac", "1", "-vn", "audio.wav"]
        )
    if not extracted or not os.path.exists("audio.wav"):
        st.error("Audio extraction did not produce audio.wav.")
        st.stop()

    with st.spinner("Classifying accent..."):
        accent_model = EncoderClassifier.from_hparams(
            source="speechbrain/lang-id-commonlanguage_ecapa",
            savedir="tmp_accent_model"
        )
        signal, fs = torchaudio.load("audio.wav")
        if signal.shape[0] > 1:
            signal = signal[0].unsqueeze(0)
        prediction = accent_model.classify_batch(signal)
        pred_label = prediction[3][0]
        pred_scores = prediction[1][0]
        confidence = float(pred_scores.max()) * 100

    st.success(f"Predicted Accent: {pred_label} ({confidence:.1f}%)")
    st.info(f"The model is {confidence:.0f}% confident this is a {pred_label} English accent.")
'''

# Write as UTF-8 explicitly: the source contains an emoji, which the platform's
# default encoding may not be able to represent.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(_APP_SOURCE)

print("Streamlit app code saved as app.py!")
print("To launch the UI, run: !streamlit run app.py --server.headless true --server.port 8501")