File size: 6,304 Bytes
98c4440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# -*- coding: utf-8 -*-
"""Accent.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1yprWdRUXGqD4QIFAZuMwdyTuwA2Hhdvj
"""

# Install the required libraries before running this script.
# The notebook cell used the IPython shell escape ("!pip ..."), which is a
# SyntaxError in a plain .py file, so the command is kept here as a comment.
# In Colab/Jupyter, run it in its own cell with a leading "!":
#   pip install --quiet yt-dlp ffmpeg-python torch torchaudio transformers streamlit speechbrain

import os
import subprocess
import torchaudio
import torch
from speechbrain.pretrained import EncoderClassifier
import yt_dlp

# Video to analyse. Links containing "youtube.com" or "youtu.be" are fetched
# with yt-dlp by download_video(); any other URL is fetched with wget.
VIDEO_URL = "https://youtu.be/DDjWTWHHkpk?si=oIj6Fuy8Hg2E8U_l"  # Example: Replace with your actual link!

def download_video(url, out_path="input_video.mp4"):
    """
    Download a video from YouTube or from a direct media link.

    Args:
        url: Address of the video. URLs containing "youtube.com" or
            "youtu.be" are fetched with yt-dlp; anything else with wget.
        out_path: Requested output file name.

    Returns:
        Path of the downloaded file. This is ``out_path`` when that file
        exists; otherwise the newest ``out_path.<ext>`` file, because the
        downloader may append the real container extension
        (e.g. ``input_video.mp4.webm``).

    Raises:
        RuntimeError: If wget cannot be started or exits with an error.
        FileNotFoundError: If no downloaded file can be found afterwards.
    """
    import glob  # local import: only this function needs it

    if "youtube.com" in url or "youtu.be" in url:
        ydl_opts = {'outtmpl': out_path}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    else:
        # Pass an argument list (no shell): URLs routinely contain "&", "?"
        # or ";", which a shell would interpret. "--" stops option parsing so
        # a URL starting with "-" cannot be read as a wget flag.
        try:
            result = subprocess.run(["wget", "-O", out_path, "--", url])
        except OSError as err:
            raise RuntimeError(f"Could not run wget: {err}") from err
        if result.returncode != 0:
            raise RuntimeError(
                f"wget failed with exit code {result.returncode} for URL: {url}"
            )

    if os.path.exists(out_path):
        return out_path

    # The downloader may have written "<out_path>.<ext>"; ignore in-progress
    # bookkeeping files and pick the most recently written candidate.
    candidates = [
        path
        for path in glob.glob(glob.escape(out_path) + ".*")
        if not path.endswith((".part", ".ytdl"))
    ]
    if not candidates:
        raise FileNotFoundError(
            f"Download finished but no file matching '{out_path}' was found."
        )
    return max(candidates, key=os.path.getmtime)

# Fetch the configured video now; video_file holds the path that
# download_video() returned and is the input of the audio extraction below.
video_file = download_video(VIDEO_URL)
print(f"Downloaded video: {video_file}")

def extract_audio(video_path, audio_path="audio.wav"):
    """
    Extract the audio track of a video file with ffmpeg.

    The audio is written with ``-ar 16000 -ac 1 -vn`` (16 kHz sample rate,
    one channel, no video stream).

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to write.

    Returns:
        ``audio_path``.

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with an error.
    """
    # Remove if already exists
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg unchanged.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",
        "-ac", "1",
        "-vn",
        audio_path,
    ]
    try:
        returncode = subprocess.call(cmd)
    except OSError as err:
        raise RuntimeError(f"Could not run ffmpeg: {err}") from err
    # Fail here rather than later when the missing audio file is loaded
    if returncode != 0:
        raise RuntimeError(
            f"ffmpeg failed with exit code {returncode} for '{video_path}'."
        )
    return audio_path

# Extract the audio track of the downloaded video; audio_file is the path
# that the classification step loads later in this script.
audio_file = extract_audio(video_file)
print(f"Extracted audio file: {audio_file}")

def extract_audio(video_path, audio_path="/content/audio.wav"):
    """
    Extract the audio track of a video file with ffmpeg, reporting failures.

    This definition replaces the earlier ``extract_audio`` in this module
    from this point on. The audio is written with ``-ar 16000 -ac 1 -vn``
    (16 kHz sample rate, one channel, no video stream).

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to write.

    Returns:
        ``audio_path``.

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with an error.
        FileNotFoundError: If ffmpeg reported success but ``audio_path``
            does not exist afterwards.
    """
    # Remove any previous output so the existence check at the end proves
    # that this run produced the file
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg unchanged.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",
        "-ac", "1",
        "-vn",
        audio_path,
    ]
    try:
        # Capture output so it can be shown; undecodable bytes are replaced
        # instead of raising while reading ffmpeg's messages
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace"
        )
    except OSError as err:
        # e.g. ffmpeg is not installed; keep the same exception type that
        # callers see for any other extraction failure
        raise RuntimeError(f"Failed to start FFmpeg: {err}") from err

    if result.returncode != 0:
        print(f"FFmpeg command failed with error code {result.returncode}")
        print("FFmpeg stderr:")
        print(result.stderr)
        raise RuntimeError("Failed to extract audio using FFmpeg. See stderr above.")

    print("FFmpeg stdout:")
    print(result.stdout)
    print("FFmpeg stderr:")
    print(result.stderr)  # ffmpeg often outputs info/warnings to stderr

    # Check if the audio file was actually created
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file '{audio_path}' was not created after FFmpeg execution.")

    return audio_path

# Load a pre-trained SpeechBrain classifier; its files are stored under the
# "tmp_accent_model" directory passed as savedir.
# NOTE(review): the source id "speechbrain/lang-id-commonlanguage_ecapa" reads
# like a spoken-language identification checkpoint rather than an
# English-accent classifier — confirm this is the intended model.
accent_model = EncoderClassifier.from_hparams(
    source="speechbrain/lang-id-commonlanguage_ecapa",
    savedir="tmp_accent_model"
)

"""Used to Debuging the code"""

# Debug aid: list the current working directory to see which file name the
# download step actually produced (e.g. whether input_video.mp4 is present).
# The repeated import is harmless; os is already imported above.
import os
print(os.listdir('.'))

"""TO check the debug file path"""

# Debug aid: run the ffmpeg extraction by hand on a hard-coded path.
# NOTE(review): the path is specific to one Colab session (the download was
# apparently saved as "input_video.mp4.webm"); adjust it to the real file.
# This writes "audio.wav", the same relative file name that the first
# extract_audio() returns by default, so it can replace the file loaded by
# the classification step. The exit status of os.system is not checked.
video_path = "/content/input_video.mp4.webm"  # or whatever your filename is!
audio_path = "audio.wav"

os.system(f"ffmpeg -y -i {video_path} -ar 16000 -ac 1 -vn {audio_path}")

# See if audio.wav was created
print(os.listdir('.'))

"""Check the Size of the file"""

# Debug aid: report whether the audio file named by audio_path exists and,
# if it does, its size in bytes (a size of 0 would mean nothing was written).
# The repeated import is harmless; os is already imported above.
import os
print("audio.wav exists:", os.path.exists(audio_path))
if os.path.exists(audio_path):
    print("audio.wav size (bytes):", os.path.getsize(audio_path))

# Read the extracted waveform. No resampling happens here, so the file is
# expected to already be 16 kHz mono (what the ffmpeg step asks for).
signal, fs = torchaudio.load(audio_file)

# More than one channel: keep only channel 0, preserving the channel axis
if signal.shape[0] > 1:
    signal = signal[:1]

# classify_batch returns a tuple; this script reads element 3 as the labels
# and element 1 as the scores, and takes the first batch item of each.
prediction = accent_model.classify_batch(signal)
pred_label, pred_scores = prediction[3][0], prediction[1][0]

# Scale the top score to a percentage-style figure.
# NOTE(review): assumes the score is a probability in [0, 1] — confirm
# against the model's output before presenting it as a confidence.
confidence = 100 * float(pred_scores.max())

# Report the winning label, its score and the label set known to the model
print(f"Predicted Accent: {pred_label}")
print(f"Confidence: {confidence:.1f}%")
print("Possible accent labels:", accent_model.hparams.label_encoder.lab2ind.keys())

# Human-readable one-paragraph summary of the result
explanation = (
    f"The speaker's English accent was classified as '{pred_label}' "
    f"with a confidence score of {confidence:.1f}%. "
    f"This means the model is {confidence:.0f}% sure "
    "the person sounds most similar to this accent group."
)

print(explanation)

# Save as app.py in Colab for launching a simple web UI.
# The generated app takes its URL from a web form (untrusted input), so every
# external tool is started from an argument list without a shell, and each
# step is checked before the next one runs.
# The encoding is explicit because the app source contains an emoji, which a
# non-UTF-8 default locale could fail to encode.
with open("app.py", "w", encoding="utf-8") as f:
    f.write('''
import glob
import os
import subprocess

import streamlit as st
import torchaudio
from speechbrain.pretrained import EncoderClassifier

VIDEO_PATH = "input_video.mp4"
AUDIO_PATH = "audio.wav"


def run_tool(args):
    """Run an external program from an argument list; return (ok, stderr)."""
    try:
        proc = subprocess.run(args, capture_output=True, text=True, errors="replace")
    except OSError as err:
        return False, str(err)
    return proc.returncode == 0, proc.stderr


st.title("🗣️ English Accent Classifier (Proof of Concept)")

url = st.text_input("Enter public video URL (YouTube or direct MP4):").strip()
if st.button("Analyze"):
    if not url:
        st.error("Please enter a video URL.")
        st.stop()

    with st.spinner("Downloading video..."):
        # Remove files from a previous run so an old video is never reused
        for stale in glob.glob(VIDEO_PATH + "*"):
            os.remove(stale)
        # "--" ends option parsing, so the URL is never read as a flag
        if "youtube.com" in url or "youtu.be" in url:
            ok, err = run_tool(["yt-dlp", "-o", VIDEO_PATH, "--", url])
        else:
            ok, err = run_tool(["wget", "-O", VIDEO_PATH, "--", url])
    if not ok:
        st.error("Download failed.")
        st.code(err)
        st.stop()

    # The downloader may append the real container extension to the name
    video_file = VIDEO_PATH
    if not os.path.exists(video_file):
        candidates = [
            path
            for path in glob.glob(VIDEO_PATH + ".*")
            if not path.endswith((".part", ".ytdl"))
        ]
        if not candidates:
            st.error("Download finished but no video file was found.")
            st.stop()
        video_file = max(candidates, key=os.path.getmtime)

    with st.spinner("Extracting audio..."):
        ok, err = run_tool([
            "ffmpeg", "-y", "-i", video_file,
            "-ar", "16000", "-ac", "1", "-vn", AUDIO_PATH,
        ])
    if not ok or not os.path.exists(AUDIO_PATH):
        st.error("Audio extraction failed.")
        st.code(err)
        st.stop()

    with st.spinner("Classifying accent..."):
        accent_model = EncoderClassifier.from_hparams(
            source="speechbrain/lang-id-commonlanguage_ecapa",
            savedir="tmp_accent_model"
        )
        signal, fs = torchaudio.load(AUDIO_PATH)
        if signal.shape[0] > 1:
            signal = signal[0].unsqueeze(0)
        prediction = accent_model.classify_batch(signal)
        pred_label = prediction[3][0]
        pred_scores = prediction[1][0]
        confidence = float(pred_scores.max()) * 100
        st.success(f"Predicted Accent: {pred_label} ({confidence:.1f}%)")
        st.info(f"The model is {confidence:.0f}% confident this is a {pred_label} English accent.")
''')

print("Streamlit app code saved as app.py!")
print("To launch the UI, run: !streamlit run app.py --server.headless true --server.port 8501")