EdgarDataScientist committed on
Commit
2685e79
·
verified ·
1 Parent(s): f6f6edc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -2,37 +2,36 @@ import gradio as gr
2
  from moviepy.editor import VideoFileClip
3
  from speechbrain.pretrained import EncoderClassifier
4
  import torchaudio
5
- from pytube import YouTube
 
 
6
  import os
7
 
8
  CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
9
 
10
  def download_video(url):
11
- """Handles YouTube and direct video links with error handling"""
12
  try:
13
  if "youtube.com" in url or "youtu.be" in url:
14
- yt = YouTube(url)
 
15
  stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
16
  if not stream:
17
  raise ValueError("No suitable video stream found.")
18
- video_path = stream.download() # Store the download path
19
  return video_path
20
  else:
21
  # For direct MP4 links, download file
22
- import requests
23
  local_filename = "temp_video.mp4"
24
  with requests.get(url, stream=True) as r:
25
  r.raise_for_status()
26
  with open(local_filename, 'wb') as f:
27
  for chunk in r.iter_content(chunk_size=8192):
28
  f.write(chunk)
29
- video_path = local_filename # Store the download path
30
- return video_path
31
  except Exception as e:
32
  raise RuntimeError(f"Failed to download video: {e}")
33
 
34
-
35
-
36
  def extract_audio(video_path):
37
  clip = VideoFileClip(video_path)
38
  audio_path = "temp_audio.wav"
@@ -44,11 +43,10 @@ def classify_accent(audio_path):
44
  classifier = EncoderClassifier.from_hparams(
45
  source=CLASSIFIER,
46
  savedir="pretrained_models/accent_classifier",
47
- run_opts={"device":"cpu"} # or "cuda" if GPU available
48
  )
49
  waveform, sample_rate = torchaudio.load(audio_path)
50
  prediction = classifier.classify_batch(waveform)
51
- # prediction format: (scores, probabilities, embeddings, predicted_labels)
52
  predicted_accent = prediction[3][0]
53
  confidence = prediction[1].exp().max().item() * 100
54
  return predicted_accent, f"{confidence:.2f}%"
@@ -68,8 +66,6 @@ def process_video(url):
68
  if f and os.path.exists(f):
69
  os.remove(f)
70
 
71
-
72
- # Gradio interface
73
  iface = gr.Interface(
74
  fn=process_video,
75
  inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
@@ -83,3 +79,4 @@ iface = gr.Interface(
83
 
84
  if __name__ == "__main__":
85
  iface.launch()
 
 
2
  from moviepy.editor import VideoFileClip
3
  from speechbrain.pretrained import EncoderClassifier
4
  import torchaudio
5
+ from pytubefix import YouTube
6
+ from pytubefix.cli import on_progress
7
+ import requests
8
  import os
9
 
10
  CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
11
 
12
  def download_video(url):
13
+ """Handles YouTube and direct video links with pytubefix and error handling"""
14
  try:
15
  if "youtube.com" in url or "youtu.be" in url:
16
+ yt = YouTube(url, on_progress_callback=on_progress)
17
+ # Get progressive mp4 streams (video + audio combined)
18
  stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
19
  if not stream:
20
  raise ValueError("No suitable video stream found.")
21
+ video_path = stream.download()
22
  return video_path
23
  else:
24
  # For direct MP4 links, download file
 
25
  local_filename = "temp_video.mp4"
26
  with requests.get(url, stream=True) as r:
27
  r.raise_for_status()
28
  with open(local_filename, 'wb') as f:
29
  for chunk in r.iter_content(chunk_size=8192):
30
  f.write(chunk)
31
+ return local_filename
 
32
  except Exception as e:
33
  raise RuntimeError(f"Failed to download video: {e}")
34
 
 
 
35
  def extract_audio(video_path):
36
  clip = VideoFileClip(video_path)
37
  audio_path = "temp_audio.wav"
 
43
  classifier = EncoderClassifier.from_hparams(
44
  source=CLASSIFIER,
45
  savedir="pretrained_models/accent_classifier",
46
+ run_opts={"device":"cpu"} # Change to "cuda" if GPU available
47
  )
48
  waveform, sample_rate = torchaudio.load(audio_path)
49
  prediction = classifier.classify_batch(waveform)
 
50
  predicted_accent = prediction[3][0]
51
  confidence = prediction[1].exp().max().item() * 100
52
  return predicted_accent, f"{confidence:.2f}%"
 
66
  if f and os.path.exists(f):
67
  os.remove(f)
68
 
 
 
69
  iface = gr.Interface(
70
  fn=process_video,
71
  inputs=gr.Textbox(label="Enter Public Video URL (YouTube, Loom, direct MP4)"),
 
79
 
80
  if __name__ == "__main__":
81
  iface.launch()
82
+