EdgarDataScientist committed on
Commit
5ca9307
·
verified ·
1 Parent(s): d23d38e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -6,22 +6,26 @@ from pytubefix import YouTube
6
  from pytubefix.cli import on_progress
7
  import requests
8
  import os
 
9
 
10
  CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
11
 
 
 
 
 
12
  def download_video(url):
13
- """Handles YouTube and direct video links with pytubefix and error handling"""
14
  try:
15
  if "youtube.com" in url or "youtu.be" in url:
16
  yt = YouTube(url, on_progress_callback=on_progress)
17
- # Get progressive mp4 streams (video + audio combined)
18
  stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
19
  if not stream:
20
  raise ValueError("No suitable video stream found.")
21
  video_path = stream.download()
22
  return video_path
23
  else:
24
- # For direct MP4 links, download file
25
  local_filename = "temp_video.mp4"
26
  with requests.get(url, stream=True) as r:
27
  r.raise_for_status()
@@ -33,6 +37,7 @@ def download_video(url):
33
  raise RuntimeError(f"Failed to download video: {e}")
34
 
35
  def extract_audio(video_path):
 
36
  clip = VideoFileClip(video_path)
37
  audio_path = "temp_audio.wav"
38
  clip.audio.write_audiofile(audio_path, logger=None)
@@ -40,18 +45,21 @@ def extract_audio(video_path):
40
  return audio_path
41
 
42
  def classify_accent(audio_path):
 
 
43
  classifier = EncoderClassifier.from_hparams(
44
  source=CLASSIFIER,
45
  savedir="pretrained_models/accent_classifier",
46
- run_opts={"device":"cpu"} # Change to "cuda" if GPU available
47
  )
48
  waveform, sample_rate = torchaudio.load(audio_path)
49
- prediction = classifier.classify_batch(waveform)
50
  predicted_accent = prediction[3][0]
51
  confidence = prediction[1].exp().max().item() * 100
52
  return predicted_accent, f"{confidence:.2f}%"
53
 
54
  def process_video(url):
 
55
  video_path = None
56
  audio_path = None
57
  try:
@@ -62,6 +70,7 @@ def process_video(url):
62
  except Exception as e:
63
  return f"Error: {e}", ""
64
  finally:
 
65
  for f in [video_path, audio_path]:
66
  if f and os.path.exists(f):
67
  os.remove(f)
@@ -79,4 +88,3 @@ iface = gr.Interface(
79
 
80
  if __name__ == "__main__":
81
  iface.launch()
82
-
 
6
  from pytubefix.cli import on_progress
7
  import requests
8
  import os
9
+ import torch
10
 
11
  CLASSIFIER = "Jzuluaga/accent-id-commonaccent_xlsr-en-english"
12
 
13
+ def get_default_device():
14
+ """Return the default device (cuda if available, else cpu)."""
15
+ return torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
+
17
  def download_video(url):
18
+ """Download video from YouTube or direct MP4 URL using pytubefix."""
19
  try:
20
  if "youtube.com" in url or "youtu.be" in url:
21
  yt = YouTube(url, on_progress_callback=on_progress)
 
22
  stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
23
  if not stream:
24
  raise ValueError("No suitable video stream found.")
25
  video_path = stream.download()
26
  return video_path
27
  else:
28
+ # Direct MP4 file download
29
  local_filename = "temp_video.mp4"
30
  with requests.get(url, stream=True) as r:
31
  r.raise_for_status()
 
37
  raise RuntimeError(f"Failed to download video: {e}")
38
 
39
  def extract_audio(video_path):
40
+ """Extract audio from video and save as WAV file."""
41
  clip = VideoFileClip(video_path)
42
  audio_path = "temp_audio.wav"
43
  clip.audio.write_audiofile(audio_path, logger=None)
 
45
  return audio_path
46
 
47
  def classify_accent(audio_path):
48
+ """Classify English accent from audio file using SpeechBrain model."""
49
+ device = get_default_device()
50
  classifier = EncoderClassifier.from_hparams(
51
  source=CLASSIFIER,
52
  savedir="pretrained_models/accent_classifier",
53
+ run_opts={"device": str(device)}
54
  )
55
  waveform, sample_rate = torchaudio.load(audio_path)
56
+ prediction = classifier.classify_batch(waveform.to(device))
57
  predicted_accent = prediction[3][0]
58
  confidence = prediction[1].exp().max().item() * 100
59
  return predicted_accent, f"{confidence:.2f}%"
60
 
61
  def process_video(url):
62
+ """Main processing pipeline: download video, extract audio, classify accent."""
63
  video_path = None
64
  audio_path = None
65
  try:
 
70
  except Exception as e:
71
  return f"Error: {e}", ""
72
  finally:
73
+ # Clean up temporary files
74
  for f in [video_path, audio_path]:
75
  if f and os.path.exists(f):
76
  os.remove(f)
 
88
 
89
  if __name__ == "__main__":
90
  iface.launch()