Spaces:

AnalysisWithMSR
/

SEO

Sleeping

AnalysisWithMSR committed on Dec 14, 2024

Commit

6070eee

verified ·

1 Parent(s): 89457db

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -55,16 +55,22 @@ def get_video_duration(video_id, api_key):
 def download_and_transcribe_with_whisper(youtube_url):
     try:
-        # Temporary directory for storing the downloaded audio
-        with tempfile.TemporaryDirectory() as temp_dir:
-            temp_audio_file = os.path.join(temp_dir, "audio.mp4")  # Pytube downloads in mp4 format
-            # Download audio using pytube
-            yt = YouTube(youtube_url)
-            audio_stream = yt.streams.filter(only_audio=True).first()  # Get the first available audio stream
-            audio_stream.download(output_path=temp_dir, filename="audio.mp4")  # Download audio to temp dir
-            # Convert the downloaded audio (mp4) to wav for Whisper
             audio = AudioSegment.from_file(temp_audio_file)
             wav_file = os.path.join(temp_dir, "audio.wav")
             audio.export(wav_file, format="wav")

 def download_and_transcribe_with_whisper(youtube_url):
     try:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_audio_file = os.path.join(temp_dir, "audio.mp3")
+            ydl_opts = {
+                'format': 'bestaudio/best',
+                'outtmpl': temp_audio_file,
+                'extractaudio': True,
+                'audioquality': 1,
+            }
+            # Download audio using yt-dlp
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([youtube_url])
+            # Convert to wav for Whisper
             audio = AudioSegment.from_file(temp_audio_file)
             wav_file = os.path.join(temp_dir, "audio.wav")
             audio.export(wav_file, format="wav")