Spaces:

AnalysisWithMSR
/

SEO

Sleeping

App Files Community

AnalysisWithMSR committed on Dec 14, 2024

Commit

3833cc4

verified ·

1 Parent(s): f8db1f8

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -16

app.py CHANGED Viewed

@@ -86,41 +86,63 @@ def download_and_transcribe_with_whisper(youtube_url):
 def get_transcript_from_youtube_api(video_id, video_length):
     """Fetches transcript using YouTube API if available."""
     try:
         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-        # Prefer a manually created transcript: the first non-generated one is returned.
         for transcript in transcript_list:
-            if not transcript.is_generated:
-                segments = transcript.fetch()
-                return " ".join(segment['text'] for segment in segments)
-        # Auto-generated English captions are only consulted when video_length exceeds 15.
         if video_length > 15:
             auto_transcript = transcript_list.find_generated_transcript(['en'])
             if auto_transcript:
-                segments = auto_transcript.fetch()
-                return " ".join(segment['text'] for segment in segments)
-        # Reached when neither branch above returned a transcript.
-        print("Manual transcript not available, and video is too short for auto-transcript.")
-        return None
     except Exception as e:
         print(f"Error fetching transcript: {e}")
         return None
 def get_transcript(youtube_url, api_key):
     """Gets transcript from YouTube API or Whisper if unavailable."""
-    # Stop early when no video ID can be extracted from the URL.
-    video_id = extract_video_id(youtube_url)
-    if not video_id:
-        print("Invalid or unsupported YouTube URL.")
-        return None
     video_length = get_video_duration(video_id, api_key)
     if video_length is not None:
-        print(f"Video length: {video_length:.2f} minutes.")
         transcript = get_transcript_from_youtube_api(video_id, video_length)
         if transcript:
             return transcript
-        # No usable transcript came back; fall back to Whisper on the original URL.
-        print("Using Whisper for transcription.")
-        return download_and_transcribe_with_whisper(youtube_url)
     else:
         print("Error fetching video duration.")
         return None

 def get_transcript_from_youtube_api(video_id, video_length):
+    """Fetch a transcript for the video from YouTube's transcript API.
+
+    A manually created transcript is preferred; the auto-generated English
+    transcript is only used when ``video_length`` is greater than 15
+    (minutes, going by the messages printed here).
+
+    Returns the transcript text as one space-joined string, or None when
+    nothing usable is found or an error occurs.
+    """
+    def segment_text(segment):
+        # fetch() results are lists of dicts in older youtube-transcript-api
+        # releases; newer releases reportedly yield objects exposing ``.text``
+        # (TODO confirm against the installed version).
+        return segment['text'] if isinstance(segment, dict) else segment.text
     try:
+        # Fetch available transcripts
         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+        # Look for manually created transcripts first
         for transcript in transcript_list:
+            if not transcript.is_generated:  # This checks for manually created transcripts
+                # Join any iterable of segments; an isinstance(..., list) gate would
+                # discard a valid non-list result and abort the whole lookup.
+                return " ".join(segment_text(segment) for segment in transcript.fetch())
+        # If no manual transcript found, proceed to auto-generated transcript
         if video_length > 15:
+            # Video is longer than 15 minutes, so use auto-generated transcript
+            print("Video is longer than 15 minutes, using auto-generated transcript.")
+            # NOTE(review): this lookup appears to raise rather than return None when
+            # no English auto-transcript exists; the except below covers that case.
             auto_transcript = transcript_list.find_generated_transcript(['en'])
             if auto_transcript:
+                # Extract the text from the auto-generated transcript
+                return " ".join(segment_text(segment) for segment in auto_transcript.fetch())
+            else:
+                print("No auto-generated transcript available.")
+                return None
+        else:
+            # Video is 15 minutes or shorter: return None so the caller can use Whisper
+            print("Video is shorter than 15 minutes, using Whisper for transcription.")
+            return None
     except Exception as e:
         print(f"Error fetching transcript: {e}")
         return None
 def get_transcript(youtube_url, api_key):
+    """Gets transcript from YouTube API or Whisper if unavailable.
+
+    Resolves the video ID from ``youtube_url``, looks up the video duration
+    with ``api_key``, then tries YouTube's own transcripts before falling
+    back to Whisper transcription of the URL.
+
+    Returns the transcript text, or None when the URL yields no video ID or
+    the duration lookup fails.
+    """
+    # youtube_url.split("v=")[-1] kept trailing query parameters (e.g. "&t=30s")
+    # and returned the whole URL for links without "v="; use the file's
+    # extract_video_id helper and reject URLs it cannot parse.
+    video_id = extract_video_id(youtube_url)
+    if not video_id:
+        print("Invalid or unsupported YouTube URL.")
+        return None
     video_length = get_video_duration(video_id, api_key)
     if video_length is not None:
+        print(f"Video length: {video_length} minutes.")
+        # Fetch transcript using YouTube API
         transcript = get_transcript_from_youtube_api(video_id, video_length)
+        # If a transcript is found from YouTube, use it
         if transcript:
+            print("Transcript found.")
             return transcript
+        else:
+            # No transcript found from YouTube API, proceed with Whisper
+            print("No transcript found on YouTube, using Whisper for transcription.")
+            # Whisper is the fallback whenever YouTube yields nothing, not only for short videos
+            return download_and_transcribe_with_whisper(youtube_url)
     else:
         print("Error fetching video duration.")
         return None