Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -86,41 +86,63 @@ def download_and_transcribe_with_whisper(youtube_url):
|
|
86 |
def get_transcript_from_youtube_api(video_id, video_length):
|
87 |
"""Fetches transcript using YouTube API if available."""
|
88 |
try:
|
|
|
89 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
90 |
|
|
|
91 |
for transcript in transcript_list:
|
92 |
-
if not transcript.is_generated:
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
if video_length > 15:
|
|
|
|
|
97 |
auto_transcript = transcript_list.find_generated_transcript(['en'])
|
98 |
if auto_transcript:
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
101 |
|
102 |
-
|
103 |
-
|
|
|
|
|
104 |
|
105 |
except Exception as e:
|
106 |
print(f"Error fetching transcript: {e}")
|
107 |
return None
|
108 |
|
|
|
109 |
def get_transcript(youtube_url, api_key):
|
110 |
"""Gets transcript from YouTube API or Whisper if unavailable."""
|
111 |
-
video_id =
|
112 |
-
if not video_id:
|
113 |
-
print("Invalid or unsupported YouTube URL.")
|
114 |
-
return None
|
115 |
-
|
116 |
video_length = get_video_duration(video_id, api_key)
|
|
|
117 |
if video_length is not None:
|
118 |
-
print(f"Video length: {video_length
|
|
|
|
|
119 |
transcript = get_transcript_from_youtube_api(video_id, video_length)
|
|
|
|
|
120 |
if transcript:
|
|
|
121 |
return transcript
|
122 |
-
|
123 |
-
|
|
|
|
|
124 |
else:
|
125 |
print("Error fetching video duration.")
|
126 |
return None
|
|
|
86 |
def _segments_to_text(segments):
    """Join the text of transcript segments into one space-separated string.

    Each segment may be a dict with a "text" key or an object exposing a
    ``text`` attribute, so both plain-list payloads and richer iterable
    payloads are handled.

    Returns:
        The joined text, or None when *segments* is not an iterable of
        segments (a bare string is rejected as well).
    """
    if isinstance(segments, (str, bytes)):
        return None
    try:
        iterator = iter(segments)
    except TypeError:
        return None
    return " ".join(
        segment["text"] if isinstance(segment, dict) else segment.text
        for segment in iterator
    )


def get_transcript_from_youtube_api(video_id, video_length):
    """Fetch a transcript through YouTubeTranscriptApi, if one is usable.

    Manually created transcripts are preferred. When none exists, the
    auto-generated English transcript is used, but only for videos longer
    than 15 minutes; otherwise the caller is expected to fall back to
    another transcription method.

    Args:
        video_id: YouTube video identifier passed to the transcript API.
        video_length: Video duration in minutes.

    Returns:
        The transcript text as one space-joined string, or None when no
        usable transcript was found or any error occurred.
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # Manually created transcripts win over auto-generated ones.
        for transcript in transcript_list:
            if transcript.is_generated:
                continue
            full_transcript = _segments_to_text(transcript.fetch())
            if full_transcript is None:
                print("Manual transcript is not iterable.")
                return None
            return full_transcript

        if video_length > 15:
            print("Video is longer than 15 minutes, using auto-generated transcript.")
            # NOTE(review): the library appears to raise (not return a falsy
            # value) when no generated transcript exists; that case is then
            # reported by the except clause below. The check is kept as a
            # harmless guard.
            auto_transcript = transcript_list.find_generated_transcript(['en'])
            if auto_transcript:
                return _segments_to_text(auto_transcript.fetch())
            print("No auto-generated transcript available.")
            return None

        # Not longer than 15 minutes: signal the caller to transcribe itself.
        print("Video is shorter than 15 minutes, using Whisper for transcription.")
        return None

    except Exception as e:
        # Deliberate best-effort boundary: any failure means "no transcript".
        print(f"Error fetching transcript: {e}")
        return None
|
125 |
|
126 |
+
|
127 |
def _extract_video_id(youtube_url):
    """Return the video ID contained in *youtube_url*, or None if there is none.

    Recognised forms: a ``v=`` query parameter (extra parameters such as
    ``&t=10s`` are ignored), ``youtu.be/<id>`` short links,
    ``/shorts/``, ``/embed/``, ``/live/`` and ``/v/`` paths on YouTube hosts,
    and a bare video ID.
    """
    import re
    from urllib.parse import parse_qs, urlparse

    if not isinstance(youtube_url, str):
        return None

    # Assumes the usual 11-character ID alphabet; TODO confirm if YouTube
    # ever changes the ID format.
    id_pattern = re.compile(r"[A-Za-z0-9_-]{11}")

    text = youtube_url.strip()
    if id_pattern.fullmatch(text):
        return text  # a bare ID was passed instead of a URL

    # urlparse only fills in the host when a scheme is present.
    if "://" not in text:
        text = "https://" + text
    try:
        parsed = urlparse(text)
    except ValueError:
        return None

    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]
    on_youtube = host.endswith("youtube.com") or host.endswith("youtube-nocookie.com")

    candidate = None
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        candidate = query_ids[0]
    elif host == "youtu.be":
        candidate = path_parts[0] if path_parts else None
    elif on_youtube and len(path_parts) >= 2 and path_parts[0] in ("shorts", "embed", "live", "v"):
        candidate = path_parts[1]

    if candidate and id_pattern.fullmatch(candidate):
        return candidate
    return None


def get_transcript(youtube_url, api_key):
    """Get a transcript from the YouTube API, falling back to Whisper.

    Args:
        youtube_url: URL of the video (or a bare video ID).
        api_key: Key forwarded to ``get_video_duration``.

    Returns:
        The transcript found through the YouTube transcript API when there
        is one, otherwise whatever ``download_and_transcribe_with_whisper``
        returns. None when the URL holds no video ID or the duration could
        not be fetched.
    """
    video_id = _extract_video_id(youtube_url)
    if not video_id:
        print("Invalid or unsupported YouTube URL.")
        return None

    video_length = get_video_duration(video_id, api_key)
    if video_length is None:
        print("Error fetching video duration.")
        return None

    print(f"Video length: {video_length} minutes.")

    transcript = get_transcript_from_youtube_api(video_id, video_length)
    if transcript:
        print("Transcript found.")
        return transcript

    # Nothing usable from the YouTube API: transcribe the audio instead.
    print("No transcript found on YouTube, using Whisper for transcription.")
    return download_and_transcribe_with_whisper(youtube_url)
|