AnalysisWithMSR committed on
Commit
3833cc4
·
verified ·
1 Parent(s): f8db1f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -16
app.py CHANGED
@@ -86,41 +86,63 @@ def download_and_transcribe_with_whisper(youtube_url):
86
  def get_transcript_from_youtube_api(video_id, video_length):
87
  """Fetches transcript using YouTube API if available."""
88
  try:
 
89
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
90
 
 
91
  for transcript in transcript_list:
92
- if not transcript.is_generated:
93
- segments = transcript.fetch()
94
- return " ".join(segment['text'] for segment in segments)
95
-
 
 
 
 
 
 
 
96
  if video_length > 15:
 
 
97
  auto_transcript = transcript_list.find_generated_transcript(['en'])
98
  if auto_transcript:
99
- segments = auto_transcript.fetch()
100
- return " ".join(segment['text'] for segment in segments)
 
 
 
 
101
 
102
- print("Manual transcript not available, and video is too short for auto-transcript.")
103
- return None
 
 
104
 
105
  except Exception as e:
106
  print(f"Error fetching transcript: {e}")
107
  return None
108
 
 
109
  def get_transcript(youtube_url, api_key):
110
  """Gets transcript from YouTube API or Whisper if unavailable."""
111
- video_id = extract_video_id(youtube_url)
112
- if not video_id:
113
- print("Invalid or unsupported YouTube URL.")
114
- return None
115
-
116
  video_length = get_video_duration(video_id, api_key)
 
117
  if video_length is not None:
118
- print(f"Video length: {video_length:.2f} minutes.")
 
 
119
  transcript = get_transcript_from_youtube_api(video_id, video_length)
 
 
120
  if transcript:
 
121
  return transcript
122
- print("Using Whisper for transcription.")
123
- return download_and_transcribe_with_whisper(youtube_url)
 
 
124
  else:
125
  print("Error fetching video duration.")
126
  return None
 
86
  def get_transcript_from_youtube_api(video_id, video_length):
87
  """Fetches transcript using YouTube API if available."""
88
  try:
89
+ # Fetch available transcripts
90
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
91
 
92
+ # Look for manually created transcripts first
93
  for transcript in transcript_list:
94
+ if not transcript.is_generated: # This checks for manually created transcripts
95
+ manual_transcript = transcript.fetch()
96
+ # Check if manual_transcript is iterable (should be a list)
97
+ if isinstance(manual_transcript, list):
98
+ full_transcript = " ".join([segment['text'] for segment in manual_transcript])
99
+ return full_transcript # Return manual transcript immediately
100
+ else:
101
+ print("Manual transcript is not iterable.")
102
+ return None
103
+
104
+ # If no manual transcript found, proceed to auto-generated transcript
105
  if video_length > 15:
106
+ # Video is longer than 15 minutes, so use auto-generated transcript
107
+ print("Video is longer than 15 minutes, using auto-generated transcript.")
108
  auto_transcript = transcript_list.find_generated_transcript(['en'])
109
  if auto_transcript:
110
+ # Extract the text from the auto-generated transcript
111
+ full_transcript = " ".join([segment['text'] for segment in auto_transcript.fetch()])
112
+ return full_transcript # Return auto-generated transcript
113
+ else:
114
+ print("No auto-generated transcript available.")
115
+ return None
116
 
117
+ else:
118
+ # Video is shorter than 15 minutes, use Whisper for transcription
119
+ print("Video is shorter than 15 minutes, using Whisper for transcription.")
120
+ return None # This will be handled by Whisper in your main function
121
 
122
  except Exception as e:
123
  print(f"Error fetching transcript: {e}")
124
  return None
125
 
126
+
127
  def get_transcript(youtube_url, api_key):
128
  """Gets transcript from YouTube API or Whisper if unavailable."""
129
+ video_id = youtube_url.split("v=")[-1] # Extract the video ID from URL
 
 
 
 
130
  video_length = get_video_duration(video_id, api_key)
131
+
132
  if video_length is not None:
133
+ print(f"Video length: {video_length} minutes.")
134
+
135
+ # Fetch transcript using YouTube API
136
  transcript = get_transcript_from_youtube_api(video_id, video_length)
137
+
138
+ # If a transcript is found from YouTube, use it
139
  if transcript:
140
+ print("Transcript found.")
141
  return transcript
142
+ else:
143
+ # No transcript found from YouTube API, proceed with Whisper
144
+ print("No transcript found on YouTube, using Whisper for transcription.")
145
+ return download_and_transcribe_with_whisper(youtube_url) # Use Whisper for short videos
146
  else:
147
  print("Error fetching video duration.")
148
  return None