AnalysisWithMSR committed on
Commit
93d65a4
·
verified ·
1 Parent(s): 2c84b32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -103
app.py CHANGED
@@ -10,18 +10,8 @@ from youtube_transcript_api import YouTubeTranscriptApi
10
  import openai
11
  import json
12
  import os
13
- from pytube import YouTube
14
- from pytrends.request import TrendReq
15
- import torch
16
  from urllib.parse import urlparse, parse_qs
17
-
18
-
19
- YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
20
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
21
- if not YOUTUBE_API_KEY:
22
- return "YouTube API key is missing! Please set the `YOUTUBE_API_KEY` environment variable."
23
- if not OPENAI_API_KEY:
24
- return "OpenAI API key is missing! Please set the `OPENAI_API_KEY` environment variable."
25
 
26
  def extract_video_id(url):
27
  """Extracts the video ID from a YouTube URL."""
@@ -61,7 +51,6 @@ def get_video_duration(video_id, api_key):
61
 
62
  def download_and_transcribe_with_whisper(youtube_url):
63
  try:
64
-
65
  with tempfile.TemporaryDirectory() as temp_dir:
66
  temp_audio_file = os.path.join(temp_dir, "audio.mp3")
67
 
@@ -80,81 +69,35 @@ def download_and_transcribe_with_whisper(youtube_url):
80
  audio.export(wav_file, format="wav")
81
 
82
  # Run Whisper transcription
83
- model = whisper.load_model("turbo",weights_only=True)
84
  result = model.transcribe(wav_file)
85
- transcript = result['text']
86
- return transcript
87
 
88
  except Exception as e:
89
  print(f"Error during transcription: {e}")
90
  return None
91
 
92
- def get_transcript_from_youtube_api(video_id, video_length):
93
- """Fetches transcript using YouTube API if available."""
94
- try:
95
- # Fetch available transcripts
96
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
97
-
98
- # Look for manually created transcripts first
99
- for transcript in transcript_list:
100
- if not transcript.is_generated: # This checks for manually created transcripts
101
- manual_transcript = transcript.fetch()
102
- # Check if manual_transcript is iterable (should be a list)
103
- if isinstance(manual_transcript, list):
104
- full_transcript = " ".join([segment['text'] for segment in manual_transcript])
105
- return full_transcript # Return manual transcript immediately
106
- else:
107
- print("Manual transcript is not iterable.")
108
- return None
109
-
110
- # If no manual transcript found, proceed to auto-generated transcript
111
- if video_length > 15:
112
- # Video is longer than 15 minutes, so use auto-generated transcript
113
- print("Video is longer than 15 minutes, using auto-generated transcript.")
114
- auto_transcript = transcript_list.find_generated_transcript(['en'])
115
- if auto_transcript:
116
- # Extract the text from the auto-generated transcript
117
- full_transcript = " ".join([segment['text'] for segment in auto_transcript.fetch()])
118
- return full_transcript # Return auto-generated transcript
119
- else:
120
- print("No auto-generated transcript available.")
121
- return None
122
-
123
- else:
124
- # Video is shorter than 15 minutes, use Whisper for transcription
125
- print("Video is shorter than 15 minutes, using Whisper for transcription.")
126
- return None # This will be handled by Whisper in your main function
127
-
128
- except Exception as e:
129
- print(f"Error fetching transcript: {e}")
130
- return None
131
-
132
-
133
  def get_transcript(youtube_url, api_key):
134
  """Gets transcript from YouTube API or Whisper if unavailable."""
135
- video_id = youtube_url.split("v=")[-1] # Extract the video ID from URL
 
 
 
136
  video_length = get_video_duration(video_id, api_key)
137
 
138
  if video_length is not None:
139
  print(f"Video length: {video_length} minutes.")
140
-
141
- # Fetch transcript using YouTube API
142
- transcript = get_transcript_from_youtube_api(video_id, video_length)
143
-
144
- # If a transcript is found from YouTube, use it
145
- if transcript:
146
- print("Transcript found.")
147
- return transcript
148
- else:
149
- # No transcript found from YouTube API, proceed with Whisper
150
- print("No transcript found on YouTube, using Whisper for transcription.")
151
- return download_and_transcribe_with_whisper(youtube_url) # Use Whisper for short videos
152
  else:
153
  print("Error fetching video duration.")
154
  return None
155
 
156
  def summarize_text_huggingface(text):
157
-
158
  """Summarizes text using a Hugging Face summarization model."""
159
  device = 0 if torch.cuda.is_available() else -1
160
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
@@ -194,7 +137,6 @@ def generate_optimized_content(api_key, summarized_transcript):
194
  """
195
 
196
  try:
197
- # Use the updated OpenAI API format for chat completions
198
  response = openai.ChatCompletion.create(
199
  model="gpt-3.5-turbo",
200
  messages=[
@@ -202,60 +144,39 @@ def generate_optimized_content(api_key, summarized_transcript):
202
  {"role": "user", "content": prompt}
203
  ]
204
  )
205
- # Extract and parse the response
206
- response_content = response['choices'][0]['message']['content']
207
- content = json.loads(response_content)
208
- return content
209
-
210
  except Exception as e:
211
  print(f"Error generating content: {e}")
212
  return None
213
 
214
-
215
- # Add all your functions like `extract_video_id()`, `get_transcript()`, etc.
216
-
217
- # Gradio Function for YouTube SEO
218
  def youtube_seo_pipeline(youtube_url):
219
- print("Starting the SEO pipeline...") # Debugging line
220
-
221
- if not YOUTUBE_API_KEY AND not OPENAI_API_KEY:
 
222
  return "API keys missing! Please check environment variables."
223
 
224
- print("Extracting video ID...")
225
  video_id = extract_video_id(youtube_url)
226
  if not video_id:
227
  return "Invalid YouTube URL."
228
-
229
- print(f"Video ID: {video_id}")
230
 
231
- print("Fetching transcript...")
232
  transcript = get_transcript(youtube_url, YOUTUBE_API_KEY)
233
- print(transcript)
234
  if not transcript:
235
- return "Failed to fetch transcript. Try another video."
236
-
237
- print("Summarizing transcript...")
238
  summarized_text = summarize_text_huggingface(transcript)
239
- print(f"Summarized Text: {summarized_text[:200]}...") # Show only the first 200 chars
240
-
241
- print("Generating optimized content...")
242
  optimized_content = generate_optimized_content(OPENAI_API_KEY, summarized_text)
243
 
244
- if optimized_content:
245
- return json.dumps(optimized_content, indent=4)
246
- else:
247
- return "Failed to generate SEO content."
248
-
249
 
250
- # Define Gradio Interface
251
  iface = gr.Interface(
252
  fn=youtube_seo_pipeline,
253
  inputs="text",
254
  outputs="text",
255
  title="YouTube SEO Optimizer",
256
- description="Enter a YouTube video URL to fetch and optimize SEO content (title, description, tags, and keywords)."
257
  )
258
 
259
- # Launch Gradio App
260
  if __name__ == "__main__":
261
- iface.launch()
 
10
  import openai
11
  import json
12
  import os
 
 
 
13
  from urllib.parse import urlparse, parse_qs
14
+ import torch
 
 
 
 
 
 
 
15
 
16
  def extract_video_id(url):
17
  """Extracts the video ID from a YouTube URL."""
 
51
 
52
  def download_and_transcribe_with_whisper(youtube_url):
53
  try:
 
54
  with tempfile.TemporaryDirectory() as temp_dir:
55
  temp_audio_file = os.path.join(temp_dir, "audio.mp3")
56
 
 
69
  audio.export(wav_file, format="wav")
70
 
71
  # Run Whisper transcription
72
+ model = whisper.load_model("Turbo",weights_only=True)
73
  result = model.transcribe(wav_file)
74
+ return result['text']
 
75
 
76
  except Exception as e:
77
  print(f"Error during transcription: {e}")
78
  return None
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def get_transcript(youtube_url, api_key):
81
  """Gets transcript from YouTube API or Whisper if unavailable."""
82
+ video_id = extract_video_id(youtube_url)
83
+ if not video_id:
84
+ return None
85
+
86
  video_length = get_video_duration(video_id, api_key)
87
 
88
  if video_length is not None:
89
  print(f"Video length: {video_length} minutes.")
90
+ try:
91
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
92
+ return " ".join([segment['text'] for segment in transcript])
93
+ except Exception as e:
94
+ print(f"No transcript found via YouTube API: {e}")
95
+ return download_and_transcribe_with_whisper(youtube_url)
 
 
 
 
 
 
96
  else:
97
  print("Error fetching video duration.")
98
  return None
99
 
100
  def summarize_text_huggingface(text):
 
101
  """Summarizes text using a Hugging Face summarization model."""
102
  device = 0 if torch.cuda.is_available() else -1
103
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
 
137
  """
138
 
139
  try:
 
140
  response = openai.ChatCompletion.create(
141
  model="gpt-3.5-turbo",
142
  messages=[
 
144
  {"role": "user", "content": prompt}
145
  ]
146
  )
147
+ return json.loads(response['choices'][0]['message']['content'])
 
 
 
 
148
  except Exception as e:
149
  print(f"Error generating content: {e}")
150
  return None
151
 
 
 
 
 
152
  def youtube_seo_pipeline(youtube_url):
153
+ YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
154
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
155
+
156
+ if not YOUTUBE_API_KEY or not OPENAI_API_KEY:
157
  return "API keys missing! Please check environment variables."
158
 
 
159
  video_id = extract_video_id(youtube_url)
160
  if not video_id:
161
  return "Invalid YouTube URL."
 
 
162
 
 
163
  transcript = get_transcript(youtube_url, YOUTUBE_API_KEY)
 
164
  if not transcript:
165
+ return "Failed to fetch transcript."
166
+
 
167
  summarized_text = summarize_text_huggingface(transcript)
 
 
 
168
  optimized_content = generate_optimized_content(OPENAI_API_KEY, summarized_text)
169
 
170
+ return json.dumps(optimized_content, indent=4) if optimized_content else "Failed to generate SEO content."
 
 
 
 
171
 
172
+ # Gradio Interface
173
  iface = gr.Interface(
174
  fn=youtube_seo_pipeline,
175
  inputs="text",
176
  outputs="text",
177
  title="YouTube SEO Optimizer",
178
+ description="Enter a YouTube video URL to fetch and optimize SEO content."
179
  )
180
 
 
181
  if __name__ == "__main__":
182
+ iface.launch()