AnalysisWithMSR committed on
Commit
783f341
·
verified ·
1 Parent(s): e9825c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -56
app.py CHANGED
@@ -1,17 +1,17 @@
1
- import googleapiclient.discovery
2
- import re
3
- import yt_dlp
4
  import whisper
5
  from pydub import AudioSegment
6
  import tempfile
7
- from transformers import pipeline
8
- from pytrends.request import TrendReq
9
  from youtube_transcript_api import YouTubeTranscriptApi
10
- import torch
11
  import openai
12
- import json
13
- from urllib.parse import urlparse, parse_qs
14
- import os
 
 
15
 
16
  def extract_video_id(url):
17
  """Extracts the video ID from a YouTube URL."""
@@ -29,10 +29,15 @@ def extract_video_id(url):
29
  print(f"Error parsing URL: {e}")
30
  return None
31
 
32
- def get_video_duration(video_id, api_key):
33
- """Fetches the video duration in minutes."""
 
 
 
 
 
34
  try:
35
- youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
36
  request = youtube.videos().list(part="contentDetails", id=video_id)
37
  response = request.execute()
38
  if response["items"]:
@@ -49,11 +54,12 @@ def get_video_duration(video_id, api_key):
49
  print(f"Error fetching video duration: {e}")
50
  return None
51
 
 
52
  def download_and_transcribe_with_whisper(youtube_url):
 
53
  try:
54
  with tempfile.TemporaryDirectory() as temp_dir:
55
  temp_audio_file = os.path.join(temp_dir, "audio.mp3")
56
-
57
  ydl_opts = {
58
  'format': 'bestaudio/best',
59
  'outtmpl': temp_audio_file,
@@ -80,47 +86,43 @@ def download_and_transcribe_with_whisper(youtube_url):
80
  print(f"Error during transcription: {e}")
81
  return None
82
 
83
- def get_transcript_from_youtube_api(video_id, video_length):
84
- """Fetches transcript using YouTube API if available."""
 
 
 
 
 
85
  try:
86
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
87
-
88
  for transcript in transcript_list:
89
  if not transcript.is_generated:
90
  segments = transcript.fetch()
91
  return " ".join(segment['text'] for segment in segments)
92
-
93
- if video_length > 15:
94
- auto_transcript = transcript_list.find_generated_transcript(['en'])
95
- if auto_transcript:
96
- segments = auto_transcript.fetch()
97
- return " ".join(segment['text'] for segment in segments)
98
-
99
- print("Manual transcript not available, and video is too short for auto-transcript.")
100
  return None
101
 
102
  except Exception as e:
103
  print(f"Error fetching transcript: {e}")
104
  return None
105
 
106
- def get_transcript(youtube_url, api_key):
 
107
  """Gets transcript from YouTube API or Whisper if unavailable."""
108
  video_id = extract_video_id(youtube_url)
109
  if not video_id:
110
  print("Invalid or unsupported YouTube URL.")
111
  return None
112
 
113
- video_length = get_video_duration(video_id, api_key)
114
- if video_length is not None:
115
- print(f"Video length: {video_length:.2f} minutes.")
116
- transcript = get_transcript_from_youtube_api(video_id, video_length)
117
  if transcript:
118
  return transcript
119
- print("Using Whisper for transcription.")
120
- return download_and_transcribe_with_whisper(youtube_url)
121
- else:
122
- print("Error fetching video duration.")
123
- return None
124
 
125
  def summarize_text_huggingface(text):
126
  """Summarizes text using a Hugging Face summarization model."""
@@ -137,8 +139,12 @@ def summarize_text_huggingface(text):
137
  ]
138
  return " ".join(summaries)
139
 
140
- def generate_optimized_content(api_key, summarized_transcript):
141
- openai.api_key = api_key
 
 
 
 
142
 
143
  prompt = f"""
144
  Analyze the following summarized YouTube video transcript and:
@@ -178,29 +184,25 @@ def generate_optimized_content(api_key, summarized_transcript):
178
  return None
179
 
180
 
181
- def main():
182
- youtube_url = input("Enter a YouTube video URL: ").strip()
183
- youtube_api_key = "AIzaSyDzvaQzykj94MWl5fmY3wIBQchqXiCClUc" # Set your YouTube API key as an environment variable
184
- openai_api_key = "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" # Set your OpenAI API key as an environment variable
185
-
186
- if not youtube_api_key or not openai_api_key:
187
- print("Missing API keys. Please set your YOUTUBE_API_KEY and OPENAI_API_KEY environment variables.")
188
- return
189
-
190
- transcript = get_transcript(youtube_url, youtube_api_key)
191
  if not transcript:
192
- print("Could not fetch the transcript. Please try another video.")
193
- return
194
 
195
  summary = summarize_text_huggingface(transcript)
196
- print("\nSummarized Transcript:\n", summary)
 
 
 
197
 
198
- optimized_content = generate_optimized_content(openai_api_key, summary)
199
- if optimized_content:
200
- print("\nOptimized Content:")
201
- print(json.dumps(optimized_content, indent=4))
202
- else:
203
- print("Error generating optimized content.")
 
204
 
205
  if __name__ == "__main__":
206
- main()
 
1
+ import gradio as gr
2
+ from transformers import pipeline
 
3
  import whisper
4
  from pydub import AudioSegment
5
  import tempfile
6
+ import os
7
+ import googleapiclient.discovery
8
  from youtube_transcript_api import YouTubeTranscriptApi
 
9
  import openai
10
+
11
+ # Load API keys from environment variables (recommended)
12
+ YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
13
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
14
+
15
 
16
  def extract_video_id(url):
17
  """Extracts the video ID from a YouTube URL."""
 
29
  print(f"Error parsing URL: {e}")
30
  return None
31
 
32
+
33
+ def get_video_duration(video_id):
34
+ """Fetches the video duration in minutes (if API key provided)."""
35
+ if not YOUTUBE_API_KEY:
36
+ print("Missing YouTube API key. Skipping video duration.")
37
+ return None
38
+
39
  try:
40
+ youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
41
  request = youtube.videos().list(part="contentDetails", id=video_id)
42
  response = request.execute()
43
  if response["items"]:
 
54
  print(f"Error fetching video duration: {e}")
55
  return None
56
 
57
+
58
  def download_and_transcribe_with_whisper(youtube_url):
59
+ """Downloads and transcribes audio using Whisper."""
60
  try:
61
  with tempfile.TemporaryDirectory() as temp_dir:
62
  temp_audio_file = os.path.join(temp_dir, "audio.mp3")
 
63
  ydl_opts = {
64
  'format': 'bestaudio/best',
65
  'outtmpl': temp_audio_file,
 
86
  print(f"Error during transcription: {e}")
87
  return None
88
 
89
+
90
def get_transcript_from_youtube_api(video_id):
    """Fetch a manually created transcript for a YouTube video, if one exists.

    Uses ``youtube_transcript_api``, which reads the captions published with
    the video and does not need a YouTube Data API key, so the lookup is not
    gated on ``YOUTUBE_API_KEY``.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The text of the first manually created (non auto-generated)
        transcript, with segments joined by single spaces, or ``None`` when
        no manual transcript exists or the lookup fails.
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        for transcript in transcript_list:
            # Auto-generated captions are skipped on purpose; only
            # human-written transcripts are returned from here.
            if not transcript.is_generated:
                segments = transcript.fetch()
                return " ".join(segment['text'] for segment in segments)
        print("Manual transcript not found.")
        return None

    except Exception as e:
        # Best-effort lookup: report the problem and return None so the
        # caller can fall back to another transcription method.
        print(f"Error fetching transcript: {e}")
        return None
108
 
109
+
110
def get_transcript(youtube_url):
    """Get a transcript for a YouTube video, falling back to Whisper.

    The published transcript is tried first; if none is returned, the audio
    is downloaded and transcribed with Whisper.

    Args:
        youtube_url: URL of the YouTube video.

    Returns:
        The transcript text, or ``None`` when the URL yields no video ID or
        the Whisper fallback also returns nothing.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        print("Invalid or unsupported YouTube URL.")
        return None

    # The transcript lookup is attempted unconditionally. It was previously
    # gated on a successful duration lookup whose result was never used, so a
    # failed duration request forced the much slower Whisper path even when a
    # transcript was available.
    transcript = get_transcript_from_youtube_api(video_id)
    if transcript:
        return transcript

    print("Using Whisper for transcription.")
    return download_and_transcribe_with_whisper(youtube_url)
125
+
 
126
 
127
  def summarize_text_huggingface(text):
128
  """Summarizes text using a Hugging Face summarization model."""
 
139
  ]
140
  return " ".join(summaries)
141
 
142
+
143
+ def generate_optimized_content(summarized_transcript):
144
+ """Generates optimized content using OpenAI (if API key provided)."""
145
+ if not OPENAI_API_KEY:
146
+ print("Missing OpenAI API key. Skipping optimized content generation.")
147
+ return None
148
 
149
  prompt = f"""
150
  Analyze the following summarized YouTube video transcript and:
 
184
  return None
185
 
186
 
187
def seo_tool(youtube_url):
    """Summarize a YouTube video and generate optimized content suggestions.

    This is the callback behind the Gradio interface, which declares two
    outputs (a text box and a JSON view), so every return path yields a
    2-tuple.

    Args:
        youtube_url: URL of the YouTube video to analyze.

    Returns:
        A ``(summary, optimized_content)`` tuple. When no transcript can be
        obtained, the first element is an error message and the second is
        ``None``.
    """
    transcript = get_transcript(youtube_url)
    if not transcript:
        # Return one value per declared output component; a lone string here
        # would not match the interface's two outputs.
        return "Could not fetch the transcript. Please try another video.", None

    summary = summarize_text_huggingface(transcript)
    optimized_content = generate_optimized_content(summary)

    return summary, optimized_content
197
+
198
 
199
# Gradio UI: one text input (the video URL) mapped to two outputs, the
# summary as text and the optimized content as JSON.
interface = gr.Interface(
    seo_tool,
    "text",
    ["text", "json"],
    description="Enter a YouTube URL to get a summary and optimized content suggestions.",
    title="SEO Tool for YouTube Videos",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()