AnalysisWithMSR committed on
Commit
2cb5449
·
verified ·
1 Parent(s): 93d65a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -179
app.py CHANGED
@@ -1,182 +1,10 @@
1
- import tempfile
2
- import gradio as gr
3
- import googleapiclient.discovery
4
- import re
5
- import yt_dlp
6
- import whisper
7
- from pydub import AudioSegment
8
- from transformers import pipeline
9
- from youtube_transcript_api import YouTubeTranscriptApi
10
- import openai
11
- import json
12
  import os
13
- from urllib.parse import urlparse, parse_qs
14
- import torch
15
 
16
def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Supported shapes:
      * ``https://www.youtube.com/watch?v=<id>`` (any ``*.youtube.com`` host)
      * ``https://www.youtube.com/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``, ``/v/<id>``
      * ``https://youtu.be/<id>``
      * any of the above without a scheme (e.g. ``youtu.be/<id>``)

    Args:
        url: The URL string entered by the user.

    Returns:
        The video ID string, or ``None`` when the URL is not a YouTube URL or
        carries no video ID.
    """
    if not isinstance(url, str) or not url.strip():
        print("Invalid YouTube URL.")
        return None

    candidate = url.strip()
    # urlparse() only fills in the host when a scheme is present, so add one
    # for inputs such as "youtube.com/watch?v=...".
    if "://" not in candidate:
        candidate = "https://" + candidate.lstrip("/")

    try:
        parsed_url = urlparse(candidate)
        host = (parsed_url.hostname or "").lower()
    except ValueError as e:
        # Raised for malformed authorities such as an unbalanced "[".
        print(f"Error parsing URL: {e}")
        return None

    path_segments = [segment for segment in parsed_url.path.split("/") if segment]

    if host in ("youtu.be", "www.youtu.be"):
        video_id = path_segments[0] if path_segments else None
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        # Exact/suffix host match: a plain substring test would also accept
        # look-alike hosts such as "notyoutube.com".
        video_id = parse_qs(parsed_url.query).get('v', [None])[0]
        if (
            video_id is None
            and len(path_segments) >= 2
            and path_segments[0] in ("shorts", "embed", "live", "v")
        ):
            video_id = path_segments[1]
    else:
        print("Invalid YouTube URL.")
        return None

    # Normalise an empty ID to None so callers can rely on one "missing" value.
    return video_id or None
31
 
32
def _iso8601_duration_to_minutes(duration):
    """Convert an ISO 8601 duration (e.g. ``PT1H2M3S``, ``P1DT4H``) to minutes, or None if malformed."""
    match = re.fullmatch(
        r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?',
        duration or "",
    )
    # Reject non-matches and bare "P"/"PT", which carry no component at all.
    if match is None or all(group is None for group in match.groups()):
        return None
    weeks, days, hours, minutes, seconds = (
        int(group) if group else 0 for group in match.groups()
    )
    total_hours = (weeks * 7 + days) * 24 + hours
    return total_hours * 60 + minutes + seconds / 60


def get_video_duration(video_id, api_key):
    """Fetch a video's duration in minutes from the YouTube Data API.

    Args:
        video_id: The YouTube video ID to look up.
        api_key: Developer key passed to the YouTube Data API v3 client.

    Returns:
        The duration in (fractional) minutes, or ``None`` when the API call
        fails, the video is not found, or the duration cannot be parsed.
    """
    try:
        youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
        request = youtube.videos().list(part="contentDetails", id=video_id)
        response = request.execute()
    except Exception as e:
        # Best-effort boundary: any client/HTTP failure is reported, not raised.
        print(f"Error fetching video duration: {e}")
        return None

    items = response.get("items")
    if not items:
        print("No video details found.")
        return None

    try:
        duration = items[0]["contentDetails"]["duration"]
    except (KeyError, IndexError, TypeError) as e:
        print(f"Error fetching video duration: {e}")
        return None

    # Videos of a day or longer are reported as "P#DT#H#M#S"; a "PT"-only
    # pattern cannot parse those.
    minutes = _iso8601_duration_to_minutes(duration)
    if minutes is None:
        print(f"Error fetching video duration: unrecognised duration {duration!r}")
    return minutes
51
-
52
def download_and_transcribe_with_whisper(youtube_url, model_name="turbo"):
    """Download a video's audio track and transcribe it locally with Whisper.

    Args:
        youtube_url: URL of the video whose audio should be transcribed.
        model_name: Whisper checkpoint name handed to ``whisper.load_model``.
            Defaults to ``"turbo"``.

    Returns:
        The transcribed text, or ``None`` if the download, conversion or
        transcription fails (the error is printed).
    """
    try:
        # Everything lives in a temporary directory so no audio is left behind.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_audio_file = os.path.join(temp_dir, "audio.mp3")

            ydl_opts = {
                'format': 'bestaudio/best',
                'outtmpl': temp_audio_file,
            }

            # Download audio using yt-dlp
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Convert to wav for Whisper
            audio = AudioSegment.from_file(temp_audio_file)
            wav_file = os.path.join(temp_dir, "audio.wav")
            audio.export(wav_file, format="wav")

            # Run Whisper transcription.
            # Model names are lower-case ("turbo", not "Turbo") and
            # load_model() takes no ``weights_only`` keyword; the previous
            # call failed on every invocation.
            model = whisper.load_model(model_name)
            result = model.transcribe(wav_file)
            return result['text']

    except Exception as e:
        # Best-effort fallback: report the failure and let the caller decide.
        print(f"Error during transcription: {e}")
        return None
79
-
80
def get_transcript(youtube_url, api_key):
    """Return the transcript text for a YouTube video.

    The published transcript is tried first; if that raises for any reason,
    the audio is downloaded and transcribed with Whisper instead.

    Args:
        youtube_url: URL of the video.
        api_key: YouTube Data API key, used for the duration lookup.

    Returns:
        The transcript as one string, or ``None`` when the URL has no video
        ID, the duration lookup fails, or the Whisper fallback fails.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return None

    video_length = get_video_duration(video_id, api_key)
    if video_length is None:
        print("Error fetching video duration.")
        return None

    print(f"Video length: {video_length} minutes.")
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(piece['text'] for piece in segments)
    except Exception as e:
        # No usable published transcript: fall back to local speech-to-text.
        print(f"No transcript found via YouTube API: {e}")
        return download_and_transcribe_with_whisper(youtube_url)
99
-
100
def _split_into_chunks(text, size, overlap):
    """Split *text* into windows of *size* characters that overlap by *overlap* characters."""
    chunks = []
    for start in range(0, len(text), size - overlap):
        chunks.append(text[start:start + size])
        # Stop once a window reaches the end; otherwise a trailing window
        # made only of already-covered overlap text would be summarised again.
        if start + size >= len(text):
            break
    return chunks


def summarize_text_huggingface(text):
    """Summarize text using a Hugging Face summarization model.

    The text is cut into overlapping character windows, each window is
    summarised with ``facebook/bart-large-cnn``, and the partial summaries
    are joined with spaces.

    Args:
        text: The text to summarise.

    Returns:
        The concatenated chunk summaries, or ``""`` for empty or
        whitespace-only input.
    """
    if not text or not text.strip():
        # Nothing to summarise; skip the expensive model load.
        return ""

    # Use the first GPU when available, otherwise the CPU (-1).
    device = 0 if torch.cuda.is_available() else -1
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)

    max_input_length = 1024  # characters per chunk
    chunk_overlap = 100      # characters shared between neighbouring chunks
    text_chunks = _split_into_chunks(text, max_input_length, chunk_overlap)

    summaries = [
        summarizer(chunk, max_length=100, min_length=50, do_sample=False)[0]['summary_text']
        for chunk in text_chunks
    ]
    return " ".join(summaries)
116
-
117
def _parse_json_object(raw_reply):
    """Parse a JSON object from a chat reply, tolerating code fences or prose around it."""
    try:
        return json.loads(raw_reply)
    except (json.JSONDecodeError, TypeError):
        # Fall back to the outermost {...} span of the reply.
        reply_text = raw_reply or ""
        start = reply_text.find("{")
        end = reply_text.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(reply_text[start:end + 1])


def generate_optimized_content(api_key, summarized_transcript):
    """Ask the OpenAI chat model for SEO metadata based on a transcript summary.

    Args:
        api_key: OpenAI API key; assigned to ``openai.api_key`` (module-wide).
        summarized_transcript: Summary text embedded in the prompt.

    Returns:
        The parsed JSON reply (requested keys: ``keywords``, ``title``,
        ``description``, ``tags``), or ``None`` if the request or the JSON
        parsing fails.
    """
    openai.api_key = api_key

    prompt = f"""
Analyze the following summarized YouTube video transcript and:
1. Extract the top 10 keywords.
2. Generate an optimized title (less than 65 characters).
3. Create an engaging description.
4. Generate related tags for the video.

Summarized Transcript:
{summarized_transcript}

Provide the results in the following JSON format:
{{
"keywords": ["keyword1", "keyword2", ..., "keyword10"],
"title": "Generated Title",
"description": "Generated Description",
"tags": ["tag1", "tag2", ..., "tag10"]
}}
"""

    try:
        # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 client
        # interface; confirm the pinned ``openai`` version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an SEO expert."},
                {"role": "user", "content": prompt}
            ]
        )
        reply_text = response['choices'][0]['message']['content']
        # Models often wrap JSON in ```json fences or add a sentence around
        # it; a strict json.loads on the raw reply would discard such answers.
        return _parse_json_object(reply_text)
    except Exception as e:
        print(f"Error generating content: {e}")
        return None
151
-
152
def youtube_seo_pipeline(youtube_url):
    """Run the full URL -> transcript -> summary -> SEO metadata pipeline.

    Reads ``YOUTUBE_API_KEY`` and ``OPENAI_API_KEY`` from the environment.

    Args:
        youtube_url: The YouTube video URL entered by the user.

    Returns:
        A pretty-printed JSON string with the generated SEO content, or a
        human-readable error message when a step fails.
    """
    YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

    if not YOUTUBE_API_KEY or not OPENAI_API_KEY:
        return "API keys missing! Please check environment variables."

    # Validate the URL up front so the user gets a specific message.
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL."

    transcript = get_transcript(youtube_url, YOUTUBE_API_KEY)
    if not transcript:
        return "Failed to fetch transcript."

    summarized_text = summarize_text_huggingface(transcript)
    optimized_content = generate_optimized_content(OPENAI_API_KEY, summarized_text)

    if not optimized_content:
        return "Failed to generate SEO content."
    # ensure_ascii=False keeps non-English titles and tags readable in the
    # output box instead of rendering them as \uXXXX escapes.
    return json.dumps(optimized_content, indent=4, ensure_ascii=False)
171
-
172
# Gradio Interface
# One free-text input (the video URL) and one free-text output (the JSON
# result or an error message returned by youtube_seo_pipeline).
iface = gr.Interface(
    youtube_seo_pipeline,
    "text",
    "text",
    title="YouTube SEO Optimizer",
    description="Enter a YouTube video URL to fetch and optimize SEO content.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
 
2
 
3
# Read the YouTube API key from the process environment (None when absent).
youtube_api_key = os.environ.get("YOUTUBE_API_KEY")

# Report only whether a non-empty key is present; the value is never printed.
print("YOUTUBE_API_KEY is set." if youtube_api_key else "YOUTUBE_API_KEY is not set.")