Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,6 +16,12 @@ import torch
|
|
16 |
from urllib.parse import urlparse, parse_qs
|
17 |
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def extract_video_id(url):
|
21 |
"""Extracts the video ID from a YouTube URL."""
|
@@ -62,8 +68,6 @@ def download_and_transcribe_with_whisper(youtube_url):
|
|
62 |
ydl_opts = {
|
63 |
'format': 'bestaudio/best',
|
64 |
'outtmpl': temp_audio_file,
|
65 |
-
'extractaudio': True,
|
66 |
-
'audioquality': 1,
|
67 |
}
|
68 |
|
69 |
# Download audio using yt-dlp
|
@@ -76,7 +80,7 @@ def download_and_transcribe_with_whisper(youtube_url):
|
|
76 |
audio.export(wav_file, format="wav")
|
77 |
|
78 |
# Run Whisper transcription
|
79 |
-
model = whisper.load_model("turbo")
|
80 |
result = model.transcribe(wav_file)
|
81 |
transcript = result['text']
|
82 |
return transcript
|
@@ -150,8 +154,11 @@ def get_transcript(youtube_url, api_key):
|
|
150 |
return None
|
151 |
|
152 |
def summarize_text_huggingface(text):
|
|
|
153 |
"""Summarizes text using a Hugging Face summarization model."""
|
154 |
-
|
|
|
|
|
155 |
max_input_length = 1024
|
156 |
chunk_overlap = 100
|
157 |
text_chunks = [
|
@@ -203,8 +210,7 @@ def generate_optimized_content(api_key, summarized_transcript):
|
|
203 |
except Exception as e:
|
204 |
print(f"Error generating content: {e}")
|
205 |
return None
|
206 |
-
|
207 |
-
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
208 |
|
209 |
# Add all your functions like `extract_video_id()`, `get_transcript()`, etc.
|
210 |
|
|
|
16 |
from urllib.parse import urlparse, parse_qs
|
17 |
|
18 |
|
19 |
+
# API credentials are read from the environment once, at import time.
# Either value is None when the corresponding variable is not set.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# `return` is only legal inside a function; at module level it is a
# SyntaxError that stops the whole file from loading. Missing keys are
# reported with print() instead (the same way this file reports other
# errors), which keeps the module importable.
if not YOUTUBE_API_KEY:
    print("YouTube API key is missing! Please set the `YOUTUBE_API_KEY` environment variable.")
if not OPENAI_API_KEY:
    print("OpenAI API key is missing! Please set the `OPENAI_API_KEY` environment variable.")
|
25 |
|
26 |
def extract_video_id(url):
|
27 |
"""Extracts the video ID from a YouTube URL."""
|
|
|
68 |
ydl_opts = {
|
69 |
'format': 'bestaudio/best',
|
70 |
'outtmpl': temp_audio_file,
|
|
|
|
|
71 |
}
|
72 |
|
73 |
# Download audio using yt-dlp
|
|
|
80 |
audio.export(wav_file, format="wav")
|
81 |
|
82 |
# Run Whisper transcription
|
83 |
+
model = whisper.load_model("turbo",weights_only=True)
|
84 |
result = model.transcribe(wav_file)
|
85 |
transcript = result['text']
|
86 |
return transcript
|
|
|
154 |
return None
|
155 |
|
156 |
def summarize_text_huggingface(text):
|
157 |
+
|
158 |
"""Summarizes text using a Hugging Face summarization model."""
|
159 |
+
device = 0 if torch.cuda.is_available() else -1
|
160 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
|
161 |
+
|
162 |
max_input_length = 1024
|
163 |
chunk_overlap = 100
|
164 |
text_chunks = [
|
|
|
210 |
except Exception as e:
|
211 |
print(f"Error generating content: {e}")
|
212 |
return None
|
213 |
+
|
|
|
214 |
|
215 |
# Add all your functions like `extract_video_id()`, `get_transcript()`, etc.
|
216 |
|