Spaces:

AnalysisWithMSR
/

SEO

Sleeping

App Files Community

AnalysisWithMSR committed on Dec 15, 2024

Commit

2cb5449

verified ·

1 Parent(s): 93d65a4

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -179

app.py CHANGED Viewed

@@ -1,182 +1,10 @@
-import tempfile
-import gradio as gr
-import googleapiclient.discovery
-import re
-import yt_dlp
-import whisper
-from pydub import AudioSegment
-from transformers import pipeline
-from youtube_transcript_api import YouTubeTranscriptApi
-import openai
-import json
 import os
-from urllib.parse import urlparse, parse_qs
-import torch
-def extract_video_id(url):
-    """Extracts the video ID from a YouTube URL."""
-    try:
-        parsed_url = urlparse(url)
-        if "youtube.com" in parsed_url.netloc:
-            query_params = parse_qs(parsed_url.query)
-            return query_params.get('v', [None])[0]
-        elif "youtu.be" in parsed_url.netloc:
-            return parsed_url.path.strip("/")
-        else:
-            print("Invalid YouTube URL.")
-            return None
-    except Exception as e:
-        print(f"Error parsing URL: {e}")
-        return None
-def get_video_duration(video_id, api_key):
-    """Fetches the video duration in minutes."""
-    try:
-        youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
-        request = youtube.videos().list(part="contentDetails", id=video_id)
-        response = request.execute()
-        if response["items"]:
-            duration = response["items"][0]["contentDetails"]["duration"]
-            match = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', duration)
-            hours = int(match.group(1)) if match.group(1) else 0
-            minutes = int(match.group(2)) if match.group(2) else 0
-            seconds = int(match.group(3)) if match.group(3) else 0
-            return hours * 60 + minutes + seconds / 60
-        else:
-            print("No video details found.")
-            return None
-    except Exception as e:
-        print(f"Error fetching video duration: {e}")
-        return None
-def download_and_transcribe_with_whisper(youtube_url):
-    try:
-        with tempfile.TemporaryDirectory() as temp_dir:
-            temp_audio_file = os.path.join(temp_dir, "audio.mp3")
-            ydl_opts = {
-                'format': 'bestaudio/best',
-                'outtmpl': temp_audio_file,
-            }
-            # Download audio using yt-dlp
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([youtube_url])
-            # Convert to wav for Whisper
-            audio = AudioSegment.from_file(temp_audio_file)
-            wav_file = os.path.join(temp_dir, "audio.wav")
-            audio.export(wav_file, format="wav")
-            # Run Whisper transcription
-            model = whisper.load_model("Turbo",weights_only=True)
-            result = model.transcribe(wav_file)
-            return result['text']
-    except Exception as e:
-        print(f"Error during transcription: {e}")
-        return None
-def get_transcript(youtube_url, api_key):
-    """Gets transcript from YouTube API or Whisper if unavailable."""
-    video_id = extract_video_id(youtube_url)
-    if not video_id:
-        return None
-    video_length = get_video_duration(video_id, api_key)
-    if video_length is not None:
-        print(f"Video length: {video_length} minutes.")
-        try:
-            transcript = YouTubeTranscriptApi.get_transcript(video_id)
-            return " ".join([segment['text'] for segment in transcript])
-        except Exception as e:
-            print(f"No transcript found via YouTube API: {e}")
-            return download_and_transcribe_with_whisper(youtube_url)
-    else:
-        print("Error fetching video duration.")
-        return None
-def summarize_text_huggingface(text):
-    """Summarizes text using a Hugging Face summarization model."""
-    device = 0 if torch.cuda.is_available() else -1
-    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
-    max_input_length = 1024
-    chunk_overlap = 100
-    text_chunks = [
-        text[i:i + max_input_length]
-        for i in range(0, len(text), max_input_length - chunk_overlap)
-    ]
-    summaries = [
-        summarizer(chunk, max_length=100, min_length=50, do_sample=False)[0]['summary_text']
-        for chunk in text_chunks
-    ]
-    return " ".join(summaries)
-def generate_optimized_content(api_key, summarized_transcript):
-    openai.api_key = api_key
-    prompt = f"""
-    Analyze the following summarized YouTube video transcript and:
-    1. Extract the top 10 keywords.
-    2. Generate an optimized title (less than 65 characters).
-    3. Create an engaging description.
-    4. Generate related tags for the video.
-    Summarized Transcript:
-    {summarized_transcript}
-    Provide the results in the following JSON format:
-    {{
-        "keywords": ["keyword1", "keyword2", ..., "keyword10"],
-        "title": "Generated Title",
-        "description": "Generated Description",
-        "tags": ["tag1", "tag2", ..., "tag10"]
-    }}
-    """
-    try:
-        response = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are an SEO expert."},
-                {"role": "user", "content": prompt}
-            ]
-        )
-        return json.loads(response['choices'][0]['message']['content'])
-    except Exception as e:
-        print(f"Error generating content: {e}")
-        return None
-def youtube_seo_pipeline(youtube_url):
-    YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
-    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-    if not YOUTUBE_API_KEY or not OPENAI_API_KEY:
-        return "API keys missing! Please check environment variables."
-    video_id = extract_video_id(youtube_url)
-    if not video_id:
-        return "Invalid YouTube URL."
-    transcript = get_transcript(youtube_url, YOUTUBE_API_KEY)
-    if not transcript:
-        return "Failed to fetch transcript."
-    summarized_text = summarize_text_huggingface(transcript)
-    optimized_content = generate_optimized_content(OPENAI_API_KEY, summarized_text)
-    return json.dumps(optimized_content, indent=4) if optimized_content else "Failed to generate SEO content."
-# Gradio Interface
-iface = gr.Interface(
-    fn=youtube_seo_pipeline,
-    inputs="text",
-    outputs="text",
-    title="YouTube SEO Optimizer",
-    description="Enter a YouTube video URL to fetch and optimize SEO content."
-)
-if __name__ == "__main__":
-    iface.launch()

 import os
+# Fetch the API key from the environment
+youtube_api_key = os.getenv("YOUTUBE_API_KEY")
+# Check if the key is set and print accordingly
+if youtube_api_key:
+    print("YOUTUBE_API_KEY is set.")
+else:
+    print("YOUTUBE_API_KEY is not set.")