podcastgen

Running on L40S

App Files Files Community

Rausda6 committed on May 21

Commit

7faf9f3

verified ·

1 Parent(s): caafdf1

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -151

app.py CHANGED Viewed

@@ -1,164 +1,167 @@
 import gradio as gr
-from pydub import AudioSegment
-import json
-import uuid
-import edge_tts
-import asyncio
-import aiofiles
-import os
 import time
-import mimetypes
-from typing import List, Dict
-# NEW – Hugging Face Transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# NEW – external model id
-MODEL_ID = "tabularisai/german-gemma-3-1b-it"
-# Constants
-MAX_FILE_SIZE_MB = 20
-MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024  # Convert MB to bytes
-class PodcastGenerator:
-    def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-            device_map="auto",
-        ).eval()
-    async def generate_script(
-        self,
-        prompt: str,
-        language: str,
-        api_key: str,
-        file_obj=None,
-        progress=None,
-    ) -> Dict:
-        example = """
-{
-  "topic": "AGI",
-  "podcast": [
-    {
-      "speaker": 2,
-      "line": "So, AGI, huh? Seems like everyone's talking about it these days."
-    },
-    {
-      "speaker": 1,
-      "line": "Yeah, it's definitely having a moment, isn't it?"
-    }
-  ]
-}
-"""
-        if language == "Auto Detect":
-            language_instruction = (
-                "- The podcast MUST be in the same language as the user input."
-            )
         else:
-            language_instruction = f"- The podcast MUST be in {language} language"
-        system_prompt = f"""
-You are a professional podcast generator. Your task is to generate a professional podcast script based on the user input.
-{language_instruction}
-- The podcast should have 2 speakers.
-- The podcast should be long.
-- Do not use names for the speakers.
-- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
-- The input text might be disorganized or unformatted, originating from sources like PDFs or text files. Ignore any formatting inconsistencies or irrelevant details; your task is to distill the essential points, identify key definitions, and highlight intriguing facts that would be suitable for discussion in a podcast.
-- The script must be in JSON format.
-Follow this example structure:
-{example}
-"""
-        if prompt and file_obj:
-            user_prompt = (
-                f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
             )
-        elif prompt:
-            user_prompt = (
-                f"Please generate a podcast script based on the following user input:\n{prompt}"
             )
-        else:
-            user_prompt = "Please generate a podcast script based on the uploaded file."
-        # If a file is provided we still read it for completeness (not required for HF generation)
-        if file_obj:
-            _ = await self._read_file_bytes(file_obj)
-        if progress:
-            progress(0.3, "Generating podcast script...")
-        inputs = self.tokenizer(
-            f"{system_prompt}\n\n{user_prompt}", return_tensors="pt"
-        ).to(self.model.device)
-        try:
-            output = self.model.generate(**inputs, max_new_tokens=2048, temperature=1.0)
-            response_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
-        except Exception as e:
-            raise Exception(f"Failed to generate podcast script: {e}")
-        print(f"Generated podcast script:\n{response_text}")
-        if progress:
-            progress(0.4, "Script generated successfully!")
-        return json.loads(response_text)
-    async def _read_file_bytes(self, file_obj) -> bytes:
-        if hasattr(file_obj, "size"):
-            file_size = file_obj.size
-        else:
-            file_size = os.path.getsize(file_obj.name)
-        if file_size > MAX_FILE_SIZE_BYTES:
-            raise Exception(
-                f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file."
             )
-        if hasattr(file_obj, "read"):
-            return file_obj.read()
-        else:
-            async with aiofiles.open(file_obj.name, "rb") as f:
-                return await f.read()
-    @staticmethod
-    def _get_mime_type(filename: str) -> str:
-        ext = os.path.splitext(filename)[1].lower()
-        if ext == ".pdf":
-            return "application/pdf"
-        elif ext == ".txt":
-            return "text/plain"
-        else:
-            mime_type, _ = mimetypes.guess_type(filename)
-            return mime_type or "application/octet-stream"
-# Re-add UI definition for Gradio
-async def generate_interface(prompt, language, api_key, file):
-    gen = PodcastGenerator()
-    result = await gen.generate_script(prompt, language, api_key, file)
-    return json.dumps(result, indent=2)
-interface = gr.Interface(
-    fn=generate_interface,
-    inputs=[
-        gr.Textbox(label="Prompt"),
-        gr.Radio(["English", "German", "Auto Detect"], label="Language", value="Auto Detect"),
-        gr.Textbox(label="API Key", type="password"),
-        gr.File(label="Upload File (optional)")
-    ],
-    outputs=gr.Textbox(label="Generated Podcast JSON"),
-    title="Podcast Generator using Gemma",
-    description="Generate a lively podcast script from your input text or uploaded file using the tabularisai/german-gemma-3-1b-it model."
-)
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
+import random
 import time
+import os
+from elevenlabs import generate, set_api_key, save
+from pathlib import Path
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Load the tokenizer and model once at import time so every request reuses them.
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
+model = AutoModelForCausalLM.from_pretrained(
+    "HuggingFaceH4/zephyr-7b-alpha",
+    torch_dtype=torch.float16,  # Use float16 for memory efficiency
+    device_map="auto"  # Automatically determine device placement
+)
+# The ElevenLabs key is optional at startup: only register it when it is
+# actually set, so a missing ELEVENLABS_API_KEY never passes None into the SDK.
+# generate_podcast_audio() checks `api_key` itself and reports a readable error.
+# NOTE(review): the legacy `set_api_key` appears to store the key in an
+# environment variable, which would reject None at import time - confirm.
+api_key = os.getenv("ELEVENLABS_API_KEY")
+if api_key:
+    set_api_key(api_key)
+# Directory that receives the generated MP3 files; created eagerly if absent.
+podcasts_directory = "podcasts"
+os.makedirs(podcasts_directory, exist_ok=True)
+def progress_callback(progress):
+    """Normalize a progress value to a number.
+
+    Returns `progress` unchanged when it is a truthy int, otherwise the result
+    of `float(progress)`. Falsy values (None, 0, empty string, ...) and values
+    that cannot be converted to float yield 0.
+
+    This helper is not referenced anywhere else in the visible code.
+    """
+    if progress:
+        if isinstance(progress, int):
+            return progress
         else:
+            try:
+                return float(progress)
+            except (ValueError, TypeError):
+                # Unconvertible input is treated as "no progress".
+                return 0
+    return 0
+def generate_podcast_intro(podcast_topic, structure, perspective, tone, existing_podcast_info):
+    """Build the LLM prompt for a podcast from the on-disk template.
+
+    Reads `prompt_engineered.txt` (UTF-8, resolved against the current working
+    directory) and fills its `str.format` placeholders with the arguments.
+    Despite the name, the return value is the filled-in prompt text, not a
+    generated intro.
+    """
+    template_text = Path("prompt_engineered.txt").read_text(encoding='utf-8')
+    placeholders = {
+        "podcast_topic": podcast_topic,
+        "structure": structure,
+        "perspective": perspective,
+        "tone": tone,
+        "existing_podcast_info": existing_podcast_info,
+    }
+    return template_text.format(**placeholders)
+# Function to generate content
+def generate_content(prompt):
+    """Run `prompt` through the chat model and return only the generated reply.
+
+    The prompt is wrapped as a single user message and rendered with the
+    tokenizer's own chat template, then sampled with temperature 0.7 and
+    top_p 0.95 for up to 1500 new tokens. The prompt tokens are stripped from
+    the output before decoding.
+    """
+    messages = [{"role": "user", "content": prompt}]
+    # add_generation_prompt=True appends the assistant-turn header defined by
+    # the chat template; without it the model is not cued to start its answer
+    # and may continue the user turn instead.
+    encoded_input = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+    # Inference only: no gradients needed.
+    with torch.no_grad():
+        output = model.generate(
+            encoded_input,
+            max_new_tokens=1500,  # Adjust based on desired output length
+            do_sample=True,
+            temperature=0.7,  # Adjust for creativity vs determinism
+            top_p=0.95
+        )
+    # generate() returns prompt + completion; slice off the prompt tokens.
+    response = tokenizer.decode(output[0][encoded_input.shape[1]:], skip_special_tokens=True)
+    return response
+def generate_podcast_audio(podcast_script, voice, progress=gr.Progress()):
+    """Synthesize `podcast_script` with ElevenLabs and save it as an MP3.
+
+    Returns the path of the written file inside `podcasts_directory` on
+    success. Failures are reported as a return value, not an exception: a
+    string starting with "Error" is returned when no API key is configured or
+    when synthesis/saving raises. `progress` is accepted but not used here.
+    """
+    if not api_key:
+        return "Error: ElevenLabs API key not set. Please set the ELEVENLABS_API_KEY environment variable."
+    try:
+        narration = generate(text=podcast_script, voice=voice, model="eleven_turbo_v2")
+        # Six random lowercase-alphanumeric characters keep file names distinct.
+        alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
+        suffix = ''.join(random.choices(alphabet, k=6))
+        output_path = os.path.join(podcasts_directory, f"podcast_{suffix}.mp3")
+        save(narration, output_path)
+    except Exception as e:
+        return f"Error generating audio: {str(e)}"
+    return output_path
+def create_podcast(podcast_topic, structure, perspective, tone, existing_podcast_info, voice_option, progress=gr.Progress()):
+    """Generate a podcast script and its narration for the Gradio UI.
+
+    Builds the prompt from the form values, generates the script with the
+    language model, then synthesizes it with the selected voice.
+
+    Returns:
+        (script_text, audio_file_path) for the script textbox and audio player.
+
+    Raises:
+        gr.Error: when audio generation did not produce a file; the message is
+            the error text returned by generate_podcast_audio.
+    """
+    # gr.Progress expects a completion fraction in [0, 1], not a percentage.
+    progress(0, desc="Generating podcast content...")
+    prompt = generate_podcast_intro(podcast_topic, structure, perspective, tone, existing_podcast_info)
+    progress(0.2, desc="Processing with AI...")
+    podcast_content = generate_content(prompt)
+    progress(0.6, desc="Generating audio...")
+    audio_file = generate_podcast_audio(podcast_content, voice_option, progress)
+    # generate_podcast_audio signals failure by returning an error message
+    # instead of a path. Handing that string to gr.Audio would make Gradio try
+    # to open it as a file, so surface it to the user as a proper error.
+    if not os.path.isfile(audio_file):
+        raise gr.Error(audio_file)
+    progress(1.0, desc="Complete!")
+    return podcast_content, audio_file
+# Voice names offered in the "Voice for Audio" dropdown; the selected name is
+# passed as the `voice` argument of the ElevenLabs `generate` call.
+# NOTE(review): hard-coded list - presumably ElevenLabs premade voices; verify
+# that each name is still available for the configured account.
+available_voices = [
+    "Adam", "Antoni", "Arnold", "Bella", "Callum", "Charlie", "Christina", "Clyde", "Daniel", "Dorothy",
+    "Ella", "Elli", "Emily", "Fin", "Freya", "Gigi", "Giovanni", "Glinda", "Grace", "Harry",
+    "James", "Jeremy", "Joseph", "Josh", "Knightley", "Liam", "Matilda", "Matthew", "Michael", "Nicole",
+    "Patrick", "Rachel", "Richard", "Sam", "Sarah", "Serena", "Thomas", "Victor", "Wayne", "Charlotte"
+]
+# Gradio UI: form inputs in the left column, generated script and audio in the
+# right column.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎙️ AI Podcast Generator")
+    gr.Markdown("Generate a complete podcast with AI, including audio narration.")
+    with gr.Row():
+        # Left column: everything the user configures before generating.
+        with gr.Column():
+            podcast_topic = gr.Textbox(
+                label="Podcast Topic",
+                placeholder="Enter the main topic of your podcast",
+                lines=2
             )
+            structure = gr.Radio(
+                ["Interview Style", "Solo Monologue", "Panel Discussion", "Storytelling", "Educational"],
+                label="Podcast Structure",
+                value="Interview Style"
             )
+            perspective = gr.Radio(
+                ["Balanced and Objective", "Personal Opinion", "Expert Analysis", "Conversational", "Investigative"],
+                label="Perspective",
+                value="Balanced and Objective"
             )
+            tone = gr.Radio(
+                ["Professional", "Casual & Friendly", "Humorous", "Serious & Formal", "Inspirational"],
+                label="Tone",
+                value="Professional"
+            )
+            existing_podcast_info = gr.Textbox(
+                label="Additional Context (Optional)",
+                placeholder="Any additional information, context, or specific points you want to include",
+                lines=3
+            )
+            voice_option = gr.Dropdown(
+                choices=available_voices,
+                label="Voice for Audio",
+                value="Adam"
+            )
+            generate_btn = gr.Button("Generate Podcast", variant="primary")
+        # Right column: results of a generation run.
+        with gr.Column():
+            podcast_output = gr.Textbox(label="Generated Podcast Script", lines=12)
+            audio_output = gr.Audio(label="Podcast Audio")
+    # The input order must match create_podcast's positional parameters; its
+    # two return values fill the script textbox and the audio player.
+    generate_btn.click(
+        create_podcast,
+        inputs=[podcast_topic, structure, perspective, tone, existing_podcast_info, voice_option],
+        outputs=[podcast_output, audio_output]
+    )
 if __name__ == "__main__":
+    demo.launch()