Spaces:

nihalaninihal
/

knowledgeCast

Sleeping

App Files Files Community

nihalaninihal committed 25 days ago

Commit

7cbea0a

verified ·

1 Parent(s): bdcbc27

Update app.py

Browse files

Files changed (1) hide show

app.py +335 -58

app.py CHANGED Viewed

@@ -1,64 +1,341 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
     ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
         ),
-    ],
-)
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import base64
+import mimetypes
+import os
+import re
+import struct
+import tempfile
+import asyncio
+from google import genai
+from google.genai import types
+def save_binary_file(file_name, data):
+    """Write ``data`` to ``file_name`` in binary mode and return the path.
+
+    An existing file at that path is overwritten.
+    """
+    with open(file_name, mode="wb") as sink:
+        sink.write(data)
+    return file_name
+def convert_to_wav(audio_data: bytes, mime_type: str, num_channels: int = 1) -> bytes:
+    """Wrap raw PCM audio bytes in a WAV (RIFF) container.
+
+    The result is a complete WAV payload: a 44-byte PCM header followed by
+    ``audio_data`` unchanged (not just the header).
+
+    Args:
+        audio_data: Raw PCM sample bytes.
+        mime_type: Audio MIME type; bits per sample and sample rate are read
+            from it via ``parse_audio_mime_type``.
+        num_channels: Number of interleaved channels in ``audio_data``.
+            Defaults to 1 (mono), the value that used to be hard-coded.
+
+    Returns:
+        The WAV header concatenated with ``audio_data``.
+
+    Raises:
+        ValueError: If ``num_channels`` is less than 1.
+    """
+    if num_channels < 1:
+        raise ValueError("num_channels must be at least 1")
+    parameters = parse_audio_mime_type(mime_type)
+    bits_per_sample = parameters["bits_per_sample"]
+    sample_rate = parameters["rate"]
+    data_size = len(audio_data)
+    bytes_per_sample = bits_per_sample // 8
+    block_align = num_channels * bytes_per_sample
+    byte_rate = sample_rate * block_align
+    # RIFF ChunkSize excludes the 8 bytes of "RIFF" + the size field itself,
+    # leaving 36 header bytes plus the sample data.
+    chunk_size = 36 + data_size
+    header = struct.pack(
+        "<4sI4s4sIHHIIHH4sI",
+        b"RIFF",          # ChunkID
+        chunk_size,       # ChunkSize (total file size - 8 bytes)
+        b"WAVE",          # Format
+        b"fmt ",          # Subchunk1ID
+        16,               # Subchunk1Size (16 for PCM)
+        1,                # AudioFormat (1 for PCM)
+        num_channels,     # NumChannels
+        sample_rate,      # SampleRate
+        byte_rate,        # ByteRate
+        block_align,      # BlockAlign
+        bits_per_sample,  # BitsPerSample
+        b"data",          # Subchunk2ID
+        data_size,        # Subchunk2Size (size of audio data)
+    )
+    return header + audio_data
+def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
+    """Extract bits per sample and sample rate from an audio MIME type string.
+
+    The string is split on ``;``. A segment of the form ``audio/L<bits>``
+    sets the sample width and a ``rate=<hz>`` segment sets the sample rate.
+    Both are matched case-insensitively. Segments that are missing or whose
+    number cannot be parsed leave the defaults (16 bits, 24000 Hz) in place.
+
+    Args:
+        mime_type: MIME type such as ``"audio/L16;codec=pcm;rate=24000"``.
+
+    Returns:
+        A dict with integer ``"bits_per_sample"`` and ``"rate"`` entries.
+    """
+    bits_per_sample = 16
+    rate = 24000
+    linear_prefix = "audio/l"
+    for segment in mime_type.split(";"):
+        segment = segment.strip()
+        lowered = segment.lower()
+        if lowered.startswith("rate="):
+            try:
+                rate = int(segment.split("=", 1)[1])
+            except ValueError:
+                pass  # Unparseable rate: keep the default.
+        elif lowered.startswith(linear_prefix):
+            try:
+                bits_per_sample = int(segment[len(linear_prefix):])
+            except ValueError:
+                pass  # e.g. "audio/Lxyz": keep the default.
+    return {"bits_per_sample": bits_per_sample, "rate": rate}
+def fetch_web_content(url, progress=gr.Progress()):
+    """Ask Gemini to read ``url`` and draft a two-host podcast script.
+
+    The request enables the URL-context and Google Search tools and streams
+    the response, which is accumulated into a single string.
+
+    Args:
+        url: Address of the page to analyze; interpolated into the prompt.
+        progress: Gradio progress callback used to report stage updates.
+
+    Returns:
+        The concatenated text of all streamed response chunks.
+
+    Raises:
+        ValueError: If the ``GEMINI_API_KEY`` environment variable is not set.
+    """
+    progress(0.1, desc="Initializing Gemini client...")
+    api_key = os.environ.get("GEMINI_API_KEY")
+    if not api_key:
+        raise ValueError("GEMINI_API_KEY environment variable is not set")
+    client = genai.Client(api_key=api_key)
+    progress(0.2, desc="Fetching web content...")
+    model = "gemini-2.5-flash-preview-04-17"
+    contents = [
+        types.Content(
+            role="user",
+            parts=[
+                types.Part.from_text(text=f"""Please analyze the content from this URL: {url}
+                Create a comprehensive summary that would be suitable for a podcast discussion between two hosts.
+                Focus on the key points, interesting aspects, and discussion-worthy topics.
+                Format your response as a natural conversation between two podcast hosts discussing the content."""),
+            ],
+        ),
+    ]
+    tools = [
+        types.Tool(url_context=types.UrlContext()),
+        types.Tool(google_search=types.GoogleSearch()),
+    ]
+    generate_content_config = types.GenerateContentConfig(
+        tools=tools,
+        response_mime_type="text/plain",
+    )
+    progress(0.4, desc="Analyzing content with AI...")
+    text_parts = []
+    for chunk in client.models.generate_content_stream(
+        model=model,
+        contents=contents,
+        config=generate_content_config,
+    ):
+        # A streamed chunk may carry no text (chunk.text is then None);
+        # appending it unguarded would raise TypeError.
+        if chunk.text:
+            text_parts.append(chunk.text)
+    content_text = "".join(text_parts)
+    progress(0.6, desc="Content analysis complete!")
+    return content_text
+def generate_podcast_from_content(content_text, speaker1_name="Anna Chope", speaker2_name="Adam Chan", progress=gr.Progress()):
+    """Synthesize a two-voice podcast audio file from text with Gemini TTS.
+
+    All audio payloads streamed back by the model are collected. Raw PCM
+    payloads are concatenated and wrapped in a single WAV header, so the
+    saved file contains the whole recording rather than only the first chunk.
+
+    Args:
+        content_text: Script or summary to be read aloud.
+        speaker1_name: Name the prompt assigns to "Speaker 1".
+        speaker2_name: Name the prompt assigns to "Speaker 2".
+        progress: Gradio progress callback used to report stage updates.
+
+    Returns:
+        Path of a temporary ``.wav`` file holding the audio. The file is
+        created with ``delete=False``, so it is not removed automatically.
+
+    Raises:
+        ValueError: If ``GEMINI_API_KEY`` is not set, or if the response
+            contained no audio data.
+    """
+    progress(0.7, desc="Generating podcast audio...")
+    api_key = os.environ.get("GEMINI_API_KEY")
+    if not api_key:
+        raise ValueError("GEMINI_API_KEY environment variable is not set")
+    client = genai.Client(api_key=api_key)
+    model = "gemini-2.5-flash-preview-tts"
+    podcast_prompt = f"""Please read aloud the following content in a natural podcast interview style with two distinct speakers.
+    Make it sound conversational and engaging:
+    {content_text}
+    If the content is not already in dialogue format, please convert it into a natural conversation between two podcast hosts Speaker 1 {speaker1_name} and Speaker 2 {speaker2_name} discussing the topic. They should introduce themselves at the beginning."""
+    contents = [
+        types.Content(
+            role="user",
+            parts=[
+                types.Part.from_text(text=podcast_prompt),
+            ],
+        ),
+    ]
+    generate_content_config = types.GenerateContentConfig(
+        temperature=1,
+        response_modalities=[
+            "audio",
+        ],
+        speech_config=types.SpeechConfig(
+            multi_speaker_voice_config=types.MultiSpeakerVoiceConfig(
+                speaker_voice_configs=[
+                    types.SpeakerVoiceConfig(
+                        speaker="Speaker 1",
+                        voice_config=types.VoiceConfig(
+                            prebuilt_voice_config=types.PrebuiltVoiceConfig(
+                                voice_name="Zephyr"
+                            )
+                        ),
+                    ),
+                    types.SpeakerVoiceConfig(
+                        speaker="Speaker 2",
+                        voice_config=types.VoiceConfig(
+                            prebuilt_voice_config=types.PrebuiltVoiceConfig(
+                                voice_name="Puck"
+                            )
+                        ),
+                    ),
+                ]
+            ),
+        ),
+    )
+    progress(0.8, desc="Converting to audio...")
+    audio_parts = []
+    audio_mime_type = None
+    for chunk in client.models.generate_content_stream(
+        model=model,
+        contents=contents,
+        config=generate_content_config,
+    ):
+        # "not candidates" also covers an empty list, which would otherwise
+        # raise IndexError on candidates[0].
+        if (
+            not chunk.candidates
+            or chunk.candidates[0].content is None
+            or chunk.candidates[0].content.parts is None
+        ):
+            continue
+        for part in chunk.candidates[0].content.parts:
+            inline_data = part.inline_data
+            if inline_data and inline_data.data:
+                if audio_mime_type is None:
+                    # The first payload's MIME type describes the stream.
+                    audio_mime_type = inline_data.mime_type
+                audio_parts.append(inline_data.data)
+    if not audio_parts:
+        raise ValueError("No audio data generated")
+    if audio_mime_type == "audio/wav":
+        # Payloads that are already WAV files each carry their own header and
+        # cannot be joined bytewise, so keep the first one as before.
+        final_audio = audio_parts[0]
+    else:
+        # Join the raw samples first, then add exactly one WAV header.
+        final_audio = convert_to_wav(b"".join(audio_parts), audio_mime_type or "")
+    # Create the temp file only once there is audio to store, so a failed
+    # request does not leave an empty file behind.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
+        temp_file.write(final_audio)
+    progress(1.0, desc="Podcast generated successfully!")
+    return temp_file.name
+def generate_web_podcast(url, speaker1_name, speaker2_name, progress=gr.Progress()):
+    """Turn a web page into a podcast: fetch/analyze the URL, then synthesize audio.
+
+    This is the click handler's entry point, so it never raises: any failure
+    is converted into a status message for the UI.
+
+    Args:
+        url: Page address; surrounding whitespace is ignored and the
+            ``http://`` / ``https://`` scheme check is case-insensitive.
+        speaker1_name: Name of the first host.
+        speaker2_name: Name of the second host.
+        progress: Gradio progress callback used to report stage updates.
+
+    Returns:
+        A tuple ``(audio_path, status_message, script_text)``. On failure
+        ``audio_path`` is ``None`` and ``script_text`` is an empty string.
+    """
+    try:
+        progress(0.0, desc="Starting podcast generation...")
+        # Pasted URLs often carry stray whitespace; normalize before checking.
+        url = (url or "").strip()
+        if not url.lower().startswith(("http://", "https://")):
+            raise ValueError("Please enter a valid URL starting with http:// or https://")
+        # Step 1: Fetch and analyze web content
+        content_text = fetch_web_content(url, progress)
+        # Step 2: Generate podcast from the content
+        audio_file = generate_podcast_from_content(content_text, speaker1_name, speaker2_name, progress)
+        return audio_file, "✅ Podcast generated successfully!", content_text
+    except Exception as e:
+        # Top-level UI boundary: report the error in the status box.
+        return None, f"❌ Error generating podcast: {e}", ""
+# Create Gradio interface
+def create_interface():
+    """Build and return the Gradio ``Blocks`` UI for the web-to-podcast app.
+
+    The layout has a URL box, two host-name boxes and a generate button on
+    the left, static instructions on the right, and below them a status box,
+    an audio player and a collapsible script preview. The button is wired to
+    ``generate_web_podcast``.
+    """
+    with gr.Blocks(title="🎙️ Web-to-Podcast Generator", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
+        # 🎙️ Web-to-Podcast Generator
+        Transform any website into an engaging podcast conversation between two AI hosts!
+        Simply paste a URL and let AI create a natural dialogue discussing the content.
+        """)
+        with gr.Row():
+            # Left column: user inputs and the trigger button.
+            with gr.Column(scale=2):
+                url_input = gr.Textbox(
+                    label="Website URL",
+                    placeholder="https://example.com",
+                    info="Enter the URL of the website you want to convert to a podcast"
+                )
+                with gr.Row():
+                    speaker1_input = gr.Textbox(
+                        label="Host 1 Name",
+                        value="Anna Chope",
+                        info="Name of the first podcast host"
+                    )
+                    speaker2_input = gr.Textbox(
+                        label="Host 2 Name",
+                        value="Adam Chan",
+                        info="Name of the second podcast host"
+                    )
+                generate_btn = gr.Button("🎙️ Generate Podcast", variant="primary", size="lg")
+            # Right column: static usage instructions.
+            with gr.Column(scale=1):
+                gr.Markdown("""
+                ### Instructions:
+                1. Enter a website URL
+                2. Customize host names (optional)
+                3. Click "Generate Podcast"
+                4. Wait for the AI to analyze content and create audio
+                5. Download your podcast!
+                ### Examples:
+                - News articles
+                - Blog posts
+                - Product pages
+                - Documentation
+                - Research papers
+                """)
+        # Output widgets; their order matches generate_web_podcast's return
+        # tuple as listed in `outputs` below (audio, status, script).
+        with gr.Row():
+            status_output = gr.Textbox(label="Status", interactive=False)
+        with gr.Row():
+            audio_output = gr.Audio(label="Generated Podcast", type="filepath")
+        with gr.Accordion("📝 Generated Script Preview", open=False):
+            script_output = gr.Textbox(
+                label="Podcast Script",
+                lines=10,
+                interactive=False,
+                info="Preview of the conversation script generated from the website content"
+            )
+        # Event handlers
+        generate_btn.click(
+            fn=generate_web_podcast,
+            inputs=[url_input, speaker1_input, speaker2_input],
+            outputs=[audio_output, status_output, script_output],
+            show_progress=True
+        )
+        # Examples: sample (URL, host 1, host 2) rows for the three inputs.
+        gr.Examples(
+            examples=[
+                ["https://github.com/weaviate/weaviate", "Anna", "Adam"],
+                ["https://huggingface.co/blog", "Sarah", "Mike"],
+                ["https://openai.com/blog", "Emma", "John"],
+            ],
+            inputs=[url_input, speaker1_input, speaker2_input],
+        )
+        gr.Markdown("""
+        ---
+        **Note:** This app requires a Gemini API key to function. Make sure the `GEMINI_API_KEY` environment variable is set.
+        The generated podcast will feature two AI voices having a natural conversation about the website content.
+        """)
+    return demo
 if __name__ == "__main__":
+    # Build the interface and launch it only when run as a script.
+    demo = create_interface()
+    demo.launch()