nihalaninihal committed on
Commit
7cbea0a
Β·
verified Β·
1 Parent(s): bdcbc27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +335 -58
app.py CHANGED
@@ -1,64 +1,341 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  ),
59
- ],
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
 
1
  import gradio as gr
2
+ import base64
3
+ import mimetypes
4
+ import os
5
+ import re
6
+ import struct
7
+ import tempfile
8
+ import asyncio
9
+ from google import genai
10
+ from google.genai import types
11
+
12
+
13
def save_binary_file(file_name, data):
    """Write raw binary *data* to *file_name* and return the path written."""
    with open(file_name, "wb") as out:
        out.write(data)
    return file_name
18
+
19
+
20
def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
    """Prepend a standard 44-byte PCM WAV header to raw audio bytes.

    Sample rate and bit depth are recovered from *mime_type* via
    parse_audio_mime_type(); the stream is treated as mono linear PCM.
    Returns the header followed by the unchanged audio payload.
    """
    params = parse_audio_mime_type(mime_type)
    sample_rate = params["rate"]
    bits_per_sample = params["bits_per_sample"]

    channels = 1
    bytes_per_sample = bits_per_sample // 8
    block_align = channels * bytes_per_sample
    byte_rate = sample_rate * block_align
    payload_size = len(audio_data)

    # Canonical RIFF/WAVE layout: RIFF chunk, 16-byte PCM fmt chunk, data chunk.
    header = struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF",
        36 + payload_size,   # ChunkSize: total file size minus the 8-byte RIFF preamble
        b"WAVE",
        b"fmt ",
        16,                  # Subchunk1Size: fixed at 16 for PCM
        1,                   # AudioFormat: 1 = linear PCM
        channels,
        sample_rate,
        byte_rate,
        block_align,
        bits_per_sample,
        b"data",
        payload_size,        # Subchunk2Size: size of the audio payload
    )
    return header + audio_data
49
+
50
+
51
+ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
52
+ """Parses bits per sample and rate from an audio MIME type string."""
53
+ bits_per_sample = 16
54
+ rate = 24000
55
+
56
+ parts = mime_type.split(";")
57
+ for param in parts:
58
+ param = param.strip()
59
+ if param.lower().startswith("rate="):
60
+ try:
61
+ rate_str = param.split("=", 1)[1]
62
+ rate = int(rate_str)
63
+ except (ValueError, IndexError):
64
+ pass
65
+ elif param.startswith("audio/L"):
66
+ try:
67
+ bits_per_sample = int(param.split("L", 1)[1])
68
+ except (ValueError, IndexError):
69
+ pass
70
+
71
+ return {"bits_per_sample": bits_per_sample, "rate": rate}
72
+
73
+
74
def fetch_web_content(url, progress=gr.Progress()):
    """Fetch and analyze web content using Gemini with tools.

    Streams a Gemini response that reads *url* (via the UrlContext tool,
    with Google Search also available) and returns the accumulated text:
    a podcast-style discussion of the page's content.

    Args:
        url: The web page to analyze.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        The full generated discussion text.

    Raises:
        ValueError: if the GEMINI_API_KEY environment variable is unset.
    """
    progress(0.1, desc="Initializing Gemini client...")

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable is not set")

    client = genai.Client(api_key=api_key)

    progress(0.2, desc="Fetching web content...")

    model = "gemini-2.5-flash-preview-04-17"
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=f"""Please analyze the content from this URL: {url}

Create a comprehensive summary that would be suitable for a podcast discussion between two hosts.
Focus on the key points, interesting aspects, and discussion-worthy topics.

Format your response as a natural conversation between two podcast hosts discussing the content."""),
            ],
        ),
    ]

    tools = [
        types.Tool(url_context=types.UrlContext()),
        types.Tool(google_search=types.GoogleSearch()),
    ]

    generate_content_config = types.GenerateContentConfig(
        tools=tools,
        response_mime_type="text/plain",
    )

    progress(0.4, desc="Analyzing content with AI...")

    content_text = ""
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # chunk.text can be None for non-text stream events (e.g. tool-call
        # chunks); guard so we don't raise TypeError on `str + None`.
        if chunk.text:
            content_text += chunk.text

    progress(0.6, desc="Content analysis complete!")
    return content_text
123
+
124
+
125
def generate_podcast_from_content(content_text, speaker1_name="Anna Chope", speaker2_name="Adam Chan", progress=gr.Progress()):
    """Generate an audio podcast (WAV file) from text content via Gemini TTS.

    Args:
        content_text: The script/content the two hosts should discuss.
        speaker1_name: Display name for the first host (voice "Zephyr").
        speaker2_name: Display name for the second host (voice "Puck").
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Path to a temporary ``.wav`` file containing the full generated audio.

    Raises:
        ValueError: if GEMINI_API_KEY is unset or no audio data was returned.
    """
    progress(0.7, desc="Generating podcast audio...")

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable is not set")

    client = genai.Client(api_key=api_key)

    model = "gemini-2.5-flash-preview-tts"

    podcast_prompt = f"""Please read aloud the following content in a natural podcast interview style with two distinct speakers.
Make it sound conversational and engaging:

{content_text}

If the content is not already in dialogue format, please convert it into a natural conversation between two podcast hosts Speaker 1 {speaker1_name} and Speaker 2 {speaker2_name} discussing the topic. They should introduce themselves at the beginning."""

    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=podcast_prompt),
            ],
        ),
    ]

    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        response_modalities=[
            "audio",
        ],
        speech_config=types.SpeechConfig(
            multi_speaker_voice_config=types.MultiSpeakerVoiceConfig(
                speaker_voice_configs=[
                    types.SpeakerVoiceConfig(
                        speaker="Speaker 1",
                        voice_config=types.VoiceConfig(
                            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                                voice_name="Zephyr"
                            )
                        ),
                    ),
                    types.SpeakerVoiceConfig(
                        speaker="Speaker 2",
                        voice_config=types.VoiceConfig(
                            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                                voice_name="Puck"
                            )
                        ),
                    ),
                ]
            ),
        ),
    )

    progress(0.8, desc="Converting to audio...")

    # Accumulate the RAW audio bytes from every streamed chunk and wrap them
    # in a single WAV header at the end. Wrapping each chunk individually and
    # keeping only the first one would truncate the podcast to its first
    # few seconds and embed extra headers inside the audio stream.
    raw_chunks = []
    audio_mime = None  # MIME type reported by the first audio chunk

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # Skip keep-alive/metadata chunks that carry no content parts.
        if (
            chunk.candidates is None
            or chunk.candidates[0].content is None
            or chunk.candidates[0].content.parts is None
        ):
            continue

        inline_data = chunk.candidates[0].content.parts[0].inline_data
        if inline_data and inline_data.data:
            raw_chunks.append(inline_data.data)
            if audio_mime is None:
                audio_mime = inline_data.mime_type

    if not raw_chunks:
        raise ValueError("No audio data generated")

    combined = b"".join(raw_chunks)
    # Raw PCM (e.g. "audio/L16;rate=24000") needs a WAV container for playback.
    if audio_mime != "audio/wav":
        combined = convert_to_wav(combined, audio_mime)

    # Temp file path handed to the Gradio Audio component (type="filepath").
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    temp_file.close()
    save_binary_file(temp_file.name, combined)

    progress(1.0, desc="Podcast generated successfully!")
    return temp_file.name
223
+
224
+
225
def generate_web_podcast(url, speaker1_name, speaker2_name, progress=gr.Progress()):
    """End-to-end pipeline: fetch a URL, summarize it, and voice it as a podcast.

    Returns a (audio_path, status_message, script_text) tuple; on failure the
    audio path is None and the status message carries the error description,
    so the Gradio UI can display it instead of crashing.
    """
    try:
        progress(0.0, desc="Starting podcast generation...")

        # Reject obviously malformed URLs before spending API quota.
        if not url or not url.startswith(('http://', 'https://')):
            raise ValueError("Please enter a valid URL starting with http:// or https://")

        # Stage 1: turn the page into a discussion script.
        script = fetch_web_content(url, progress)

        # Stage 2: voice the script with two TTS speakers.
        audio_path = generate_podcast_from_content(script, speaker1_name, speaker2_name, progress)

        return audio_path, "βœ… Podcast generated successfully!", script

    except Exception as exc:  # surfaced to the UI status box rather than raised
        return None, f"❌ Error generating podcast: {str(exc)}", ""
245
+
246
+
247
+ # Create Gradio interface
248
def create_interface():
    """Build and return the Gradio Blocks UI for the web-to-podcast app."""
    with gr.Blocks(title="πŸŽ™οΈ Web-to-Podcast Generator", theme=gr.themes.Soft()) as demo:
        # Page header / tagline.
        gr.Markdown("""
        # πŸŽ™οΈ Web-to-Podcast Generator

        Transform any website into an engaging podcast conversation between two AI hosts!

        Simply paste a URL and let AI create a natural dialogue discussing the content.
        """)

        with gr.Row():
            # Left column: user inputs (URL + host names) and the trigger button.
            with gr.Column(scale=2):
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    info="Enter the URL of the website you want to convert to a podcast"
                )

                with gr.Row():
                    speaker1_input = gr.Textbox(
                        label="Host 1 Name",
                        value="Anna Chope",
                        info="Name of the first podcast host"
                    )
                    speaker2_input = gr.Textbox(
                        label="Host 2 Name",
                        value="Adam Chan",
                        info="Name of the second podcast host"
                    )

                generate_btn = gr.Button("πŸŽ™οΈ Generate Podcast", variant="primary", size="lg")

            # Right column: static usage instructions.
            with gr.Column(scale=1):
                gr.Markdown("""
                ### Instructions:
                1. Enter a website URL
                2. Customize host names (optional)
                3. Click "Generate Podcast"
                4. Wait for the AI to analyze content and create audio
                5. Download your podcast!

                ### Examples:
                - News articles
                - Blog posts
                - Product pages
                - Documentation
                - Research papers
                """)

        # Outputs: status line, playable audio, and a collapsible script preview.
        with gr.Row():
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Row():
            audio_output = gr.Audio(label="Generated Podcast", type="filepath")

        with gr.Accordion("πŸ“ Generated Script Preview", open=False):
            script_output = gr.Textbox(
                label="Podcast Script",
                lines=10,
                interactive=False,
                info="Preview of the conversation script generated from the website content"
            )

        # Event handlers: button click runs the full fetch -> script -> TTS
        # pipeline; generate_web_podcast returns (audio, status, script).
        generate_btn.click(
            fn=generate_web_podcast,
            inputs=[url_input, speaker1_input, speaker2_input],
            outputs=[audio_output, status_output, script_output],
            show_progress=True
        )

        # Examples: clickable presets that fill the three inputs.
        gr.Examples(
            examples=[
                ["https://github.com/weaviate/weaviate", "Anna", "Adam"],
                ["https://huggingface.co/blog", "Sarah", "Mike"],
                ["https://openai.com/blog", "Emma", "John"],
            ],
            inputs=[url_input, speaker1_input, speaker2_input],
        )

        # Footer note about the required API key.
        gr.Markdown("""
        ---
        **Note:** This app requires a Gemini API key to function. Make sure the `GEMINI_API_KEY` environment variable is set.

        The generated podcast will feature two AI voices having a natural conversation about the website content.
        """)

    return demo
337
 
338
 
339
if __name__ == "__main__":
    # Script entry point: build the Gradio UI and start the local server.
    create_interface().launch()