Hamed744 committed on
Commit
3f42665
·
verified ·
1 Parent(s): 6ca7d8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +287 -203
app.py CHANGED
@@ -9,14 +9,19 @@ import zipfile
9
  from google import genai
10
  from google.genai import types
11
 
 
 
 
 
12
  try:
13
  from pydub import AudioSegment
14
  PYDUB_AVAILABLE = True
15
  except ImportError:
16
  PYDUB_AVAILABLE = False
17
- print("⚠️ pydub is not available. Audio files will be saved separately.")
 
18
 
19
- # --- Constants (previously from @param) ---
20
  SPEAKER_VOICES = [
21
  "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
22
  "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux", "Pulcherrima",
@@ -26,51 +31,61 @@ SPEAKER_VOICES = [
26
  ]
27
  MODELS = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
28
 
29
- # --- Helper functions (mostly unchanged, minor adaptations if any) ---
30
  def save_binary_file(file_name, data):
31
- # Ensure we are writing to a path Gradio can access (usually current dir is fine for temp files)
32
  abs_file_name = os.path.abspath(file_name)
33
- with open(abs_file_name, "wb") as f:
34
- f.write(data)
35
- print(f"βœ… File saved at: {abs_file_name}")
36
- return abs_file_name
 
 
 
 
37
 
38
  def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
39
  parameters = parse_audio_mime_type(mime_type)
40
  bits_per_sample = parameters["bits_per_sample"]
41
  sample_rate = parameters["rate"]
42
- num_channels = 1
43
  data_size = len(audio_data)
44
  bytes_per_sample = bits_per_sample // 8
45
  block_align = num_channels * bytes_per_sample
46
  byte_rate = sample_rate * block_align
47
- chunk_size = 36 + data_size
 
48
  header = struct.pack(
49
  "<4sI4s4sIHHIIHH4sI",
50
- b"RIFF", chunk_size, b"WAVE", b"fmt ", 16, 1, num_channels,
51
- sample_rate, byte_rate, block_align, bits_per_sample, b"data", data_size
 
 
52
  )
53
  return header + audio_data
54
 
55
  def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
56
- bits_per_sample = 16
57
- rate = 24000
58
- parts = mime_type.split(";")
59
- for param in parts:
60
- param = param.strip()
61
- if param.lower().startswith("rate="):
62
- try:
63
- rate_str = param.split("=", 1)[1]
64
- rate = int(rate_str)
65
- except (ValueError, IndexError): pass
66
- elif param.startswith("audio/L"):
67
- try:
68
- bits_per_sample = int(param.split("L", 1)[1])
69
- except (ValueError, IndexError): pass
 
 
 
 
 
70
  return {"bits_per_sample": bits_per_sample, "rate": rate}
71
 
72
  def load_text_from_gr_file(file_obj):
73
- """Load text from a Gradio file object."""
74
  if file_obj is None:
75
  return "", "No file provided for text input."
76
  try:
@@ -78,7 +93,6 @@ def load_text_from_gr_file(file_obj):
78
  content = f.read().strip()
79
  if not content:
80
  return "", "Text file is empty."
81
- print(f"πŸ“– Text loaded from file: {len(content)} characters")
82
  return content, f"Successfully loaded {len(content)} chars from {os.path.basename(file_obj.name)}."
83
  except Exception as e:
84
  return "", f"Error reading text file: {e}"
@@ -88,25 +102,39 @@ def smart_text_split(text, max_size=3800):
88
  return [text]
89
  chunks = []
90
  current_chunk = ""
91
- sentences = re.split(r'(?<=[.!?])\s+', text)
92
  for sentence in sentences:
93
- if len(current_chunk) + len(sentence) + 1 > max_size:
94
- if current_chunk:
 
 
95
  chunks.append(current_chunk.strip())
96
- # Simplified logic for very long sentences (can be improved)
 
 
97
  if len(sentence) > max_size:
98
- # Split long sentence further
99
- start = 0
100
- while start < len(sentence):
101
- end = start + max_size
102
- chunks.append(sentence[start:end].strip())
103
- start = end
104
- current_chunk = ""
105
- else:
 
 
 
 
 
 
 
 
 
106
  current_chunk = sentence
107
- else:
108
  current_chunk += (" " if current_chunk else "") + sentence
109
- if current_chunk:
 
110
  chunks.append(current_chunk.strip())
111
  return chunks
112
 
@@ -114,24 +142,32 @@ def smart_text_split(text, max_size=3800):
114
  def merge_audio_files_func(file_paths, output_path):
115
  if not PYDUB_AVAILABLE:
116
  return False, "pydub is not available. Cannot merge files.", None
 
 
117
  try:
118
- print(f"πŸ”— Merging {len(file_paths)} audio files...")
119
  combined = AudioSegment.empty()
120
  for i, file_path in enumerate(file_paths):
121
  if os.path.exists(file_path):
122
- print(f"πŸ“Ž Adding file {i+1}: {file_path}")
123
- audio = AudioSegment.from_file(file_path)
124
- combined += audio
125
- if i < len(file_paths) - 1: # Add small silence between segments
126
- combined += AudioSegment.silent(duration=200)
 
 
 
 
 
127
  else:
128
- print(f"⚠️ File not found: {file_path}")
 
 
129
 
130
  abs_output_path = os.path.abspath(output_path)
131
  combined.export(abs_output_path, format="wav")
132
- print(f"βœ… Merged file saved: {abs_output_path}")
133
  return True, f"Merged file saved: {os.path.basename(abs_output_path)}", abs_output_path
134
  except Exception as e:
 
135
  return False, f"Error merging files: {e}", None
136
 
137
  def create_zip_file(file_paths, zip_name):
@@ -141,30 +177,37 @@ def create_zip_file(file_paths, zip_name):
141
  for file_path in file_paths:
142
  if os.path.exists(file_path):
143
  zipf.write(file_path, os.path.basename(file_path))
144
- print(f"πŸ“¦ ZIP file created: {abs_zip_name}")
145
  return True, f"ZIP file created: {os.path.basename(abs_zip_name)}", abs_zip_name
146
  except Exception as e:
147
  return False, f"Error creating ZIP file: {e}", None
148
 
149
- # --- Main generation function (modified for Gradio) ---
150
  def generate_audio_for_gradio(
151
- api_key_input,
152
- use_file_input_checkbox, text_file_obj, # text_file_obj is from gr.File
153
  speech_prompt_input, text_to_speak_input,
154
  max_chunk_slider, sleep_slider, temperature_slider,
155
  model_dropdown, speaker_dropdown, output_filename_base_input,
156
- merge_checkbox, delete_partials_checkbox
 
 
157
  ):
158
  status_messages = []
159
- print("πŸš€ Starting Text-to-Speech process...")
160
  status_messages.append("πŸš€ Starting Text-to-Speech process...")
161
-
162
- # 1. API Key Validation
163
- if not api_key_input:
164
- status_messages.append("❌ Error: Gemini API Key is required.")
 
 
 
 
 
 
165
  return None, None, "\n".join(status_messages)
166
- os.environ["GEMINI_API_KEY"] = api_key_input
167
- status_messages.append("πŸ”‘ API Key set.")
 
168
 
169
  # 2. Determine Text Input
170
  actual_text_input = ""
@@ -187,6 +230,7 @@ def generate_audio_for_gradio(
187
  # 3. Initialize GenAI Client
188
  try:
189
  status_messages.append("πŸ› οΈ Initializing Gemini client...")
 
190
  client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
191
  status_messages.append("βœ… Gemini client initialized.")
192
  except Exception as e:
@@ -196,22 +240,25 @@ def generate_audio_for_gradio(
196
  # 4. Split text
197
  text_chunks = smart_text_split(actual_text_input, int(max_chunk_slider))
198
  status_messages.append(f"πŸ“Š Text split into {len(text_chunks)} chunk(s).")
199
- for i, chunk in enumerate(text_chunks):
200
- status_messages.append(f" πŸ“ Chunk {i+1}: {len(chunk)} chars")
201
 
202
  # 5. Generate audio for each chunk
203
  generated_audio_files = []
204
- # Create a unique temp directory for this run to avoid conflicts
205
  run_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
206
  temp_output_dir = f"temp_audio_{run_id}"
207
  os.makedirs(temp_output_dir, exist_ok=True)
208
-
209
- output_base_name_safe = re.sub(r'\W+', '_', output_filename_base_input) # Sanitize filename
210
 
211
- for i, chunk in enumerate(text_chunks):
212
- status_messages.append(f"\nπŸ”Š Generating audio for chunk {i+1}/{len(text_chunks)}...")
213
- final_text = f'"{speech_prompt_input}"\n{chunk}' if speech_prompt_input.strip() else chunk
214
- contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
 
 
 
 
 
215
  generate_content_config = types.GenerateContentConfig(
216
  temperature=float(temperature_slider),
217
  response_modalities=["audio"],
@@ -223,244 +270,281 @@ def generate_audio_for_gradio(
223
  )
224
  try:
225
  chunk_filename_base = f"{output_base_name_safe}_part_{i+1:03d}"
226
- # Save chunks in the temporary directory
227
  chunk_filepath_prefix = os.path.join(temp_output_dir, chunk_filename_base)
228
-
229
- for stream_chunk_data in client.models.generate_content_stream(
230
- model=model_dropdown, contents=contents, config=generate_content_config,
 
231
  ):
232
- if (stream_chunk_data.candidates and stream_chunk_data.candidates[0].content and
233
- stream_chunk_data.candidates[0].content.parts and
234
- stream_chunk_data.candidates[0].content.parts[0].inline_data):
235
 
236
- inline_data = stream_chunk_data.candidates[0].content.parts[0].inline_data
237
  data_buffer = inline_data.data
238
- file_extension = mimetypes.guess_extension(inline_data.mime_type)
239
- if file_extension is None or file_extension == ".bin": # Gemini sometimes returns .bin for mp3/wav
240
- if "mp3" in inline_data.mime_type.lower():
241
- file_extension = ".mp3"
242
- elif "wav" in inline_data.mime_type.lower():
243
- file_extension = ".wav"
244
- else: # default to wav and try conversion if necessary
245
- file_extension = ".wav"
246
- # data_buffer = convert_to_wav(inline_data.data, inline_data.mime_type)
247
-
248
- # If model returns audio/mpeg (MP3), pydub needs it as .mp3 to read.
249
- # If it's audio/wav, pydub needs .wav.
250
- # Let's aim to always save as WAV for consistency if merging.
251
- # For now, save with detected extension.
252
- # If pydub fails to read, it means ffmpeg might not have the right codec or format is unexpected.
253
-
 
 
 
 
 
 
 
 
254
  generated_file_path = save_binary_file(f"{chunk_filepath_prefix}{file_extension}", data_buffer)
255
- generated_audio_files.append(generated_file_path)
256
- status_messages.append(f"βœ… Chunk {i+1} generated: {os.path.basename(generated_file_path)}")
257
- break # Processed this chunk
258
- elif stream_chunk_data.text:
259
- status_messages.append(f"ℹ️ API Text Message: {stream_chunk_data.text}")
260
-
261
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  except Exception as e:
263
- status_messages.append(f"❌ Error generating chunk {i+1}: {e}")
264
- # Consider if we should stop or continue
265
- continue # Continue to next chunk
 
266
 
267
- if i < len(text_chunks) - 1:
268
  status_messages.append(f"⏱️ Waiting {sleep_slider}s...")
269
  time.sleep(float(sleep_slider))
270
 
 
271
  # 6. Handle output files
272
  if not generated_audio_files:
273
- status_messages.append("❌ No audio files were generated!")
274
- return None, None, "\n".join(status_messages)
 
 
 
275
 
276
  status_messages.append(f"\nπŸŽ‰ {len(generated_audio_files)} audio file(s) generated!")
277
 
278
- output_audio_path = None
279
- output_download_path = None # This will be for ZIP or single audio
280
 
281
  if merge_checkbox and len(generated_audio_files) > 1:
282
  if not PYDUB_AVAILABLE:
283
  status_messages.append("⚠️ pydub not available. Cannot merge. Returning ZIP of parts.")
284
- success, msg, zip_path = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
285
- status_messages.append(msg)
286
- if success:
287
- output_download_path = zip_path
288
- # No single audio player if zipped
289
  else:
 
 
 
 
 
290
  merged_filename_path = os.path.join(temp_output_dir, f"{output_base_name_safe}_merged.wav")
291
- success, msg, merged_path = merge_audio_files_func(generated_audio_files, merged_filename_path)
292
- status_messages.append(msg)
293
- if success:
294
- output_audio_path = merged_path
295
- output_download_path = merged_path # User can download the merged file
296
  if delete_partials_checkbox:
297
  status_messages.append("πŸ—‘οΈ Deleting partial files...")
298
  for file_p in generated_audio_files:
299
- try:
300
- os.remove(file_p)
301
- status_messages.append(f" πŸ—‘οΈ Deleted: {os.path.basename(file_p)}")
302
- except Exception as e_del:
303
- status_messages.append(f" ⚠️ Could not delete {os.path.basename(file_p)}: {e_del}")
304
- else: # Merge failed, provide ZIP
305
  status_messages.append("⚠��� Merge failed. Providing ZIP of parts.")
306
  success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
307
  status_messages.append(msg_zip)
308
- if success_zip:
309
- output_download_path = zip_p
310
-
311
  elif len(generated_audio_files) == 1:
312
- output_audio_path = generated_audio_files[0]
313
- output_download_path = generated_audio_files[0]
314
- status_messages.append(f"🎡 Single audio file generated: {os.path.basename(output_audio_path)}")
315
-
 
 
 
 
 
 
 
 
316
  else: # Multiple files, no merge requested
317
  status_messages.append("πŸ“¦ Multiple parts generated. Creating ZIP file.")
318
- success, msg, zip_path = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
319
- status_messages.append(msg)
320
- if success:
321
- output_download_path = zip_path
322
- # No single audio player if zipped
323
 
324
  final_status = "\n".join(status_messages)
325
  print(final_status)
326
- return output_audio_path, output_download_path, final_status
327
-
 
 
328
 
329
  # --- Gradio Interface Definition ---
330
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
331
  gr.Markdown("# 🎡 Gemini Text-to-Speech UI πŸ—£οΈ")
 
 
 
 
 
 
 
 
 
332
  gr.Markdown(
333
  "Convert text to speech using Google's Gemini API. "
334
- "You need your own [Gemini API Key](https://aistudio.google.com/app/apikey)."
 
335
  )
336
 
337
  with gr.Row():
338
- with gr.Column(scale=1):
339
- api_key = gr.Textbox(
340
- label="πŸ”‘ Gemini API Key",
341
- type="password",
342
- placeholder="Enter your Gemini API Key here"
343
- )
344
- use_file = gr.Checkbox(label="πŸ“ Use Text File Input", value=False)
345
  text_file = gr.File(
346
- label="Upload Text File (.txt)",
347
  file_types=['.txt'],
348
  visible=False # Initially hidden
349
  )
350
  text_to_speak = gr.Textbox(
351
- label="πŸ“ Text to Speak",
352
- lines=7,
353
- placeholder="Enter text here if not using a file...",
354
  visible=True # Initially visible
355
  )
356
- # Dynamic visibility for text input vs file input
357
  use_file.change(
358
  lambda x: (gr.update(visible=x), gr.update(visible=not x)),
359
  [use_file],
360
  [text_file, text_to_speak]
361
  )
362
-
363
  speech_prompt = gr.Textbox(
364
  label="πŸ—£οΈ Speech Prompt (Optional)",
365
  placeholder="e.g., 'As an energetic YouTuber speaking to an audience'",
366
  info="Influences style, emotion, and voice characteristics."
367
  )
368
- output_filename_base = gr.Textbox(
369
- label="πŸ’Ύ Output Filename Base",
370
- value="gemini_tts_output",
371
- info="Base name for generated files (no extension)."
372
- )
373
 
374
  with gr.Column(scale=1):
375
  model_name = gr.Dropdown(
376
- MODELS,
377
- label="πŸ€– Model",
378
- value=MODELS[0]
379
  )
380
  speaker_voice = gr.Dropdown(
381
- SPEAKER_VOICES,
382
- label="🎀 Speaker Voice",
383
- value="Charon"
384
  )
385
  temperature = gr.Slider(
386
- minimum=0.0, maximum=2.0, step=0.05, value=1.0,
387
  label="🌑️ Temperature",
388
- info="Controls randomness. Higher values (e.g., 1.0) for more variation, lower for more deterministic."
389
  )
390
  max_chunk_size = gr.Slider(
391
- minimum=2000, maximum=4000, step=100, value=3800,
392
  label="🧩 Max Characters per Chunk",
393
- info="Text is split into chunks for API requests."
394
  )
395
  sleep_between_requests = gr.Slider(
396
- minimum=5, maximum=20, step=0.5, value=14, # Adjusted min for Gemini API
397
- label="⏱️ Sleep Between Requests (seconds)",
398
- info="Helps manage API rate limits for multiple chunks."
399
  )
400
- merge_audio = gr.Checkbox(label="πŸ”— Merge Audio Chunks", value=True, visible=PYDUB_AVAILABLE)
401
- delete_partials = gr.Checkbox(label="πŸ—‘οΈ Delete Partial Files After Merge", value=False, visible=PYDUB_AVAILABLE)
402
-
403
- # Dynamic visibility for delete_partials based on merge_audio
404
- if PYDUB_AVAILABLE:
 
 
405
  merge_audio.change(lambda x: gr.update(visible=x), [merge_audio], [delete_partials])
 
 
 
406
 
407
 
408
- submit_button = gr.Button("✨ Generate Audio ✨", variant="primary")
409
 
410
  with gr.Row():
411
  with gr.Column(scale=1):
412
- output_audio_player = gr.Audio(label="🎧 Generated Audio Output", type="filepath")
413
  with gr.Column(scale=1):
414
  output_file_download = gr.File(label="πŸ“₯ Download Output File", type="filepath")
415
 
416
- status_textbox = gr.Textbox(label="πŸ“Š Status Log", lines=10, interactive=False)
417
 
418
- # Connect button to the function
419
  submit_button.click(
420
  fn=generate_audio_for_gradio,
421
  inputs=[
422
- api_key, use_file, text_file, speech_prompt, text_to_speak,
423
  max_chunk_size, sleep_between_requests, temperature,
424
  model_name, speaker_voice, output_filename_base,
425
- merge_audio, delete_partials
426
  ],
427
  outputs=[output_audio_player, output_file_download, status_textbox]
428
  )
429
 
430
  gr.Markdown("---")
431
- gr.Markdown(f"Created by aigolden - pydub available: {PYDUB_AVAILABLE}")
432
  # The encoded text part:
433
- encoded_text = "Q3JlYXRlIGJ5IDogYWlnb2xkZW4="
434
  try:
435
- decoded_text = base64.b64decode(encoded_text.encode()).decode()
436
- gr.Markdown(f"<div style='text-align:center;'><sub>{decoded_text}</sub></div>")
437
- except:
 
438
  pass
439
-
440
- # Example Usage (if needed, for testing locally)
441
  gr.Examples(
442
  examples=[
443
- [ # API Key, use_file, text_file, speech_prompt, text_to_speak, chunk_size, sleep, temp, model, speaker, output_base, merge, delete
444
- "YOUR_API_KEY_HERE", False, None, "A friendly and informative narrator.", "Hello world, this is a test of the Gemini text to speech API using Gradio.", 3800, 10, 1.0, MODELS[0], "Charon", "example_hello", True, False
445
- ],
446
- [
447
- "YOUR_API_KEY_HERE", False, None, "An excited news anchor.", "Breaking news! Artificial intelligence can now generate human-like speech with incredible clarity. This opens up a world of possibilities for content creation and accessibility.", 3000, 12, 0.9, MODELS[1], "Achernar", "example_news", True, True
448
- ]
449
  ],
450
- inputs=[
451
- api_key, use_file, text_file, speech_prompt, text_to_speak,
 
452
  max_chunk_size, sleep_between_requests, temperature,
453
  model_name, speaker_voice, output_filename_base,
454
  merge_audio, delete_partials
455
  ],
456
  outputs=[output_audio_player, output_file_download, status_textbox],
457
- fn=generate_audio_for_gradio, # Cache examples can be slow for API calls
458
- cache_examples=False # Set to True if inputs are static and fn is pure
459
  )
 
460
 
461
 
462
  if __name__ == "__main__":
463
  if not PYDUB_AVAILABLE:
464
  print("WARNING: pydub library is not installed or working. Audio file merging will be disabled.")
465
- print("Please install it (`pip install pydub`) and ensure ffmpeg is in your system's PATH.")
466
- demo.launch(debug=True) # debug=True for local testing, Hugging Face handles this automatically
 
 
 
 
 
 
 
 
9
  from google import genai
10
  from google.genai import types
11
 
12
+ # Attempt to load API key from Hugging Face Secrets
13
+ # The Space's runtime will inject this environment variable if the secret is set.
14
+ HF_GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
15
+
16
  try:
17
  from pydub import AudioSegment
18
  PYDUB_AVAILABLE = True
19
  except ImportError:
20
  PYDUB_AVAILABLE = False
21
+ print("⚠️ pydub is not available. Audio file merging will be disabled.")
22
+ print("If merging is desired, ensure pydub is in requirements.txt and ffmpeg is available in the environment.")
23
 
24
+ # --- Constants ---
25
  SPEAKER_VOICES = [
26
  "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
27
  "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux", "Pulcherrima",
 
31
  ]
32
  MODELS = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
33
 
34
+ # --- Helper functions ---
35
  def save_binary_file(file_name, data):
 
36
  abs_file_name = os.path.abspath(file_name)
37
+ try:
38
+ with open(abs_file_name, "wb") as f:
39
+ f.write(data)
40
+ print(f"βœ… File saved at: {abs_file_name}")
41
+ return abs_file_name
42
+ except Exception as e:
43
+ print(f"❌ Error saving file {abs_file_name}: {e}")
44
+ return None
45
 
46
  def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
47
  parameters = parse_audio_mime_type(mime_type)
48
  bits_per_sample = parameters["bits_per_sample"]
49
  sample_rate = parameters["rate"]
50
+ num_channels = 1 # Gemini TTS seems to output mono
51
  data_size = len(audio_data)
52
  bytes_per_sample = bits_per_sample // 8
53
  block_align = num_channels * bytes_per_sample
54
  byte_rate = sample_rate * block_align
55
+ chunk_size = 36 + data_size # Size of the 'fmt ' and 'data' chunks and their headers
56
+
57
  header = struct.pack(
58
  "<4sI4s4sIHHIIHH4sI",
59
+ b"RIFF", chunk_size, b"WAVE", b"fmt ", 16, # 16 for PCM
60
+ 1, # PCM format
61
+ num_channels, sample_rate, byte_rate, block_align, bits_per_sample,
62
+ b"data", data_size
63
  )
64
  return header + audio_data
65
 
66
  def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
67
+ bits_per_sample = 16 # Default
68
+ rate = 24000 # Default for Gemini TTS
69
+ if mime_type:
70
+ mime_type_lower = mime_type.lower()
71
+ parts = mime_type_lower.split(";")
72
+ for param in parts:
73
+ param = param.strip()
74
+ if param.startswith("rate="):
75
+ try:
76
+ rate_str = param.split("=", 1)[1]
77
+ rate = int(rate_str)
78
+ except (ValueError, IndexError): pass
79
+ elif param.startswith("audio/l"): # e.g., audio/L16 or audio/L24
80
+ try:
81
+ # Attempt to parse bits from "L<bits>"
82
+ potential_bits = param.split("l", 1)[1]
83
+ if potential_bits.isdigit():
84
+ bits_per_sample = int(potential_bits)
85
+ except (ValueError, IndexError): pass
86
  return {"bits_per_sample": bits_per_sample, "rate": rate}
87
 
88
  def load_text_from_gr_file(file_obj):
 
89
  if file_obj is None:
90
  return "", "No file provided for text input."
91
  try:
 
93
  content = f.read().strip()
94
  if not content:
95
  return "", "Text file is empty."
 
96
  return content, f"Successfully loaded {len(content)} chars from {os.path.basename(file_obj.name)}."
97
  except Exception as e:
98
  return "", f"Error reading text file: {e}"
 
102
  return [text]
103
  chunks = []
104
  current_chunk = ""
105
+ sentences = re.split(r'(?<=[.!?])\s+', text) # Split by sentences
106
  for sentence in sentences:
107
+ if not sentence: continue
108
+ # If adding the current sentence exceeds max_size
109
+ if len(current_chunk) + len(sentence) + 1 > max_size: # +1 for space
110
+ if current_chunk: # If there's something in current_chunk, add it
111
  chunks.append(current_chunk.strip())
112
+ current_chunk = "" # Reset current_chunk
113
+
114
+ # If the sentence itself is too long, split it by words or even characters
115
  if len(sentence) > max_size:
116
+ words = sentence.split(' ')
117
+ temp_sentence_part = ""
118
+ for word in words:
119
+ if len(temp_sentence_part) + len(word) + 1 > max_size:
120
+ if temp_sentence_part: chunks.append(temp_sentence_part.strip())
121
+ # If word itself is too long (rare for TTS practical limits)
122
+ if len(word) > max_size:
123
+ for i in range(0, len(word), max_size):
124
+ chunks.append(word[i:i+max_size])
125
+ temp_sentence_part = ""
126
+ else:
127
+ temp_sentence_part = word
128
+ else:
129
+ temp_sentence_part += (" " if temp_sentence_part else "") + word
130
+ if temp_sentence_part: chunks.append(temp_sentence_part.strip())
131
+ # current_chunk remains empty as the long sentence was fully processed
132
+ else: # Sentence is not too long itself, start a new chunk with it
133
  current_chunk = sentence
134
+ else: # Sentence fits, add to current_chunk
135
  current_chunk += (" " if current_chunk else "") + sentence
136
+
137
+ if current_chunk: # Add any remaining part
138
  chunks.append(current_chunk.strip())
139
  return chunks
140
 
 
142
  def merge_audio_files_func(file_paths, output_path):
143
  if not PYDUB_AVAILABLE:
144
  return False, "pydub is not available. Cannot merge files.", None
145
+ if not file_paths:
146
+ return False, "No audio files to merge.", None
147
  try:
 
148
  combined = AudioSegment.empty()
149
  for i, file_path in enumerate(file_paths):
150
  if os.path.exists(file_path):
151
+ try:
152
+ # Explicitly state format if known, otherwise pydub tries to guess
153
+ # Assuming all inputs are WAV due to our conversion logic
154
+ audio = AudioSegment.from_file(file_path, format="wav")
155
+ combined += audio
156
+ if i < len(file_paths) - 1:
157
+ combined += AudioSegment.silent(duration=200) # Small silence
158
+ except Exception as e_load:
159
+ print(f"⚠️ Error loading audio file {file_path} with pydub: {e_load}")
160
+ return False, f"Error loading audio file {os.path.basename(file_path)}: {e_load}", None
161
  else:
162
+ print(f"⚠️ File not found for merging: {file_path}")
163
+ # Decide if this is critical; for now, we'll say it is.
164
+ return False, f"File not found for merging: {os.path.basename(file_path)}", None
165
 
166
  abs_output_path = os.path.abspath(output_path)
167
  combined.export(abs_output_path, format="wav")
 
168
  return True, f"Merged file saved: {os.path.basename(abs_output_path)}", abs_output_path
169
  except Exception as e:
170
+ print(f"❌ Error merging files: {e}")
171
  return False, f"Error merging files: {e}", None
172
 
173
  def create_zip_file(file_paths, zip_name):
 
177
  for file_path in file_paths:
178
  if os.path.exists(file_path):
179
  zipf.write(file_path, os.path.basename(file_path))
 
180
  return True, f"ZIP file created: {os.path.basename(abs_zip_name)}", abs_zip_name
181
  except Exception as e:
182
  return False, f"Error creating ZIP file: {e}", None
183
 
184
+ # --- Main generation function (modified for Gradio & HF Secrets) ---
185
  def generate_audio_for_gradio(
186
+ # api_key_input_field is removed, will use HF_GEMINI_API_KEY
187
+ use_file_input_checkbox, text_file_obj,
188
  speech_prompt_input, text_to_speak_input,
189
  max_chunk_slider, sleep_slider, temperature_slider,
190
  model_dropdown, speaker_dropdown, output_filename_base_input,
191
+ merge_checkbox, delete_partials_checkbox,
192
+ # Progress for Gradio (optional but good for long tasks)
193
+ progress=gr.Progress(track_tqdm=True)
194
  ):
195
  status_messages = []
 
196
  status_messages.append("πŸš€ Starting Text-to-Speech process...")
197
+ progress(0, desc="Initializing...")
198
+
199
+ # 1. API Key Validation (from HF Secrets)
200
+ api_key_to_use = HF_GEMINI_API_KEY
201
+ if not api_key_to_use:
202
+ # Fallback if user provides one in a field (though we removed the field)
203
+ # This part can be removed if you *only* want to use secrets
204
+ # For now, let's assume if HF_GEMINI_API_KEY is None, we raise an error.
205
+ status_messages.append("❌ Error: GEMINI_API_KEY not found in Hugging Face Secrets.")
206
+ status_messages.append("➑️ Please set it in your Space's Settings > Secrets.")
207
  return None, None, "\n".join(status_messages)
208
+
209
+ os.environ["GEMINI_API_KEY"] = api_key_to_use # Set for genai library
210
+ status_messages.append("πŸ”‘ API Key loaded from Secrets.")
211
 
212
  # 2. Determine Text Input
213
  actual_text_input = ""
 
230
  # 3. Initialize GenAI Client
231
  try:
232
  status_messages.append("πŸ› οΈ Initializing Gemini client...")
233
+ progress(0.1, desc="Initializing Gemini Client...")
234
  client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
235
  status_messages.append("βœ… Gemini client initialized.")
236
  except Exception as e:
 
240
  # 4. Split text
241
  text_chunks = smart_text_split(actual_text_input, int(max_chunk_slider))
242
  status_messages.append(f"πŸ“Š Text split into {len(text_chunks)} chunk(s).")
243
+ for i, chunk_text in enumerate(text_chunks): # Renamed 'chunk' to 'chunk_text'
244
+ status_messages.append(f" πŸ“ Chunk {i+1}: {len(chunk_text)} chars")
245
 
246
  # 5. Generate audio for each chunk
247
  generated_audio_files = []
 
248
  run_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
249
  temp_output_dir = f"temp_audio_{run_id}"
250
  os.makedirs(temp_output_dir, exist_ok=True)
251
+ output_base_name_safe = re.sub(r'[\s\\\/\:\*\?\"\<\>\|\%]+', '_', output_filename_base_input) # More robust sanitize
 
252
 
253
+ total_chunks = len(text_chunks)
254
+ for i, chunk_text_content in enumerate(text_chunks):
255
+ progress_val = 0.1 + (0.7 * (i / total_chunks)) # Progress from 10% to 80% during generation
256
+ progress(progress_val, desc=f"Generating chunk {i+1}/{total_chunks}...")
257
+
258
+ status_messages.append(f"\nπŸ”Š Generating audio for chunk {i+1}/{total_chunks}...")
259
+ final_text_for_api = f'"{speech_prompt_input}"\n{chunk_text_content}' if speech_prompt_input.strip() else chunk_text_content
260
+
261
+ contents_for_api = [types.Content(role="user", parts=[types.Part.from_text(text=final_text_for_api)])]
262
  generate_content_config = types.GenerateContentConfig(
263
  temperature=float(temperature_slider),
264
  response_modalities=["audio"],
 
270
  )
271
  try:
272
  chunk_filename_base = f"{output_base_name_safe}_part_{i+1:03d}"
 
273
  chunk_filepath_prefix = os.path.join(temp_output_dir, chunk_filename_base)
274
+
275
+ audio_data_received = False
276
+ for stream_response_chunk in client.models.generate_content_stream(
277
+ model=model_dropdown, contents=contents_for_api, config=generate_content_config,
278
  ):
279
+ if (stream_response_chunk.candidates and stream_response_chunk.candidates[0].content and
280
+ stream_response_chunk.candidates[0].content.parts and
281
+ stream_response_chunk.candidates[0].content.parts[0].inline_data):
282
 
283
+ inline_data = stream_response_chunk.candidates[0].content.parts[0].inline_data
284
  data_buffer = inline_data.data
285
+ api_mime_type = inline_data.mime_type
286
+ audio_data_received = True
287
+
288
+ status_messages.append(f"ℹ️ API returned MIME type: {api_mime_type}")
289
+
290
+ # Determine file extension and convert if necessary
291
+ file_extension = ".wav" # Default to .wav and convert
292
+ if api_mime_type and ("mp3" in api_mime_type.lower() or "mpeg" in api_mime_type.lower()):
293
+ file_extension = ".mp3"
294
+ # For MP3, data_buffer is already MP3. No conversion needed for saving.
295
+ # pydub will need ffmpeg to read MP3 for merging.
296
+ status_messages.append(f"ℹ️ Saving as MP3 based on MIME: {api_mime_type}")
297
+ elif api_mime_type and "wav" in api_mime_type.lower() and \
298
+ not ("audio/l16" in api_mime_type.lower() or "audio/l24" in api_mime_type.lower()):
299
+ file_extension = ".wav"
300
+ # API says WAV and it's not raw L16/L24, trust it.
301
+ status_messages.append(f"ℹ️ Saving as WAV based on MIME: {api_mime_type}")
302
+ else: # Raw PCM (like audio/L16), unknown, or .bin -> convert to WAV
303
+ file_extension = ".wav"
304
+ status_messages.append(f"ℹ️ Converting to WAV for MIME: {api_mime_type or 'Unknown'}")
305
+ data_buffer = convert_to_wav(data_buffer, api_mime_type)
306
+
307
+ status_messages.append(f"ℹ️ Determined file extension: {file_extension}")
308
+
309
  generated_file_path = save_binary_file(f"{chunk_filepath_prefix}{file_extension}", data_buffer)
310
+ if generated_file_path:
311
+ generated_audio_files.append(generated_file_path)
312
+ status_messages.append(f"βœ… Chunk {i+1} saved: {os.path.basename(generated_file_path)}")
313
+ else:
314
+ status_messages.append(f"❌ Failed to save chunk {i+1}.")
315
+ break # Processed this audio data from stream
316
+
317
+ elif stream_response_chunk.text:
318
+ status_messages.append(f"ℹ️ API Text Message (during stream): {stream_response_chunk.text}")
319
+
320
+ if not audio_data_received:
321
+ status_messages.append(f"❌ No audio data received in stream for chunk {i+1}.")
322
+ # Check for errors in the stream response if available
323
+ if stream_response_chunk and stream_response_chunk.prompt_feedback and stream_response_chunk.prompt_feedback.block_reason:
324
+ status_messages.append(f"πŸ›‘ API Block Reason: {stream_response_chunk.prompt_feedback.block_reason_message or stream_response_chunk.prompt_feedback.block_reason}")
325
+
326
+
327
+ except types.BlockedPromptException as bpe:
328
+ status_messages.append(f"❌ Content blocked for chunk {i+1}: {bpe}")
329
+ status_messages.append(f" Feedback: {bpe.response.prompt_feedback}")
330
+ except types.StopCandidateException as sce:
331
+ status_messages.append(f"❌ Generation stopped for chunk {i+1}: {sce}")
332
+ status_messages.append(f" Feedback: {sce.response.prompt_feedback}")
333
  except Exception as e:
334
+ status_messages.append(f"❌ Error generating/processing chunk {i+1}: {e}")
335
+ import traceback
336
+ status_messages.append(traceback.format_exc()) # More detailed error
337
+ continue
338
 
339
+ if i < total_chunks - 1:
340
  status_messages.append(f"⏱️ Waiting {sleep_slider}s...")
341
  time.sleep(float(sleep_slider))
342
 
343
+ progress(0.85, desc="Processing generated files...")
344
  # 6. Handle output files
345
  if not generated_audio_files:
346
+ status_messages.append("❌ No audio files were successfully generated or saved!")
347
+ final_status = "\n".join(status_messages)
348
+ print(final_status)
349
+ progress(1, desc="Finished with errors.")
350
+ return None, None, final_status
351
 
352
  status_messages.append(f"\nπŸŽ‰ {len(generated_audio_files)} audio file(s) generated!")
353
 
354
+ output_audio_path_for_player = None # For gr.Audio, ideally a single WAV
355
+ output_path_for_download = None # For gr.File, can be WAV or ZIP
356
 
357
  if merge_checkbox and len(generated_audio_files) > 1:
358
  if not PYDUB_AVAILABLE:
359
  status_messages.append("⚠️ pydub not available. Cannot merge. Returning ZIP of parts.")
360
+ success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
361
+ status_messages.append(msg_zip)
362
+ if success_zip: output_path_for_download = zip_p
 
 
363
  else:
364
+ status_messages.append(f"πŸ”— Merging {len(generated_audio_files)} files (all should be WAVs now)...")
365
+ # Ensure all files for merging are WAV, convert if any MP3s were saved and pydub is used
366
+ # For simplicity, our save logic now tries to make them WAV if not MP3 from API.
367
+ # If an MP3 was saved and PYDUB_AVAILABLE, it should handle it.
368
+
369
  merged_filename_path = os.path.join(temp_output_dir, f"{output_base_name_safe}_merged.wav")
370
+ success_merge, msg_merge, merged_p = merge_audio_files_func(generated_audio_files, merged_filename_path)
371
+ status_messages.append(msg_merge)
372
+ if success_merge:
373
+ output_audio_path_for_player = merged_p
374
+ output_path_for_download = merged_p
375
  if delete_partials_checkbox:
376
  status_messages.append("πŸ—‘οΈ Deleting partial files...")
377
  for file_p in generated_audio_files:
378
+ try: os.remove(file_p); status_messages.append(f" πŸ—‘οΈ Deleted: {os.path.basename(file_p)}")
379
+ except Exception as e_del: status_messages.append(f" ⚠️ Could not delete {os.path.basename(file_p)}: {e_del}")
380
+ else:
 
 
 
381
  status_messages.append("⚠��� Merge failed. Providing ZIP of parts.")
382
  success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
383
  status_messages.append(msg_zip)
384
+ if success_zip: output_path_for_download = zip_p
 
 
385
  elif len(generated_audio_files) == 1:
386
+ # Single file, should be WAV due to our conversion logic or MP3 if API sent that
387
+ single_file_path = generated_audio_files[0]
388
+ if single_file_path.lower().endswith(".mp3") and PYDUB_AVAILABLE:
389
+ # Convert MP3 to WAV for Gradio player if it prefers WAV
390
+ # Or, gr.Audio might handle MP3 directly. Let's test.
391
+ # For now, assume gr.Audio handles common types.
392
+ output_audio_path_for_player = single_file_path
393
+ status_messages.append(f"🎡 Single MP3 file: {os.path.basename(single_file_path)}")
394
+ else: # Assume WAV
395
+ output_audio_path_for_player = single_file_path
396
+ status_messages.append(f"🎡 Single WAV file: {os.path.basename(single_file_path)}")
397
+ output_path_for_download = single_file_path
398
  else: # Multiple files, no merge requested
399
  status_messages.append("πŸ“¦ Multiple parts generated. Creating ZIP file.")
400
+ success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
401
+ status_messages.append(msg_zip)
402
+ if success_zip: output_path_for_download = zip_p
 
 
403
 
404
  final_status = "\n".join(status_messages)
405
  print(final_status)
406
+ print(f"DEBUG: output_audio_path_for_player: {output_audio_path_for_player}")
407
+ print(f"DEBUG: output_path_for_download: {output_path_for_download}")
408
+ progress(1, desc="Finished!")
409
+ return output_audio_path_for_player, output_path_for_download, final_status
410
 
411
  # --- Gradio Interface Definition ---
412
# Builds the whole Gradio page: text/file input and generation settings on top,
# the generate button, the audio/download/status outputs, and a set of examples.
# The resulting Blocks app is bound to `demo`, which the entry point launches.
#
# NOTE(review): the emoji inside the label strings below are reproduced exactly
# as they appear in the reviewed copy of this file, where they look mis-encoded;
# verify them against the original app.py before shipping.
with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
    gr.Markdown("# 🎡 Gemini Text-to-Speech UI πŸ—£οΈ")

    # gr.Warning / gr.Info only pop up a modal when they run inside an event
    # handler; called while the layout is being built they never reach the
    # browser. Render the API-key notice as page content instead.
    if not HF_GEMINI_API_KEY:
        gr.Markdown(
            "GEMINI_API_KEY not found in Hugging Face Secrets. "
            "Please add it in your Space's 'Settings' > 'Secrets' tab for the app to work. "
            "Name the secret `GEMINI_API_KEY`."
        )
    else:
        gr.Markdown("Gemini API Key loaded successfully from Space Secrets. Ready to generate!")

    gr.Markdown(
        "Convert text to speech using Google's Gemini API. "
        "Your Gemini API Key must be set as a Secret named `GEMINI_API_KEY` in this Space's settings."
        "\n\nGet your API Key from [Google AI Studio](https://aistudio.google.com/app/apikey)."
    )

    with gr.Row():
        with gr.Column(scale=2):  # Wider column for text inputs
            use_file = gr.Checkbox(label="πŸ“ Use Text File Input (.txt)", value=False)
            text_file = gr.File(
                label="Upload Text File",
                file_types=['.txt'],
                visible=False,  # Initially hidden
            )
            text_to_speak = gr.Textbox(
                label="πŸ“ Text to Speak (or use file above)",
                lines=10,
                placeholder="Enter text here...",
                visible=True,  # Initially visible
            )
            # The checkbox swaps which of the two text sources is shown.
            use_file.change(
                lambda x: (gr.update(visible=x), gr.update(visible=not x)),
                [use_file],
                [text_file, text_to_speak]
            )
            speech_prompt = gr.Textbox(
                label="πŸ—£οΈ Speech Prompt (Optional)",
                placeholder="e.g., 'As an energetic YouTuber speaking to an audience'",
                info="Influences style, emotion, and voice characteristics."
            )

        with gr.Column(scale=1):
            model_name = gr.Dropdown(
                MODELS, label="πŸ€– Model", value=MODELS[0]
            )
            speaker_voice = gr.Dropdown(
                SPEAKER_VOICES, label="🎀 Speaker Voice", value="Charon"
            )
            temperature = gr.Slider(
                minimum=0.0, maximum=1.0, step=0.05, value=0.7,
                label="🌑️ Temperature",
                info="Controls randomness (0.0-1.0). Higher for more variation."
            )
            max_chunk_size = gr.Slider(
                minimum=1000, maximum=4000, step=100, value=3800,
                label="🧩 Max Characters per Chunk",
                info="Text is split for API. Max 4096 per request for some models."
            )
            sleep_between_requests = gr.Slider(
                minimum=1, maximum=15, step=0.5, value=2,
                label="⏱️ Sleep Between Chunks (sec)",
                info="Helps manage API rate limits (e.g. Gemini Flash has 60 RPM limit)."
            )
            output_filename_base = gr.Textbox(
                label="πŸ’Ύ Output Filename Base", value="gemini_tts_audio"
            )

            # Merge controls are only shown when pydub could be imported.
            with gr.Group(visible=PYDUB_AVAILABLE):
                merge_audio = gr.Checkbox(label="πŸ”— Merge Audio Chunks (if >1)", value=True)
                delete_partials = gr.Checkbox(label="πŸ—‘οΈ Delete Chunks After Merge", value=True, visible=True)
                # "Delete chunks" is only meaningful while merging is enabled.
                merge_audio.change(lambda x: gr.update(visible=x), [merge_audio], [delete_partials])

            if not PYDUB_AVAILABLE:
                gr.Markdown("<small>⚠️ Merging disabled: `pydub` library not found. Install if needed.</small>")

    submit_button = gr.Button("✨ Generate Audio ✨", variant="primary", scale=2)

    with gr.Row():
        with gr.Column(scale=1):
            output_audio_player = gr.Audio(label="🎧 Generated Audio Output", type="filepath", format="wav")
        with gr.Column(scale=1):
            output_file_download = gr.File(label="πŸ“₯ Download Output File", type="filepath")

    status_textbox = gr.Textbox(label="πŸ“Š Status Log", lines=10, interactive=False, max_lines=20)

    # One definition shared by the button and the examples, so the positional
    # mapping onto generate_audio_for_gradio's parameters cannot drift apart.
    tts_inputs = [
        use_file, text_file, speech_prompt, text_to_speak,
        max_chunk_size, sleep_between_requests, temperature,
        model_name, speaker_voice, output_filename_base,
        merge_audio, delete_partials  # Passed even when the merge group is hidden
    ]
    tts_outputs = [output_audio_player, output_file_download, status_textbox]

    submit_button.click(
        fn=generate_audio_for_gradio,
        inputs=tts_inputs,
        outputs=tts_outputs
    )

    gr.Markdown("---")

    # Base64-encoded credit line; the literal decodes to "Create by : aigolden".
    encoded_text = "Q3JlYXRlIGJ5IDogYWlnb2xkZW4="
    try:
        decoded_text = base64.b64decode(encoded_text.encode('utf-8')).decode('utf-8')
    except ValueError as e_decode:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
        # The credit is cosmetic, so a bad literal must not stop the UI build.
        print(f"Error decoding/displaying credit: {e_decode}")
    else:
        gr.Markdown(f"<p style='text-align:center; font-size:small;'><em>{decoded_text}</em></p>")

    example_rows = [
        [False, None, "A friendly and informative narrator.", "Hello world, this is a test of the Gemini text to speech API using Gradio. I hope this works well!", 3800, 2, 0.7, MODELS[0], "Charon", "example_hello", True, True],
        [False, None, "An excited news reporter.", "Breaking news! Artificial intelligence can now generate human-like speech. This technology is rapidly evolving!", 3000, 2, 0.8, MODELS[1], "Achernar", "example_news", True, True],
    ]
    # Only offer the file-based example when its file is actually present;
    # a dangling example path would otherwise be handed to the File component.
    if os.path.isfile("sample_text.txt"):
        example_rows.append(
            [True, "sample_text.txt", "A calm storyteller.", "", 3500, 3, 0.6, MODELS[0], "Vindemiatrix", "example_from_file", True, False]
        )

    gr.Examples(
        examples=example_rows,
        fn=generate_audio_for_gradio,
        inputs=tts_inputs,
        outputs=tts_outputs,
        cache_examples=False  # Each run calls the API, so results are not cached
    )
    gr.Markdown("<small>To use the 'example_from_file', please create a `sample_text.txt` file in the root of this Space with some text content, or upload your own text file.</small>")
537
 
538
 
539
if __name__ == "__main__":
    # Startup diagnostics, printed in a fixed order before the server starts.
    # Each entry pairs "should this warning fire?" with the text to print.
    startup_warnings = (
        (
            not PYDUB_AVAILABLE,
            "WARNING: pydub library is not installed or working. Audio file merging will be disabled.",
        ),
        (
            not HF_GEMINI_API_KEY,
            "WARNING: GEMINI_API_KEY environment variable not set. The app might not work in local if it relies on this for API key.",
        ),
    )
    for should_warn, warning_text in startup_warnings:
        if should_warn:
            print(warning_text)

    # Local runs need the key in the environment before starting, for example:
    #   export GEMINI_API_KEY="your_key_here"
    #   python app.py
    # share=False keeps the app local; per the original note, HF Spaces
    # provides the public link itself.
    demo.launch(debug=True, share=False)