Hamed744 committed
Commit 8a7ac92 · verified · 1 Parent(s): 290123f

Update app.py

Files changed (1)
  1. app.py +84 -287
app.py CHANGED
@@ -78,7 +78,7 @@ SPEAKER_VOICES = [
78
  "Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
79
  "Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
80
  ]
81
- FIXED_MODEL_NAME = "gemini-1.5-flash-preview-tts" # As per Alpha TTS; ensure this model is correct and available
82
  DEFAULT_MAX_CHUNK_SIZE = 3800
83
  DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
84
  DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
@@ -120,28 +120,26 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
120
  def smart_text_split(text, max_size=3800, log_list_ref=None):
121
  if len(text) <= max_size: return [text]
122
  chunks, current_chunk = [], ""
123
- # Improved sentence splitting for Persian and English
124
  sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
125
  for sentence in sentences:
126
  if len(current_chunk) + len(sentence) + 1 > max_size:
127
  if current_chunk: chunks.append(current_chunk.strip())
128
  current_chunk = sentence
129
- while len(current_chunk) > max_size: # Handle very long sentences
130
- # Try to split at common punctuation or spaces, working backwards
131
  split_idx = -1
132
- for punc in ['،', ',', ';', ':', ' ']: # Persian comma added
133
  idx = current_chunk.rfind(punc, max_size // 2, max_size)
134
  if idx > split_idx : split_idx = idx
135
 
136
  if split_idx != -1:
137
  part, current_chunk = current_chunk[:split_idx+1], current_chunk[split_idx+1:]
138
- else: # Force split
139
  part, current_chunk = current_chunk[:max_size], current_chunk[max_size:]
140
  chunks.append(part.strip())
141
  else:
142
- current_chunk += (" " if current_chunk and sentence else "") + sentence # Avoid leading space if current_chunk is empty
143
  if current_chunk: chunks.append(current_chunk.strip())
144
- final_chunks = [c for c in chunks if c] # Remove empty chunks
145
  if log_list_ref: _log_tts(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list_ref)
146
  return final_chunks
147
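For reference, a minimal standalone sketch of the sentence-aware chunking approach that smart_text_split implements above (split on Persian/English sentence endings, then force-split any oversized remainder). Names and defaults are illustrative, not the app's exact code:

import re

def chunk_text(text: str, max_size: int = 3800) -> list[str]:
    # Split text into chunks of at most max_size, preferring sentence boundaries.
    if len(text) <= max_size:
        return [text]
    sentences = re.split(r'(?<=[.!?؟۔])\s+', text)  # Persian/English sentence terminators
    chunks, current = [], ""
    for sentence in sentences:
        if len(current) + len(sentence) + 1 > max_size:
            if current:
                chunks.append(current.strip())
            current = sentence
            while len(current) > max_size:  # the sentence alone is too long: hard split
                chunks.append(current[:max_size].strip())
                current = current[max_size:]
        else:
            current += (" " if current else "") + sentence
    if current:
        chunks.append(current.strip())
    return [c for c in chunks if c]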
 
@@ -157,12 +155,12 @@ def merge_audio_files_func(file_paths, output_path, log_list_ref):
157
  if os.path.exists(fp):
158
  segment = AudioSegment.from_file(fp)
159
  combined += segment
160
- if i < len(file_paths) - 1: # Add short silence between segments
161
- combined += AudioSegment.silent(duration=150) # 150ms silence
162
  else:
163
  _log_tts(f"⚠️ فایل صوتی برای ادغام یافت نشد: {fp}", log_list_ref)
164
 
165
- combined.export(output_path, format="wav") # Gemini TTS often returns WAV or can be converted.
166
  _log_tts(f"✅ فایل صوتی با موفقیت در '{output_path}' ادغام و ذخیره شد.", log_list_ref)
167
  return True
168
  except Exception as e:
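As a reference for the merge step above, a small self-contained pydub sketch (assumes pydub with an ffmpeg backend; file names are illustrative):

from pydub import AudioSegment

def merge_wav_parts(paths, out_path, gap_ms=150):
    # Concatenate the parts with a short silence between them, then export as WAV.
    combined = AudioSegment.empty()
    for i, path in enumerate(paths):
        combined += AudioSegment.from_file(path)
        if i < len(paths) - 1:
            combined += AudioSegment.silent(duration=gap_ms)
    combined.export(out_path, format="wav")

# merge_wav_parts(["alpha_tts_audio_part001.wav", "alpha_tts_audio_part002.wav"],
#                 "alpha_tts_audio_final.wav")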
@@ -181,50 +179,7 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
181
  return None, "خطا: کلید API جیمینای برای سرویس TTS در دسترس نیست."
182
 
183
  try:
184
- # Configure genai with the specific API key for this operation
185
- # Note: genai.configure is global. If running concurrent operations with different keys,
186
- # genai.Client(api_key=api_key) is safer. Let's assume genai.Client for TTS.
187
- client = genai.GenerativeModel(model_name=FIXED_MODEL_NAME, api_key=api_key) # Old way
188
- # For specific model like TTS, often it's through client.models or genai.get_model
189
- # The second script used: client = genai.Client(api_key=api_key)
190
- # And then client.models.generate_content(model=FIXED_MODEL_NAME, ...)
191
- # Let's stick to genai.Client for TTS as it's more direct for such models.
192
-
193
- # Re-instantiate client with the specific key (safer than global configure if other parts of app use genai)
194
- # However, the get_gemini_api_key_sync rotates a global index, so global configure is implied.
195
- # For simplicity with provided key rotation:
196
- current_genai_client = genai.get_model(f"models/{FIXED_MODEL_NAME}") # Simpler if model name is just 'tts-model'
197
- # But FIXED_MODEL_NAME is "gemini-1.5-flash-preview-tts"
198
- # This might require `genai.configure(api_key=api_key)` first.
199
- # Let's use the direct method from AlphaTTS script for robustness.
200
- genai.configure(api_key=api_key) # Configure with the rotated key
201
- # model_instance = genai.GenerativeModel(FIXED_MODEL_NAME) # This is usually for text/chat models
202
- # For TTS, the AlphaTTS code used `client.models.generate_content`
203
- # which implies `genai.Client(api_key=...)` then `client.models.generate_content(...)`
204
- # Or if `genai.configure` is used, then `genai.generate_text(...)` or similar global funcs.
205
- # Let's assume `genai.configure` is enough and then use a top-level function if available,
206
- # or stick to client.
207
-
208
- # Given the AlphaTTS structure:
209
- # client = genai.Client(api_key=api_key) # This is the most direct way if Client takes api_key
210
- # Let's assume genai.configure is what's intended with the key rotation logic.
211
- # genai.configure(api_key=api_key) # Already done by key rotation
212
- # This is a bit messy. Let's refine `get_gemini_api_key_sync` to also call `genai.configure`.
213
- # No, `get_gemini_api_key_sync` should just return the key. The caller configures.
214
-
215
- # Safest approach: configure genai globally for this operation
216
  genai.configure(api_key=api_key)
217
- # Then use a model instance. The second script used client.models.generate_content
218
- # which is not directly available on GenerativeModel typically.
219
- # Let's try to use the structure from Alpha TTS as much as possible.
220
- # It used `client.models.generate_content`.
221
- # This means we might need to instantiate `genai.Client` instead of `genai.GenerativeModel`.
222
- # However, `genai.Client` is usually for the full API surface.
223
- # Let's try with `genai.GenerativeModel` and see if it supports speech config.
224
- # If not, we'll need to use `genai.generate_content` with the full model path.
225
-
226
- model_to_use_direct = f"models/{FIXED_MODEL_NAME}" # e.g., "models/gemini-1.5-flash-preview-tts"
227
-
228
  except Exception as e:
229
  _log_tts(f"❌ خطا در مقداردهی اولیه کلاینت Gemini: {e}", log_list_ref)
230
  return None, f"خطا در ارتباط با Gemini: {e}"
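The deleted comments above settle on one rule: the key helper only returns a key, and the caller calls genai.configure. A self-contained sketch of that pattern, assuming keys arrive in a single comma-separated GEMINI_API_KEYS environment variable (the real get_gemini_api_key_sync in app.py may load and rotate them differently):

import itertools
import os
import google.generativeai as genai

# Assumption: comma-separated keys in one env var; the app may store them per key instead.
_KEYS = [k.strip() for k in os.getenv("GEMINI_API_KEYS", "").split(",") if k.strip()]
_key_cycle = itertools.cycle(_KEYS) if _KEYS else None

def next_api_key():
    # Round-robin rotation, like the global index rotation the comments describe.
    return next(_key_cycle) if _key_cycle else None

def make_tts_model(model_name="gemini-1.5-flash-preview-tts"):
    api_key = next_api_key()
    if not api_key:
        raise RuntimeError("No Gemini API key available")
    genai.configure(api_key=api_key)  # the caller configures, as in the code above
    return genai.GenerativeModel(model_name)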
@@ -242,195 +197,74 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
242
  for i, chunk in enumerate(text_chunks):
243
  _log_tts(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)}...", log_list_ref)
244
 
245
- # Constructing the request content based on AlphaTTS structure
246
  final_text_for_tts = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
247
 
248
- # This part needs to align with how Gemini TTS API expects requests via the Python SDK
249
- # AlphaTTS used:
250
- # contents = [genai_types.Content(role="user", parts=[genai_types.Part.from_text(text=final_text)])]
251
- # config = genai_types.GenerateContentConfig(temperature=temperature_val, response_modalities=["audio"],
252
- # speech_config=genai_types.SpeechConfig(voice_config=genai_types.VoiceConfig(
253
- # prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice))))
254
- # response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
255
-
256
- # Using global `generate_content` after `genai.configure(api_key=...)`
257
  try:
258
- request_contents = [genai_types.Content(role="user", parts=[genai_types.Part.from_text(text=final_text_for_tts)])]
259
- generation_config = genai_types.GenerationConfig(temperature=temperature_val) # Only temperature here
260
-
261
- # Speech config is part of the request, not generation_config for some APIs
262
- # For the new unified models, it might be different.
263
- # Let's assume the model name implies TTS and speech config is passed differently or is part of GenerateContentRequest
 
264
 
265
- # This is the tricky part: How to pass speech config with global generate_content
266
- # The `client.models.generate_content` from AlphaTTS is more specific.
267
- # `genai.GenerativeModel(model_name).generate_content` is the current standard.
268
 
269
- tts_model_instance = genai.GenerativeModel(
270
- model_name=FIXED_MODEL_NAME, # Use the direct model name like gemini-1.5-flash-preview-tts
271
- generation_config=generation_config,
272
- # system_instruction=prompt_input, # If supported for TTS prompt
273
- )
274
- # The `speech_config` would be part of the `generate_content` call if supported by this model type
275
- # This needs verification against current Gemini SDK for TTS.
276
- # The `response_modalities=["audio"]` and `speech_config` were used with `client.models.generate_content`.
277
- # If `GenerativeModel` is used, the request structure might be different.
278
- # The `generate_content` method of `GenerativeModel` takes `request_options` for things like `response_mime_type`.
279
-
280
- # Let's revert to the structure most likely to work based on AlphaTTS's use of `client.models.generate_content`
281
- # This means we may need to use `genai.Client(api_key=api_key).models.get(FIXED_MODEL_NAME).generate_content(...)`
282
- # OR `genai.generate_content(model=f"models/{FIXED_MODEL_NAME}", contents=..., generation_config=..., speech_config=...)` if that signature exists.
283
-
284
- # Simplest path if `genai.configure` is used and there's a global way:
285
- # This is a common pattern for `GenerateContentRequest`
286
- gc_request = genai_types.GenerateContentRequest(
287
- model=f"models/{FIXED_MODEL_NAME}", # Ensure "models/" prefix if needed
288
- contents=request_contents,
289
  generation_config=genai_types.GenerationConfig(
290
  temperature=temperature_val,
291
- response_mime_type="audio/wav" # Request WAV directly
292
  ),
293
- # How to pass voice and prompt? This is SDK specific.
294
- # Re-checking AlphaTTS: `speech_config` was part of `GenerateContentConfig` passed to `client.models.generate_content`
295
- # This is non-standard for `genai.GenerationConfig`.
296
- # It seems `genai.Client().model().generate_content()` has a different `config` param.
297
  )
298
-
299
- # Let's use the exact structure from AlphaTTS for `config` as it was working there.
300
- # This implies that `genai.generate_content` (global) or `GenerativeModel.generate_content`
301
- # must accept a similar config object if `client.models.generate_content` is not used.
302
-
303
- custom_config_for_tts = genai_types.GenerationConfig( # This seems to be the new way
304
- temperature=temperature_val,
305
- # response_modalities=["audio"], # This might be implicit or handled by response_mime_type
306
- # The following was from AlphaTTS, might need to be adapted or is for older/different client path
307
- # speech_config=genai_types.SpeechConfig(
308
- # voice_config=genai_types.VoiceConfig(
309
- # prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)
310
- # )
311
- # )
312
- # For new Gemini 1.5 Flash/Pro with native audio output, it's often simpler:
313
- # You might pass voice parameters differently, or the model implicitly handles it.
314
- # The `FIXED_MODEL_NAME` "gemini-1.5-flash-preview-tts" suggests it IS a TTS model.
315
- # Let's assume for now the model name and text input are enough, and voice/prompt are part of the text or model behavior.
316
- # If `selected_voice` and `prompt_input` are crucial, they need to be part of the request.
317
- # `prompt_input` can be part of `final_text_for_tts`.
318
- # `selected_voice` needs a parameter in the API call.
319
- # The Gemini API for TTS usually involves specifying the voice in the request.
320
- # e.g. in `synthesis_input` or `voice_selection_params`
321
-
322
- # The most up-to-date way for Gemini 1.5 Flash TTS might involve `tools` or specific TTS request structures.
323
- # Given the AlphaTTS code, it used `speech_config`. Let's try to replicate.
324
- # `genai.GenerativeModel.generate_content` has `generation_config` and `safety_settings`.
325
- # The `speech_config` is not standard there.
326
-
327
- # If `FIXED_MODEL_NAME` is a true generative model that can output audio,
328
- # the prompt needs to guide it.
329
- # "Generate an audio of the following text with voice {selected_voice}: {chunk}"
330
- # This is less likely for specialized TTS models.
331
-
332
- # Fallback to a more direct call if available for TTS, or ensure `GenerativeModel` is configured correctly.
333
- # The most robust way is to use the specific SDK features for TTS.
334
- # If `genai.get_model("models/text-to-speech")` exists:
335
- # tts_service_model = genai.get_model("models/text-to-speech")
336
- # response = tts_service_model.synthesize_speech(text=final_text_for_tts, voice=selected_voice, ...)
337
- # This is typical for dedicated TTS.
338
- # Since AlphaTTS used `gemini-X.Y-flash-preview-tts`, it's likely a multimodal model.
339
-
340
- # Replicating the structure that AlphaTTS `client.models.generate_content` used:
341
- # This is the most likely path to success given it worked in AlphaTTS.
342
- # We need to call a similar function. `genai.GenerativeModel(FIXED_MODEL_NAME)` is the modern way.
343
- tts_model = genai.GenerativeModel(FIXED_MODEL_NAME) # api_key is globally configured
344
-
345
- # Constructing the specific configuration for TTS with GenerativeModel.
346
- # This is where `selected_voice` and `prompt_input` (as system instruction or context) matter.
347
- # The Gemini API documentation for multimodal models with audio output is key here.
348
- # Often, it's done by specifying `response_mime_type='audio/wav'` in `generation_config`.
349
- # The voice selection might be a parameter in `GenerationConfig` or part of the prompt for some models.
350
-
351
- # Let's assume `selected_voice` can be part of the text prompt for now if not a direct API param.
352
- # And `prompt_input` is part of the context.
353
-
354
- # Simpler request, assuming model handles voice from name or general quality from prompt:
355
- # The `prompt_input` from AlphaTTS was more like a system instruction for tone.
356
- # The `selected_voice` was a specific voice model name.
357
-
358
- # The crucial part from AlphaTTS was:
359
- # speech_config = genai_types.SpeechConfig(
360
- # voice_config=genai_types.VoiceConfig(
361
- # prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)))
362
- # And this `speech_config` was passed into a `GenerateContentConfig` like object.
363
- # Let's try to pass this to `tts_model.generate_content`.
364
-
365
- # The new SDK might use `ToolConfig` for speech synthesis or `Content` with specific parts.
366
- # For direct audio generation, it's often simpler:
367
- response = tts_model.generate_content(
368
- final_text_for_tts, # The text to synthesize
369
- generation_config=genai_types.GenerationConfig(
370
- temperature=temperature_val,
371
- # Candidate count, stop sequences, etc.
372
- # For audio output, you specify the desired mime type:
373
- response_mime_type="audio/wav" # Or "audio/mp3" if supported
374
- ),
375
- # How to specify voice like 'Achird', 'Zephyr'?
376
- # This is the missing link if `selected_voice` is not part of the model name itself.
377
- # If `FIXED_MODEL_NAME` implies a voice, or if it's a general TTS synthesizer,
378
- # voice selection must be in the request.
379
- # The `speech_config` from AlphaTTS is the best hint.
380
- # It might be that `genai.GenerativeModel` does not support this `speech_config`.
381
- # And `genai.Client(api_key=...).models.get(model_name).generate_content(...)` was the correct path.
382
-
383
- # Let's assume `request_options` might hold it, or a `Tool` for TTS.
384
- # Given `SPEAKER_VOICES`, it implies a selection mechanism.
385
- # If `genai_types.SpeechConfig` exists, it's meant to be used.
386
- # Perhaps `GenerativeModel.generate_content(contents=..., generation_config=..., tools=...)`
387
- # where one tool is configured for speech synthesis with the voice.
388
-
389
- # Sticking to the most direct interpretation of AlphaTTS's success:
390
- # It used a `config` object containing `speech_config`.
391
- # `genai.GenerativeModel.generate_content`'s `generation_config` is standard.
392
- # The `client.models.generate_content` in AlphaTTS might be an older or different SDK path.
393
-
394
- # Let's try with the most direct `GenerativeModel` call, assuming voice is part of the prompt or fixed for the model.
395
- # If `selected_voice` is critical, this will fail to use it.
396
- # The `prompt_input` (e.g., "با لحنی شاد") can be prepended to `final_text_for_tts`.
397
- # text_with_prompt_and_voice_hint = f"{prompt_input}. Voice: {selected_voice}. Text: {chunk}"
398
- # This is a guess if dedicated voice parameters are not obvious.
399
- )
400
 
401
  fname_base = f"{output_base_name}_part{i+1:03d}"
402
 
403
- # Process response (this part is from AlphaTTS and should be largely correct if response structure is similar)
404
- # Assuming response.parts[0].audio_data or similar for new SDK
405
- # The AlphaTTS code expected `response.candidates[0].content.parts[0].inline_data`
406
- # For `GenerativeModel.generate_content`, it's usually `response.text` or `response.parts`.
407
-
408
- if response.parts and hasattr(response.parts[0], 'audio_data'): # Hypothetical new SDK attribute
409
- audio_bytes = response.parts[0].audio_data # This is a guess
410
- mime_type = "audio/wav" # Assuming we requested WAV
411
  elif response.candidates and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data: # AlphaTTS way
412
  inline_data = response.candidates[0].content.parts[0].inline_data
413
  audio_bytes = inline_data.data
414
  mime_type = inline_data.mime_type
415
- else: # Try to find audio in a standard way for new SDK
416
  audio_part = None
417
- for part in response.parts:
418
- if part.mime_type.startswith("audio/"):
419
- audio_part = part
420
- break
421
- if audio_part and hasattr(audio_part, 'data'): # Common for blob data
 
422
  audio_bytes = audio_part.data
423
  mime_type = audio_part.mime_type
424
- elif audio_part and hasattr(audio_part, '_blob'): # Another common pattern
425
  audio_bytes = audio_part._blob.data
426
  mime_type = audio_part._blob.mime_type
427
- else:
 
428
  _log_tts(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی معتبر دریافت شد.", log_list_ref)
429
- _log_tts(f"Response structure: {response}", log_list_ref) # Log structure for debugging
430
  continue
431
 
432
- ext = mimetypes.guess_extension(mime_type) or ".wav" # Default to .wav
433
- if "audio/L" in mime_type and ext == ".wav": # PCM data that needs WAV header
434
  audio_bytes = convert_to_wav(audio_bytes, mime_type)
435
  if not ext.startswith("."): ext = "." + ext
436
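The "audio/L" branch above handles the case where the API returns raw PCM (for example audio/L16;rate=24000) that needs a WAV header before it can be played or merged. A standard-library sketch of what a convert_to_wav-style helper can look like (the app's own convert_to_wav and parse_audio_mime_type may differ in detail):

import io
import re
import wave

def pcm_to_wav(pcm_bytes: bytes, mime_type: str) -> bytes:
    # Wrap raw little-endian PCM (audio/L16;rate=24000 style) in a WAV container.
    bits, rate = 16, 24000  # fallbacks if the MIME type carries no parameters
    m = re.search(r"audio/L(\d+)", mime_type)
    if m:
        bits = int(m.group(1))
    m = re.search(r"rate=(\d+)", mime_type)
    if m:
        rate = int(m.group(1))
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wav_file:
        wav_file.setnchannels(1)          # mono assumed
        wav_file.setsampwidth(bits // 8)
        wav_file.setframerate(rate)
        wav_file.writeframes(pcm_bytes)
    return buf.getvalue()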
 
@@ -440,21 +274,9 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
440
 
441
  except Exception as e:
442
  _log_tts(f"❌ خطا در تولید قطعه صوتی {i+1} با Gemini: {e}\n{traceback.format_exc()}", log_list_ref)
443
- # Try to get more detailed error from Gemini if available
444
  if hasattr(e, 'response') and e.response:
445
- _log_tts(f"Gemini API error details: {e.response}", log_list_ref)
446
- # Fallback message for UI
447
- error_message_for_ui = f"خطا در تولید صدا ({type(e).__name__})."
448
- if NUM_GEMINI_KEYS > 1:
449
- error_message_for_ui += " کلید بعدی امتحان خواهد شد اگر قطعات دیگری وجود داشته باشد." # Not quite, key rotates per call not per chunk failure.
450
- # For now, a single chunk failure might stop the whole process if we don't continue.
451
- # The loop `continue`s, so other chunks will be tried.
452
- # The error message for the UI should be a summary at the end.
453
- # This needs to be handled in `gradio_tts_interface`.
454
- # Let's have `core_generate_audio` return (None, error_message) on first critical failure.
455
- # No, it should try all chunks and return what it could.
456
- # The `gradio_tts_interface` will decide the final message.
457
- continue # Try next chunk
458
 
459
  if i < len(text_chunks) - 1 and len(text_chunks) > 1:
460
  _log_tts(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از پردازش قطعه بعدی...", log_list_ref)
@@ -467,23 +289,22 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
467
  _log_tts(f"🎉 {len(generated_files)} فایل(های) صوتی با موفقیت تولید شد.", log_list_ref)
468
 
469
  final_audio_file = None
470
- final_output_path_base = f"{output_base_name}_final" # Consistent base name
471
 
472
  if len(generated_files) > 1:
473
  if PYDUB_AVAILABLE:
474
- merged_fn = f"{final_output_path_base}.wav" # Merge to WAV
475
  if os.path.exists(merged_fn):
476
  try: os.remove(merged_fn)
477
  except OSError as e: _log_tts(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e}", log_list_ref)
478
 
479
  if merge_audio_files_func(generated_files, merged_fn, log_list_ref):
480
  final_audio_file = merged_fn
481
- # Clean up individual parts
482
  for fp in generated_files:
483
  if os.path.abspath(fp) != os.path.abspath(merged_fn):
484
  try: os.remove(fp)
485
  except OSError as e_del: _log_tts(f"⚠️ عدم امکان حذف فایل موقت '{fp}': {e_del}", log_list_ref)
486
- else: # Merge failed, provide first chunk if pydub available but failed
487
  _log_tts("⚠️ ادغام فایل‌های صوتی ناموفق بود. اولین قطعه ارائه می‌شود.", log_list_ref)
488
  if generated_files:
489
  try:
@@ -493,16 +314,14 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
493
  if os.path.exists(fallback_fn): os.remove(fallback_fn)
494
  os.rename(first_chunk_path, fallback_fn)
495
  final_audio_file = fallback_fn
496
- # Clean up other parts
497
  for i_gf in range(1, len(generated_files)):
498
  try: os.remove(generated_files[i_gf])
499
  except OSError as e_del: _log_tts(f"⚠️ عدم امکان حذف فایل موقت '{generated_files[i_gf]}': {e_del}", log_list_ref)
500
  except Exception as e_rename_fallback:
501
  _log_tts(f"خطا در تغییر نام فایل اولین قطعه (fallback): {e_rename_fallback}", log_list_ref)
502
- final_audio_file = generated_files[0] # Original path
503
- else: # Pydub not available, offer to download parts or provide first
504
  _log_tts("⚠️ Pydub برای ادغام در دسترس نیست. اولین قطعه صوتی ارائه می‌شود.", log_list_ref)
505
- # Similar renaming logic for the first part as above for consistency
506
  if generated_files:
507
  try:
508
  first_chunk_path = generated_files[0]
@@ -511,9 +330,6 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
511
  if os.path.exists(single_fallback_fn): os.remove(single_fallback_fn)
512
  os.rename(first_chunk_path, single_fallback_fn)
513
  final_audio_file = single_fallback_fn
514
- # Clean up other parts (optional, user might want them if no merge)
515
- # For simplicity, let's not clean them up if pydub is missing, they might be useful.
516
- # Or, offer a ZIP. For now, just the first.
517
  for i_gf in range(1, len(generated_files)):
518
  _log_tts(f"قطعه اضافی موجود: {generated_files[i_gf]} (ادغام نشده)", log_list_ref)
519
 
@@ -535,19 +351,16 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
535
  final_audio_file = final_single_fn
536
  except Exception as e_rename_single:
537
  _log_tts(f"خطا در تغییر نام فایل تکی نهایی: {e_rename_single}", log_list_ref)
538
- final_audio_file = generated_files[0] # Fallback to original temp name
539
 
540
  if final_audio_file and not os.path.exists(final_audio_file):
541
  _log_tts(f"⚠️ فایل صوتی نهایی '{final_audio_file}' پس از پردازش وجود ندارد!", log_list_ref)
542
  return None, "خطا: فایل صوتی نهایی یافت نشد."
543
 
544
- return final_audio_file, "موفق" # Return success message string
545
 
546
  def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature):
547
- # Progress object can be added here if core_generate_audio is instrumented
548
- # progress=gr.Progress(track_tqdm=True)
549
-
550
- logs_for_this_run = [] # For this specific run, to potentially show user or debug
551
  actual_text = ""
552
  status_message = "شروع پردازش..."
553
  final_audio_path = None
@@ -577,7 +390,6 @@ def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_pr
577
  _log_tts(f"تنظیمات: Speaker={speaker_voice}, Temp={temperature}, Prompt='{speech_prompt[:30]}...'", logs_for_this_run)
578
 
579
  try:
580
- # Call the core audio generation function
581
  final_audio_path, generation_status_msg = core_generate_audio(
582
  actual_text, speech_prompt, speaker_voice, temperature, logs_for_this_run
583
  )
@@ -586,11 +398,11 @@ def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_pr
586
  status_message = "✅ تبدیل متن به گفتار با موفقیت انجام شد."
587
  _log_tts(status_message, logs_for_this_run)
588
  return final_audio_path, status_message
589
- elif final_audio_path and generation_status_msg != "موفق": # Partial success or warning
590
  status_message = f"⚠️ {generation_status_msg}. فایل صوتی ممکن است ناقص باشد: {final_audio_path}"
591
  _log_tts(status_message, logs_for_this_run)
592
  return final_audio_path, status_message
593
- else: # No file path, error occurred
594
  status_message = f"❌ {generation_status_msg}"
595
  _log_tts(status_message, logs_for_this_run)
596
  return None, status_message
@@ -605,22 +417,21 @@ def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_pr
605
  # --- START: بخش UI و Gradio (Adapted from Alpha Translator, content from Alpha TTS) ---
606
  FLY_PRIMARY_COLOR_HEX = "#4F46E5"
607
  FLY_SECONDARY_COLOR_HEX = "#10B981"
608
- FLY_ACCENT_COLOR_HEX = "#D97706" # Orange, used for buttons in Alpha Translator
609
  FLY_TEXT_COLOR_HEX = "#1F2937"
610
  FLY_SUBTLE_TEXT_HEX = "#6B7280"
611
- FLY_LIGHT_BACKGROUND_HEX = "#F9FAFB"
612
  FLY_WHITE_HEX = "#FFFFFF"
613
  FLY_BORDER_COLOR_HEX = "#D1D5DB"
614
  FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
615
- FLY_PANEL_BG_SIMPLE = "#E0F2FE" # Light blue for specific panels like translated text
616
 
617
  app_theme_outer = gr.themes.Base(
618
- font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], # Base font
619
  ).set(
620
- body_background_fill=FLY_LIGHT_BACKGROUND_HEX, # Overall page background
621
  )
622
 
623
- # Using CSS from Alpha Translator (first code)
624
  custom_css = f"""
625
  @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
626
  @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
@@ -632,7 +443,7 @@ custom_css = f"""
632
  --fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
633
  --fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
634
  --font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif;
635
- --font-english: 'Poppins', 'Inter', system-ui, sans-serif; /* For English text outputs if any */
636
  --radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
637
  --shadow-sm: 0 1px 2px 0 rgba(0,0,0,0.05); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -2px rgba(0,0,0,0.1);
638
  --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
@@ -649,38 +460,29 @@ body {{font-family:var(--font-global);direction:rtl;background-color:var(--fly-b
649
  footer,.gradio-footer,.flagging-container,.flex.row.gap-2.absolute.bottom-2.right-2.gr-compact.gr-box.gr-text-gray-500,div[data-testid="flag"],button[title="Flag"],button[aria-label="Flag"],.footer-utils {{display:none !important;visibility:hidden !important;}}
650
  .main-content-area {{flex-grow:1;padding:0.75rem;width:100%;margin:0 auto;box-sizing:border-box;}}
651
  .content-panel-simple {{background-color:var(--fly-bg-white);padding:1rem;border-radius:var(--radius-xl);box-shadow:var(--shadow-xl);margin-top:-2rem;position:relative;z-index:10;margin-bottom:2rem;width:100%;box-sizing:border-box;}}
652
- /* Main button styling from Alpha Translator */
653
  .content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{background:var(--fly-accent) !important;margin-top:1rem !important;padding:12px 20px !important;transition:all 0.25s ease-in-out !important;color:white !important;font-weight:600 !important;border-radius:10px !important;border:none !important;box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;width:100% !important;font-size:1em !important;display:flex;align-items:center;justify-content:center;}}
654
  .content-panel-simple .gr-button.lg.primary:hover,.content-panel-simple button[variant="primary"]:hover {{background:#B45309 !important;transform:translateY(-1px) !important;box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;}}
655
- /* Input styling from Alpha Translator */
656
  .content-panel-simple .gr-input > label + div > textarea,.content-panel-simple .gr-dropdown > label + div > div > input,.content-panel-simple .gr-dropdown > label + div > div > select,.content-panel-simple .gr-textbox > label + div > textarea, .content-panel-simple .gr-file > label + div {{border-radius:8px !important;border:1.5px solid var(--fly-border-color) !important;font-size:0.95em !important;background-color:var(--fly-input-bg-simple) !important;padding:10px 12px !important;color:var(--fly-text-primary) !important;}}
657
  .content-panel-simple .gr-input > label + div > textarea:focus,.content-panel-simple .gr-dropdown > label + div > div > input:focus,.content-panel-simple .gr-dropdown > label + div > div > select:focus,.content-panel-simple .gr-textbox > label + div > textarea:focus, .content-panel-simple .gr-file > label + div:focus-within {{border-color:var(--fly-primary) !important;box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;background-color:var(--fly-bg-white) !important;}}
658
- .content-panel-simple .gr-file > label + div {{ text-align:center; border-style: dashed !important; }} /* File input specific style */
659
  .content-panel-simple .gr-dropdown select {{font-family:var(--font-global) !important;width:100%;cursor:pointer;}}
660
- /* Output text area styling (if we add one for status messages) */
661
  .content-panel-simple .gr-textbox[label*="وضعیت"] > label + div > textarea {{background-color:var(--fly-panel-bg-simple) !important;border-color:#A5D5FE !important;min-height:80px;font-family:var(--font-global);font-size:0.9em !important;line-height:1.5;padding:10px !important;}}
662
- /* Panel and Accordion styling (not using accordion here, but good to have) */
663
  .content-panel-simple .gr-panel,.content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > .gr-panel {{border-radius:8px !important;border:1px solid var(--fly-border-color) !important;background-color:var(--fly-input-bg-simple) !important;padding:0.8rem 1rem !important;margin-top:0.6rem;box-shadow:none;}}
664
  .content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > button.gr-button {{font-weight:500 !important;padding:8px 10px !important;border-radius:6px !important;background-color:#E5E7EB !important;color:var(--fly-text-primary) !important;border:1px solid #D1D5DB !important;}}
665
- /* Label styling */
666
  .content-panel-simple label > span.label-text {{font-weight:500 !important;color:#4B5563 !important;font-size:0.88em !important;margin-bottom:6px !important;display:inline-block;}}
667
- /* Slider label styling */
668
  .content-panel-simple .gr-slider label span {{font-size:0.82em !important;color:var(--fly-text-secondary);}}
669
- /* Temperature description class from AlphaTTS, adapted */
670
  .temp-description-tts {{ font-size: 0.82em !important; color: var(--fly-text-secondary) !important; margin-top: -0.5rem; margin-bottom: 1rem; padding-right: 5px; }}
671
- /* Examples styling from Alpha Translator */
672
  .content-panel-simple div[label*="نمونه"] {{margin-top:1.5rem;}}
673
  .content-panel-simple div[label*="نمونه"] .gr-button.gr-button-tool,.content-panel-simple div[label*="نمونه"] .gr-sample-button {{background-color:#E0E7FF !important;color:var(--fly-primary) !important;border-radius:6px !important;font-size:0.78em !important;padding:4px 8px !important;}}
674
  .content-panel-simple .custom-hr {{height:1px;background-color:var(--fly-border-color);margin:1.5rem 0;border:none;}}
675
  .api-warning-message {{background-color:#FFFBEB !important;color:#92400E !important;padding:10px 12px !important;border-radius:8px !important;border:1px solid #FDE68A !important;text-align:center !important;margin:0 0.2rem 1rem 0.2rem !important;font-size:0.85em !important;}}
676
- /* Audio player styling */
677
  .content-panel-simple #output_audio_tts audio {{ width: 100%; border-radius: var(--radius-md); margin-top:0.5rem; }}
678
  @media (min-width:640px) {{.main-content-area {{padding:1.5rem;max-width:700px;}} .content-panel-simple {{padding:1.5rem;}} .app-title-card h1 {{font-size:2.5em !important;}} .app-title-card p {{font-size:1.05em !important;}} }}
679
  @media (min-width:768px) {{
680
  .main-content-area {{max-width:780px;}} .content-panel-simple {{padding:2rem;}}
681
  .content-panel-simple .main-content-row {{display:flex !important;flex-direction:row !important;gap:1.5rem !important;}}
682
- .content-panel-simple .main-content-row > .gr-column:nth-child(1) {{flex-basis:60%; min-width:0;}} /* Allow shrinking */
683
- .content-panel-simple .main-content-row > .gr-column:nth-child(2) {{flex-basis:40%; min-width:0;}} /* Allow shrinking */
684
  .content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{width:auto !important;align-self:flex-start;}}
685
  .app-title-card h1 {{font-size:2.75em !important;}} .app-title-card p {{font-size:1.1em !important;}}
686
  }}
@@ -711,11 +513,10 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
711
  status_message_output = gr.Textbox(label="وضعیت پردازش", interactive=False, lines=1, placeholder="پیام‌های وضعیت اینجا نمایش داده می‌شوند...")
712
 
713
  with gr.Row(elem_classes=["main-content-row"]):
714
- with gr.Column(scale=3): # Left column for main inputs
715
  use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False)
716
- # Label for file input is handled by checkbox visibility change
717
  uploaded_file_input = gr.File(
718
- label="آپلود فایل متنی", # Will be changed by checkbox logic
719
  file_types=['.txt'],
720
  visible=False
721
  )
@@ -728,24 +529,23 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
728
  speech_prompt_tb = gr.Textbox(
729
  label="🗣️ سبک و زمینه گفتار (اختیاری)",
730
  placeholder="مثال: با لحنی شاد و پرانرژی",
731
- value="با لحنی دوستانه و رسا صحبت کن.", # Default from AlphaTTS
732
  lines=2
733
  )
734
- with gr.Column(scale=2): # Right column for settings and output
735
  speaker_voice_dd = gr.Dropdown(
736
  SPEAKER_VOICES,
737
  label="🎤 انتخاب گوینده",
738
- value="Charon" # Default from AlphaTTS
739
  )
740
  temperature_slider = gr.Slider(
741
- minimum=0.1, maximum=1.5, step=0.05, value=0.9, # Default from AlphaTTS
742
  label="🌡️ میزان خلاقیت صدا (دما)"
743
  )
744
  gr.Markdown("<p class='temp-description-tts'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایین‌تر = یکنواختی بیشتر.</p>", elem_classes=["temp-description-tts-container"])
745
 
746
  output_audio = gr.Audio(label="🎧 فایل صوتی خروجی", type="filepath", elem_id="output_audio_tts")
747
 
748
- # Button below the columns
749
  generate_button = gr.Button("🚀 تولید و پخش صدا", variant="primary", elem_classes=["lg"])
750
 
751
  gr.HTML("<hr class='custom-hr'>")
@@ -757,7 +557,7 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
757
  [False, None, "آیا می‌توانم یک پیتزای پپرونی سفارش دهم؟", "پرسشی و مودبانه.", "Achird", 0.75],
758
  ],
759
  inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
760
- outputs=[output_audio, status_message_output], # Outputting to status message as well
761
  fn=gradio_tts_interface,
762
  cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true",
763
  label="💡 نمونه‌های کاربردی"
@@ -765,10 +565,9 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
765
 
766
  gr.Markdown("<p class='app-footer-fly'>Alpha TTS © 2024</p>")
767
 
768
- # --- Event Handlers ---
769
  def toggle_file_input(use_file):
770
  if use_file:
771
- return gr.update(visible=True, label=" "), gr.update(visible=False) # Hide text_to_speak_tb label or placeholder
772
  else:
773
  return gr.update(visible=False), gr.update(visible=True, label="📝 متن فارسی برای تبدیل به گفتار")
774
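toggle_file_input above swaps visibility between the upload widget and the textbox; the checkbox is presumably wired to it elsewhere in the Blocks context, roughly as sketched below (component names come from this diff, the exact wiring is an assumption):

# Assumed wiring; the actual .change() call sits outside the hunks shown here.
use_file_input_cb.change(
    fn=toggle_file_input,
    inputs=[use_file_input_cb],
    outputs=[uploaded_file_input, text_to_speak_tb],
)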
 
@@ -782,18 +581,16 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
782
  generate_button.click(
783
  fn=gradio_tts_interface,
784
  inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
785
- outputs=[output_audio, status_message_output] # Ensure this matches function's return tuple
786
  )
787
  else:
788
  logging.error("دکمه تولید صدا (generate_button) به درستی مقداردهی اولیه نشده است.")
789
 
790
 
791
  if __name__ == "__main__":
792
- # --- START: شروع ترد ری‌استارت خودکار (From Alpha Translator) ---
793
- if os.getenv("AUTO_RESTART_ENABLED", "true").lower() == "true": # Optional via env var
794
  restart_scheduler_thread = threading.Thread(target=auto_restart_service, daemon=True)
795
  restart_scheduler_thread.start()
796
- # --- END: شروع ترد ری‌استارت خودکار ---
797
 
798
  demo.launch(
799
  server_name="0.0.0.0",
 
78
  "Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
79
  "Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
80
  ]
81
+ FIXED_MODEL_NAME = "gemini-1.5-flash-preview-tts"
82
  DEFAULT_MAX_CHUNK_SIZE = 3800
83
  DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
84
  DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
 
120
  def smart_text_split(text, max_size=3800, log_list_ref=None):
121
  if len(text) <= max_size: return [text]
122
  chunks, current_chunk = [], ""
 
123
  sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
124
  for sentence in sentences:
125
  if len(current_chunk) + len(sentence) + 1 > max_size:
126
  if current_chunk: chunks.append(current_chunk.strip())
127
  current_chunk = sentence
128
+ while len(current_chunk) > max_size:
 
129
  split_idx = -1
130
+ for punc in ['،', ',', ';', ':', ' ']:
131
  idx = current_chunk.rfind(punc, max_size // 2, max_size)
132
  if idx > split_idx : split_idx = idx
133
 
134
  if split_idx != -1:
135
  part, current_chunk = current_chunk[:split_idx+1], current_chunk[split_idx+1:]
136
+ else:
137
  part, current_chunk = current_chunk[:max_size], current_chunk[max_size:]
138
  chunks.append(part.strip())
139
  else:
140
+ current_chunk += (" " if current_chunk and sentence else "") + sentence
141
  if current_chunk: chunks.append(current_chunk.strip())
142
+ final_chunks = [c for c in chunks if c]
143
  if log_list_ref: _log_tts(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list_ref)
144
  return final_chunks
145
 
 
155
  if os.path.exists(fp):
156
  segment = AudioSegment.from_file(fp)
157
  combined += segment
158
+ if i < len(file_paths) - 1:
159
+ combined += AudioSegment.silent(duration=150)
160
  else:
161
  _log_tts(f"⚠️ فایل صوتی برای ادغام یافت نشد: {fp}", log_list_ref)
162
 
163
+ combined.export(output_path, format="wav")
164
  _log_tts(f"✅ فایل صوتی با موفقیت در '{output_path}' ادغام و ذخیره شد.", log_list_ref)
165
  return True
166
  except Exception as e:
 
179
  return None, "خطا: کلید API جیمینای برای سرویس TTS در دسترس نیست."
180
 
181
  try:
182
  genai.configure(api_key=api_key)
183
  except Exception as e:
184
  _log_tts(f"❌ خطا در مقداردهی اولیه کلاینت Gemini: {e}", log_list_ref)
185
  return None, f"خطا در ارتباط با Gemini: {e}"
 
197
  for i, chunk in enumerate(text_chunks):
198
  _log_tts(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)}...", log_list_ref)
199
200
  final_text_for_tts = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
201
 
202
  try:
203
+ # The `selected_voice` from the dropdown is not currently used in this call.
204
+ # This would require knowing the specific API parameter for voice selection with this model.
205
+ # For now, the model will use its default voice or whatever behavior is programmed.
206
+ # A more advanced implementation would pass `selected_voice` to the API if possible.
207
+
208
+ # This is where the SyntaxError occurred. The `custom_config_for_tts` variable was an
209
+ # incomplete assignment. It's removed/commented out. The actual config is inline below.
210
+ #
211
+ # # custom_config_for_tts = genai_types.GenerationConfig( # This seems to be the new way # THIS LINE CAUSED SyntaxError
212
+ # temperature=temperature_val,
213
+ # # ... (rest of the commented out block) ...
214
+ # # )
215
 
216
+ tts_model = genai.GenerativeModel(FIXED_MODEL_NAME)
217
 
218
+ # Note: `selected_voice` is not used here yet. This means the dropdown for voice selection
219
+ # will not have an effect until this part is updated to correctly pass the voice
220
+ # to the Gemini API for the `FIXED_MODEL_NAME`.
221
+ # The `final_text_for_tts` includes the `prompt_input` for style.
222
+ response = tts_model.generate_content(
223
+ final_text_for_tts,
224
  generation_config=genai_types.GenerationConfig(
225
  temperature=temperature_val,
226
+ response_mime_type="audio/wav"
227
  ),
228
  )
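The new comments above note that selected_voice is not yet passed to the API. The request shape the removed Alpha TTS code used for exactly that (quoted in the deleted comments) builds a SpeechConfig around a prebuilt voice name; a hedged sketch of it, assuming the separate google-genai client package (from google import genai) rather than the google.generativeai module configured above, and without verifying that this preview model accepts it:

from google import genai
from google.genai import types as genai_types

def synthesize_chunk(api_key, text, voice_name="Charon", temperature=0.9,
                     model_name="gemini-1.5-flash-preview-tts"):
    # Request shape quoted from the removed Alpha TTS comments; support is an assumption.
    client = genai.Client(api_key=api_key)
    contents = [genai_types.Content(role="user",
                                    parts=[genai_types.Part.from_text(text=text)])]
    config = genai_types.GenerateContentConfig(
        temperature=temperature,
        response_modalities=["audio"],
        speech_config=genai_types.SpeechConfig(
            voice_config=genai_types.VoiceConfig(
                prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=voice_name)
            )
        ),
    )
    response = client.models.generate_content(model=model_name, contents=contents, config=config)
    # Audio arrives as inline_data on the first candidate part, as handled below.
    return response.candidates[0].content.parts[0].inline_data.data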
229
 
230
  fname_base = f"{output_base_name}_part{i+1:03d}"
231
 
232
+ audio_bytes = None
233
+ mime_type = None
234
+
235
+ if response.parts and hasattr(response.parts[0], 'blob') and response.parts[0].blob.mime_type.startswith("audio/"): # More common for new SDK
236
+ audio_bytes = response.parts[0].blob.data
237
+ mime_type = response.parts[0].blob.mime_type
238
  elif response.candidates and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data: # AlphaTTS way
239
  inline_data = response.candidates[0].content.parts[0].inline_data
240
  audio_bytes = inline_data.data
241
  mime_type = inline_data.mime_type
242
+ else:
243
  audio_part = None
244
+ if response.parts:
245
+ for part in response.parts:
246
+ if hasattr(part, 'mime_type') and part.mime_type.startswith("audio/"): # Check for mime_type attr
247
+ audio_part = part
248
+ break
249
+ if audio_part and hasattr(audio_part, 'data'):
250
  audio_bytes = audio_part.data
251
  mime_type = audio_part.mime_type
252
+ elif audio_part and hasattr(audio_part, '_blob'):
253
  audio_bytes = audio_part._blob.data
254
  mime_type = audio_part._blob.mime_type
255
+
256
+ if not audio_bytes:
257
  _log_tts(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی معتبر دریافت شد.", log_list_ref)
258
+ _log_tts(f"ساختار پاسخ (Response structure): {response}", log_list_ref)
259
  continue
260
 
261
+ if not mime_type: # Safety net if mime_type wasn't extracted
262
+ _log_tts(f"⚠️ MIME type برای قطعه {i+1} یافت نشد. پیش‌فرض wav.", log_list_ref)
263
+ mime_type = "audio/wav"
264
+
265
+
266
+ ext = mimetypes.guess_extension(mime_type) or ".wav"
267
+ if "audio/L" in mime_type and ext == ".wav":
268
  audio_bytes = convert_to_wav(audio_bytes, mime_type)
269
  if not ext.startswith("."): ext = "." + ext
270
 
 
274
 
275
  except Exception as e:
276
  _log_tts(f"❌ خطا در تولید قطعه صوتی {i+1} با Gemini: {e}\n{traceback.format_exc()}", log_list_ref)
 
277
  if hasattr(e, 'response') and e.response:
278
+ _log_tts(f"جزئیات خطای Gemini API: {e.response}", log_list_ref)
279
+ continue
280
 
281
  if i < len(text_chunks) - 1 and len(text_chunks) > 1:
282
  _log_tts(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از پردازش قطعه بعدی...", log_list_ref)
 
289
  _log_tts(f"🎉 {len(generated_files)} فایل(های) صوتی با موفقیت تولید شد.", log_list_ref)
290
 
291
  final_audio_file = None
292
+ final_output_path_base = f"{output_base_name}_final"
293
 
294
  if len(generated_files) > 1:
295
  if PYDUB_AVAILABLE:
296
+ merged_fn = f"{final_output_path_base}.wav"
297
  if os.path.exists(merged_fn):
298
  try: os.remove(merged_fn)
299
  except OSError as e: _log_tts(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e}", log_list_ref)
300
 
301
  if merge_audio_files_func(generated_files, merged_fn, log_list_ref):
302
  final_audio_file = merged_fn
 
303
  for fp in generated_files:
304
  if os.path.abspath(fp) != os.path.abspath(merged_fn):
305
  try: os.remove(fp)
306
  except OSError as e_del: _log_tts(f"⚠️ عدم امکان حذف فایل موقت '{fp}': {e_del}", log_list_ref)
307
+ else:
308
  _log_tts("⚠️ ادغام فایل‌های صوتی ناموفق بود. اولین قطعه ارائه می‌شود.", log_list_ref)
309
  if generated_files:
310
  try:
 
314
  if os.path.exists(fallback_fn): os.remove(fallback_fn)
315
  os.rename(first_chunk_path, fallback_fn)
316
  final_audio_file = fallback_fn
 
317
  for i_gf in range(1, len(generated_files)):
318
  try: os.remove(generated_files[i_gf])
319
  except OSError as e_del: _log_tts(f"⚠️ عدم امکان حذف فایل موقت '{generated_files[i_gf]}': {e_del}", log_list_ref)
320
  except Exception as e_rename_fallback:
321
  _log_tts(f"خطا در تغییر نام فایل اولین قطعه (fallback): {e_rename_fallback}", log_list_ref)
322
+ final_audio_file = generated_files[0]
323
+ else:
324
  _log_tts("⚠️ Pydub برای ادغام در دسترس نیست. اولین قطعه صوتی ارائه می‌شود.", log_list_ref)
 
325
  if generated_files:
326
  try:
327
  first_chunk_path = generated_files[0]
 
330
  if os.path.exists(single_fallback_fn): os.remove(single_fallback_fn)
331
  os.rename(first_chunk_path, single_fallback_fn)
332
  final_audio_file = single_fallback_fn
 
  for i_gf in range(1, len(generated_files)):
334
  _log_tts(f"قطعه اضافی موجود: {generated_files[i_gf]} (ادغام نشده)", log_list_ref)
335
 
 
351
  final_audio_file = final_single_fn
352
  except Exception as e_rename_single:
353
  _log_tts(f"خطا در تغییر نام فایل تکی نهایی: {e_rename_single}", log_list_ref)
354
+ final_audio_file = generated_files[0]
355
 
356
  if final_audio_file and not os.path.exists(final_audio_file):
357
  _log_tts(f"⚠️ فایل صوتی نهایی '{final_audio_file}' پس از پردازش وجود ندارد!", log_list_ref)
358
  return None, "خطا: فایل صوتی نهایی یافت نشد."
359
 
360
+ return final_audio_file, "موفق"
361
 
362
  def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature):
363
+ logs_for_this_run = []
364
  actual_text = ""
365
  status_message = "شروع پردازش..."
366
  final_audio_path = None
 
390
  _log_tts(f"تنظیمات: Speaker={speaker_voice}, Temp={temperature}, Prompt='{speech_prompt[:30]}...'", logs_for_this_run)
391
 
392
  try:
 
393
  final_audio_path, generation_status_msg = core_generate_audio(
394
  actual_text, speech_prompt, speaker_voice, temperature, logs_for_this_run
395
  )
 
398
  status_message = "✅ تبدیل متن به گفتار با موفقیت انجام شد."
399
  _log_tts(status_message, logs_for_this_run)
400
  return final_audio_path, status_message
401
+ elif final_audio_path and generation_status_msg != "موفق":
402
  status_message = f"⚠️ {generation_status_msg}. فایل صوتی ممکن است ناقص باشد: {final_audio_path}"
403
  _log_tts(status_message, logs_for_this_run)
404
  return final_audio_path, status_message
405
+ else:
406
  status_message = f"❌ {generation_status_msg}"
407
  _log_tts(status_message, logs_for_this_run)
408
  return None, status_message
 
417
  # --- START: بخش UI و Gradio (Adapted from Alpha Translator, content from Alpha TTS) ---
418
  FLY_PRIMARY_COLOR_HEX = "#4F46E5"
419
  FLY_SECONDARY_COLOR_HEX = "#10B981"
420
+ FLY_ACCENT_COLOR_HEX = "#D97706"
421
  FLY_TEXT_COLOR_HEX = "#1F2937"
422
  FLY_SUBTLE_TEXT_HEX = "#6B7280"
423
+ FLY_LIGHT_BACKGROUND_HEX = "#F9FAFB"
424
  FLY_WHITE_HEX = "#FFFFFF"
425
  FLY_BORDER_COLOR_HEX = "#D1D5DB"
426
  FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
427
+ FLY_PANEL_BG_SIMPLE = "#E0F2FE"
428
 
429
  app_theme_outer = gr.themes.Base(
430
+ font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
431
  ).set(
432
+ body_background_fill=FLY_LIGHT_BACKGROUND_HEX,
433
  )
434
 
 
435
  custom_css = f"""
436
  @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
437
  @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
 
443
  --fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
444
  --fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
445
  --font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif;
446
+ --font-english: 'Poppins', 'Inter', system-ui, sans-serif;
447
  --radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
448
  --shadow-sm: 0 1px 2px 0 rgba(0,0,0,0.05); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -2px rgba(0,0,0,0.1);
449
  --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
 
460
  footer,.gradio-footer,.flagging-container,.flex.row.gap-2.absolute.bottom-2.right-2.gr-compact.gr-box.gr-text-gray-500,div[data-testid="flag"],button[title="Flag"],button[aria-label="Flag"],.footer-utils {{display:none !important;visibility:hidden !important;}}
461
  .main-content-area {{flex-grow:1;padding:0.75rem;width:100%;margin:0 auto;box-sizing:border-box;}}
462
  .content-panel-simple {{background-color:var(--fly-bg-white);padding:1rem;border-radius:var(--radius-xl);box-shadow:var(--shadow-xl);margin-top:-2rem;position:relative;z-index:10;margin-bottom:2rem;width:100%;box-sizing:border-box;}}
 
463
  .content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{background:var(--fly-accent) !important;margin-top:1rem !important;padding:12px 20px !important;transition:all 0.25s ease-in-out !important;color:white !important;font-weight:600 !important;border-radius:10px !important;border:none !important;box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;width:100% !important;font-size:1em !important;display:flex;align-items:center;justify-content:center;}}
464
  .content-panel-simple .gr-button.lg.primary:hover,.content-panel-simple button[variant="primary"]:hover {{background:#B45309 !important;transform:translateY(-1px) !important;box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;}}
 
465
  .content-panel-simple .gr-input > label + div > textarea,.content-panel-simple .gr-dropdown > label + div > div > input,.content-panel-simple .gr-dropdown > label + div > div > select,.content-panel-simple .gr-textbox > label + div > textarea, .content-panel-simple .gr-file > label + div {{border-radius:8px !important;border:1.5px solid var(--fly-border-color) !important;font-size:0.95em !important;background-color:var(--fly-input-bg-simple) !important;padding:10px 12px !important;color:var(--fly-text-primary) !important;}}
466
  .content-panel-simple .gr-input > label + div > textarea:focus,.content-panel-simple .gr-dropdown > label + div > div > input:focus,.content-panel-simple .gr-dropdown > label + div > div > select:focus,.content-panel-simple .gr-textbox > label + div > textarea:focus, .content-panel-simple .gr-file > label + div:focus-within {{border-color:var(--fly-primary) !important;box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;background-color:var(--fly-bg-white) !important;}}
467
+ .content-panel-simple .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
468
  .content-panel-simple .gr-dropdown select {{font-family:var(--font-global) !important;width:100%;cursor:pointer;}}
 
469
  .content-panel-simple .gr-textbox[label*="وضعیت"] > label + div > textarea {{background-color:var(--fly-panel-bg-simple) !important;border-color:#A5D5FE !important;min-height:80px;font-family:var(--font-global);font-size:0.9em !important;line-height:1.5;padding:10px !important;}}
 
470
  .content-panel-simple .gr-panel,.content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > .gr-panel {{border-radius:8px !important;border:1px solid var(--fly-border-color) !important;background-color:var(--fly-input-bg-simple) !important;padding:0.8rem 1rem !important;margin-top:0.6rem;box-shadow:none;}}
471
  .content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > button.gr-button {{font-weight:500 !important;padding:8px 10px !important;border-radius:6px !important;background-color:#E5E7EB !important;color:var(--fly-text-primary) !important;border:1px solid #D1D5DB !important;}}
 
472
  .content-panel-simple label > span.label-text {{font-weight:500 !important;color:#4B5563 !important;font-size:0.88em !important;margin-bottom:6px !important;display:inline-block;}}
 
473
  .content-panel-simple .gr-slider label span {{font-size:0.82em !important;color:var(--fly-text-secondary);}}
 
474
  .temp-description-tts {{ font-size: 0.82em !important; color: var(--fly-text-secondary) !important; margin-top: -0.5rem; margin-bottom: 1rem; padding-right: 5px; }}
 
475
  .content-panel-simple div[label*="نمونه"] {{margin-top:1.5rem;}}
476
  .content-panel-simple div[label*="نمونه"] .gr-button.gr-button-tool,.content-panel-simple div[label*="نمونه"] .gr-sample-button {{background-color:#E0E7FF !important;color:var(--fly-primary) !important;border-radius:6px !important;font-size:0.78em !important;padding:4px 8px !important;}}
477
  .content-panel-simple .custom-hr {{height:1px;background-color:var(--fly-border-color);margin:1.5rem 0;border:none;}}
478
  .api-warning-message {{background-color:#FFFBEB !important;color:#92400E !important;padding:10px 12px !important;border-radius:8px !important;border:1px solid #FDE68A !important;text-align:center !important;margin:0 0.2rem 1rem 0.2rem !important;font-size:0.85em !important;}}
 
479
  .content-panel-simple #output_audio_tts audio {{ width: 100%; border-radius: var(--radius-md); margin-top:0.5rem; }}
480
  @media (min-width:640px) {{.main-content-area {{padding:1.5rem;max-width:700px;}} .content-panel-simple {{padding:1.5rem;}} .app-title-card h1 {{font-size:2.5em !important;}} .app-title-card p {{font-size:1.05em !important;}} }}
481
  @media (min-width:768px) {{
482
  .main-content-area {{max-width:780px;}} .content-panel-simple {{padding:2rem;}}
483
  .content-panel-simple .main-content-row {{display:flex !important;flex-direction:row !important;gap:1.5rem !important;}}
484
+ .content-panel-simple .main-content-row > .gr-column:nth-child(1) {{flex-basis:60%; min-width:0;}}
485
+ .content-panel-simple .main-content-row > .gr-column:nth-child(2) {{flex-basis:40%; min-width:0;}}
486
  .content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{width:auto !important;align-self:flex-start;}}
487
  .app-title-card h1 {{font-size:2.75em !important;}} .app-title-card p {{font-size:1.1em !important;}}
488
  }}
 
513
  status_message_output = gr.Textbox(label="وضعیت پردازش", interactive=False, lines=1, placeholder="پیام‌های وضعیت اینجا نمایش داده می‌شوند...")
514
 
515
  with gr.Row(elem_classes=["main-content-row"]):
516
+ with gr.Column(scale=3):
517
  use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False)
 
518
  uploaded_file_input = gr.File(
519
+ label="آپلود فایل متنی",
520
  file_types=['.txt'],
521
  visible=False
522
  )
 
529
  speech_prompt_tb = gr.Textbox(
530
  label="🗣️ سبک و زمینه گفتار (اختیاری)",
531
  placeholder="مثال: با لحنی شاد و پرانرژی",
532
+ value="با لحنی دوستانه و رسا صحبت کن.",
533
  lines=2
534
  )
535
+ with gr.Column(scale=2):
536
  speaker_voice_dd = gr.Dropdown(
537
  SPEAKER_VOICES,
538
  label="🎤 انتخاب گوینده",
539
+ value="Charon"
540
  )
541
  temperature_slider = gr.Slider(
542
+ minimum=0.1, maximum=1.5, step=0.05, value=0.9,
543
  label="🌡️ میزان خلاقیت صدا (دما)"
544
  )
545
  gr.Markdown("<p class='temp-description-tts'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایین‌تر = یکنواختی بیشتر.</p>", elem_classes=["temp-description-tts-container"])
546
 
547
  output_audio = gr.Audio(label="🎧 فایل صوتی خروجی", type="filepath", elem_id="output_audio_tts")
548
 
 
549
  generate_button = gr.Button("🚀 تولید و پخش صدا", variant="primary", elem_classes=["lg"])
550
 
551
  gr.HTML("<hr class='custom-hr'>")
 
557
  [False, None, "آیا می‌توانم یک پیتزای پپرونی سفارش دهم؟", "پرسشی و مودبانه.", "Achird", 0.75],
558
  ],
559
  inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
560
+ outputs=[output_audio, status_message_output],
561
  fn=gradio_tts_interface,
562
  cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true",
563
  label="💡 نمونه‌های کاربردی"
 
565
 
566
  gr.Markdown("<p class='app-footer-fly'>Alpha TTS © 2024</p>")
567
 
 
568
  def toggle_file_input(use_file):
569
  if use_file:
570
+ return gr.update(visible=True, label=" "), gr.update(visible=False)
571
  else:
572
  return gr.update(visible=False), gr.update(visible=True, label="📝 متن فارسی برای تبدیل به گفتار")
573
 
 
581
  generate_button.click(
582
  fn=gradio_tts_interface,
583
  inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
584
+ outputs=[output_audio, status_message_output]
585
  )
586
  else:
587
  logging.error("دکمه تولید صدا (generate_button) به درستی مقداردهی اولیه نشده است.")
588
 
589
 
590
  if __name__ == "__main__":
591
+ if os.getenv("AUTO_RESTART_ENABLED", "true").lower() == "true":
 
592
  restart_scheduler_thread = threading.Thread(target=auto_restart_service, daemon=True)
593
  restart_scheduler_thread.start()
 
594
 
595
  demo.launch(
596
  server_name="0.0.0.0",