Update app.py
app.py (CHANGED)
@@ -78,7 +78,7 @@ SPEAKER_VOICES = [
    "Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
    "Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
]
FIXED_MODEL_NAME = "gemini-1.5-flash-preview-tts"
DEFAULT_MAX_CHUNK_SIZE = 3800
DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
@@ -120,28 +120,26 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
def smart_text_split(text, max_size=3800, log_list_ref=None):
    if len(text) <= max_size: return [text]
    chunks, current_chunk = [], ""
-    # Improved sentence splitting for Persian and English
    sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
    for sentence in sentences:
        if len(current_chunk) + len(sentence) + 1 > max_size:
            if current_chunk: chunks.append(current_chunk.strip())
            current_chunk = sentence
            while len(current_chunk) > max_size:
-                # Try to split at common punctuation or spaces, working backwards
                split_idx = -1
                for punc in ['،', ',', ';', ':', ' ']:
                    idx = current_chunk.rfind(punc, max_size // 2, max_size)
                    if idx > split_idx: split_idx = idx

                if split_idx != -1:
                    part, current_chunk = current_chunk[:split_idx+1], current_chunk[split_idx+1:]
                else:
                    part, current_chunk = current_chunk[:max_size], current_chunk[max_size:]
                chunks.append(part.strip())
        else:
            current_chunk += (" " if current_chunk and sentence else "") + sentence
    if current_chunk: chunks.append(current_chunk.strip())
    final_chunks = [c for c in chunks if c]
    if log_list_ref: _log_tts(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list_ref)
    return final_chunks
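For orientation, a minimal usage sketch of `smart_text_split` as kept above; the sample text and sizes are illustrative only and assume the function is importable from app.py:

```python
# Illustrative only: exercises smart_text_split from app.py with made-up input.
sample_text = "این جمله اول است. این جمله دوم است؟ This is a third sentence. " * 200
chunks = smart_text_split(sample_text, max_size=3800)
print(f"{len(chunks)} chunks, sizes: {[len(c) for c in chunks]}")
# Every chunk stays at or under max_size; splits prefer sentence enders (. ! ? ؟ ۔),
# then fall back to '،', ',', ';', ':' or a space in the upper half of an oversized chunk.
```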
@@ -157,12 +155,12 @@ def merge_audio_files_func(file_paths, output_path, log_list_ref):
            if os.path.exists(fp):
                segment = AudioSegment.from_file(fp)
                combined += segment
                if i < len(file_paths) - 1:
                    combined += AudioSegment.silent(duration=150)
            else:
                _log_tts(f"⚠️ فایل صوتی برای ادغام یافت نشد: {fp}", log_list_ref)

        combined.export(output_path, format="wav")
        _log_tts(f"✅ فایل صوتی با موفقیت در '{output_path}' ادغام و ذخیره شد.", log_list_ref)
        return True
    except Exception as e:
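As a self-contained reference for the pydub path shown above, a small sketch of the same merge pattern; the file names are placeholders, not paths produced by this commit:

```python
# Sketch of the merge pattern used by merge_audio_files_func (placeholder file names).
from pydub import AudioSegment

parts = ["alpha_tts_audio_part001.wav", "alpha_tts_audio_part002.wav"]
combined = AudioSegment.empty()
for i, fp in enumerate(parts):
    combined += AudioSegment.from_file(fp)
    if i < len(parts) - 1:
        combined += AudioSegment.silent(duration=150)  # 150 ms gap between chunks, as above
combined.export("alpha_tts_audio_final.wav", format="wav")
```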
@@ -181,50 +179,7 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
        return None, "خطا: کلید API جیمینای برای سرویس TTS در دسترس نیست."

    try:
-        # Configure genai with the specific API key for this operation
-        # Note: genai.configure is global. If running concurrent operations with different keys,
-        # genai.Client(api_key=api_key) is safer. Let's assume genai.Client for TTS.
-        client = genai.GenerativeModel(model_name=FIXED_MODEL_NAME, api_key=api_key) # Old way
-        # For specific model like TTS, often it's through client.models or genai.get_model
-        # The second script used: client = genai.Client(api_key=api_key)
-        # And then client.models.generate_content(model=FIXED_MODEL_NAME, ...)
-        # Let's stick to genai.Client for TTS as it's more direct for such models.
-
-        # Re-instantiate client with the specific key (safer than global configure if other parts of app use genai)
-        # However, the get_gemini_api_key_sync rotates a global index, so global configure is implied.
-        # For simplicity with provided key rotation:
-        current_genai_client = genai.get_model(f"models/{FIXED_MODEL_NAME}") # Simpler if model name is just 'tts-model'
-        # But FIXED_MODEL_NAME is "gemini-1.5-flash-preview-tts"
-        # This might require `genai.configure(api_key=api_key)` first.
-        # Let's use the direct method from AlphaTTS script for robustneess.
-        genai.configure(api_key=api_key) # Configure with the rotated key
-        # model_instance = genai.GenerativeModel(FIXED_MODEL_NAME) # This is usually for text/chat models
-        # For TTS, the AlphaTTS code used `client.models.generate_content`
-        # which implies `genai.Client(api_key=...)` then `client.models.generate_content(...)`
-        # Or if `genai.configure` is used, then `genai.generate_text(...)` or similar global funcs.
-        # Let's assume `genai.configure` is enough and then use a top-level function if available,
-        # or stick to client.
-
-        # Given the AlphaTTS structure:
-        # client = genai.Client(api_key=api_key) # This is the most direct way if Client takes api_key
-        # Let's assume genai.configure is what's intended with the key rotation logic.
-        # genai.configure(api_key=api_key) # Already done by key rotation
-        # This is a bit messy. Let's refine `get_gemini_api_key_sync` to also call `genai.configure`.
-        # No, `get_gemini_api_key_sync` should just return the key. The caller configures.
-
-        # Safest approach: configure genai globally for this operation
        genai.configure(api_key=api_key)
-        # Then use a model instance. The second script used client.models.generate_content
-        # which is not directly available on GenerativeModel typically.
-        # Let's try to use the structure from Alpha TTS as much as possible.
-        # It used `client.models.generate_content`.
-        # This means we might need to instantiate `genai.Client` instead of `genai.GenerativeModel`.
-        # However, `genai.Client` is usually for the full API surface.
-        # Let's try with `genai.GenerativeModel` and see if it supports speech config.
-        # If not, we'll need to use `genai.generate_content` with the full model path.
-
-        model_to_use_direct = f"models/{FIXED_MODEL_NAME}" # e.g., "models/gemini-1.5-flash-preview-tts"
-
    except Exception as e:
        _log_tts(f"❌ خطا در مقداردهی اولیه کلاینت Gemini: {e}", log_list_ref)
        return None, f"خطا در ارتباط با Gemini: {e}"
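The removed comments debate where to call `genai.configure`; what survives is a single global configure with whatever key the rotation helper returned. A minimal sketch of that pattern, with a hypothetical stand-in for `get_gemini_api_key_sync` (the real helper rotates a global index):

```python
# Hedged sketch of the rotate-then-configure pattern; get_next_key is a hypothetical stand-in.
import itertools
import google.generativeai as genai

_key_cycle = itertools.cycle(["GEMINI_KEY_1", "GEMINI_KEY_2"])  # placeholder keys

def get_next_key() -> str:
    return next(_key_cycle)

genai.configure(api_key=get_next_key())               # global; affects all genai calls in the process
tts_model = genai.GenerativeModel(FIXED_MODEL_NAME)   # FIXED_MODEL_NAME as defined earlier in app.py
```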
@@ -242,195 +197,74 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
    for i, chunk in enumerate(text_chunks):
        _log_tts(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)}...", log_list_ref)

-        # Constructing the request content based on AlphaTTS structure
        final_text_for_tts = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk

-        # This part needs to align with how Gemini TTS API expects requests via the Python SDK
-        # AlphaTTS used:
-        # contents = [genai_types.Content(role="user", parts=[genai_types.Part.from_text(text=final_text)])]
-        # config = genai_types.GenerateContentConfig(temperature=temperature_val, response_modalities=["audio"],
-        #     speech_config=genai_types.SpeechConfig(voice_config=genai_types.VoiceConfig(
-        #         prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice))))
-        # response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
-
-        # Using global `generate_content` after `genai.configure(api_key=...)`
        try:
-
-
-
-            #
-
-            #
+            # The `selected_voice` from the dropdown is not currently used in this call.
+            # This would require knowing the specific API parameter for voice selection with this model.
+            # For now, the model will use its default voice or whatever behavior is programmed.
+            # A more advanced implementation would pass `selected_voice` to the API if possible.
+
+            # This is where the SyntaxError occurred. The `custom_config_for_tts` variable was an
+            # incomplete assignment. It's removed/commented out. The actual config is inline below.
+            #
+            # # custom_config_for_tts = genai_types.GenerationConfig( # This seems to be the new way # THIS LINE CAUSED SyntaxError
+            #     temperature=temperature_val,
+            # # ... (rest of the commented out block) ...
+            # # )

-
-            # The `client.models.generate_content` from AlphaTTS is more specific.
-            # `genai.GenerativeModel(model_name).generate_content` is the current standard.
+            tts_model = genai.GenerativeModel(FIXED_MODEL_NAME)

-
-
-
-
-
-
-            # This needs verification against current Gemini SDK for TTS.
-            # The `response_modalities=["audio"]` and `speech_config` were used with `client.models.generate_content`.
-            # If `GenerativeModel` is used, the request structure might be different.
-            # The `generate_content` method of `GenerativeModel` takes `request_options` for things like `response_mime_type`.
-
-            # Let's revert to the structure most likely to work based on AlphaTTS's use of `client.models.generate_content`
-            # This means we may need to use `genai.Client(api_key=api_key).models.get(FIXED_MODEL_NAME).generate_content(...)`
-            # OR `genai.generate_content(model=f"models/{FIXED_MODEL_NAME}", contents=..., generation_config=..., speech_config=...)` if that signature exists.
-
-            # Simplest path if `genai.configure` is used and there's a global way:
-            # This is a common pattern for `GenerateContentRequest`
-            gc_request = genai_types.GenerateContentRequest(
-                model=f"models/{FIXED_MODEL_NAME}", # Ensure "models/" prefix if needed
-                contents=request_contents,
+            # Note: `selected_voice` is not used here yet. This means the dropdown for voice selection
+            # will not have an effect until this part is updated to correctly pass the voice
+            # to the Gemini API for the `FIXED_MODEL_NAME`.
+            # The `final_text_for_tts` includes the `prompt_input` for style.
+            response = tts_model.generate_content(
+                final_text_for_tts,
                generation_config=genai_types.GenerationConfig(
                    temperature=temperature_val,
                    response_mime_type="audio/wav"
                ),
-                # How to pass voice and prompt? This is SDK specific.
-                # Re-checking AlphaTTS: `speech_config` was part of `GenerateContentConfig` passed to `client.models.generate_content`
-                # This is non-standard for `genai.GenerationConfig`.
-                # It seems `genai.Client().model().generate_content()` has a different `config` param.
            )
-
-            # Let's use the exact structure from AlphaTTS for `config` as it was working there.
-            # This implies that `genai.generate_content` (global) or `GenerativeModel.generate_content`
-            # must accept a similar config object if `client.models.generate_content` is not used.
-
-            custom_config_for_tts = genai_types.GenerationConfig( # This seems to be the new way
-                temperature=temperature_val,
-                # response_modalities=["audio"], # This might be implicit or handled by response_mime_type
-                # The following was from AlphaTTS, might need to be adapted or is for older/different client path
-                # speech_config=genai_types.SpeechConfig(
-                #     voice_config=genai_types.VoiceConfig(
-                #         prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)
-                #     )
-                # )
-                # For new Gemini 1.5 Flash/Pro with native audio output, it's often simpler:
-                # You might pass voice parameters differently, or the model implicitly handles it.
-                # The `FIXED_MODEL_NAME` "gemini-1.5-flash-preview-tts" suggests it IS a TTS model.
-                # Let's assume for now the model name and text input are enough, and voice/prompt are part of the text or model behavior.
-                # If `selected_voice` and `prompt_input` are crucial, they need to be part of the request.
-                # `prompt_input` can be part of `final_text_for_tts`.
-                # `selected_voice` needs a parameter in the API call.
-                # The Gemini API for TTS usually involves specifying the voice in the request.
-                # e.g. in `synthesis_input` or `voice_selection_params`
-
-            # The most up-to-date way for Gemini 1.5 Flash TTS might involve `tools` or specific TTS request structures.
-            # Given the AlphaTTS code, it used `speech_config`. Let's try to replicate.
-            # `genai.GenerativeModel.generate_content` has `generation_config` and `safety_settings`.
-            # The `speech_config` is not standard there.
-
-            # If `FIXED_MODEL_NAME` is a true generative model that can output audio,
-            # the prompt needs to guide it.
-            # "Generate an audio of the following text with voice {selected_voice}: {chunk}"
-            # This is less likely for specialized TTS models.
-
-            # Fallback to a more direct call if available for TTS, or ensure `GenerativeModel` is configured correctly.
-            # The most robust way is to use the specific SDK features for TTS.
-            # If `genai.get_model("models/text-to-speech")` exists:
-            # tts_service_model = genai.get_model("models/text-to-speech")
-            # response = tts_service_model.synthesize_speech(text=final_text_for_tts, voice=selected_voice, ...)
-            # This is typical for dedicated TTS.
-            # Since AlphaTTS used `gemini-X.Y-flash-preview-tts`, it's likely a multimodal model.
-
-            # Replicating the structure that AlphaTTS `client.models.generate_content` used:
-            # This is the most likely path to success given it worked in AlphaTTS.
-            # We need to call a similar function. `genai.GenerativeModel(FIXED_MODEL_NAME)` is the modern way.
-            tts_model = genai.GenerativeModel(FIXED_MODEL_NAME) # api_key is globally configured
-
-            # Constructing the specific configuration for TTS with GenerativeModel.
-            # This is where `selected_voice` and `prompt_input` (as system instruction or context) matter.
-            # The Gemini API documentation for multimodal models with audio output is key here.
-            # Often, it's done by specifying `response_mime_type='audio/wav'` in `generation_config`.
-            # The voice selection might be a parameter in `GenerationConfig` or part of the prompt for some models.
-
-            # Let's assume `selected_voice` can be part of the text prompt for now if not a direct API param.
-            # And `prompt_input` is part of the context.
-
-            # Simpler request, assuming model handles voice from name or general quality from prompt:
-            # The `prompt_input` from AlphaTTS was more like a system instruction for tone.
-            # The `selected_voice` was a specific voice model name.
-
-            # The crucial part from AlphaTTS was:
-            # speech_config = genai_types.SpeechConfig(
-            #     voice_config=genai_types.VoiceConfig(
-            #         prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)))
-            # And this `speech_config` was passed into a `GenerateContentConfig` like object.
-            # Let's try to pass this to `tts_model.generate_content`.
-
-            # The new SDK might use `ToolConfig` for speech synthesis or `Content` with specific parts.
-            # For direct audio generation, it's often simpler:
-            response = tts_model.generate_content(
-                final_text_for_tts, # The text to synthesize
-                generation_config=genai_types.GenerationConfig(
-                    temperature=temperature_val,
-                    # Candidate count, stop sequences, etc.
-                    # For audio output, you specify the desired mime type:
-                    response_mime_type="audio/wav" # Or "audio/mp3" if supported
-                ),
-                # How to specify voice like 'Achird', 'Zephyr'?
-                # This is the missing link if `selected_voice` is not part of the model name itself.
-                # If `FIXED_MODEL_NAME` implies a voice, or if it's a general TTS synthesizer,
-                # voice selection must be in the request.
-                # The `speech_config` from AlphaTTS is the best hint.
-                # It might be that `genai.GenerativeModel` does not support this `speech_config`.
-                # And `genai.Client(api_key=...).models.get(model_name).generate_content(...)` was the correct path.
-
-                # Let's assume `request_options` might hold it, or a `Tool` for TTS.
-                # Given `SPEAKER_VOICES`, it implies a selection mechanism.
-                # If `genai_types.SpeechConfig` exists, it's meant to be used.
-                # Perhaps `GenerativeModel.generate_content(contents=..., generation_config=..., tools=...)`
-                # where one tool is configured for speech synthesis with the voice.
-
-                # Sticking to the most direct interpretation of AlphaTTS's success:
-                # It used a `config` object containing `speech_config`.
-                # `genai.GenerativeModel.generate_content`'s `generation_config` is standard.
-                # The `client.models.generate_content` in AlphaTTS might be an older or different SDK path.
-
-                # Let's try with the most direct `GenerativeModel` call, assuming voice is part of the prompt or fixed for the model.
-                # If `selected_voice` is critical, this will fail to use it.
-                # The `prompt_input` (e.g., "با لحنی شاد") can be prepended to `final_text_for_tts`.
-                # text_with_prompt_and_voice_hint = f"{prompt_input}. Voice: {selected_voice}. Text: {chunk}"
-                # This is a guess if dedicated voice parameters are not obvious.
-            )

            fname_base = f"{output_base_name}_part{i+1:03d}"

-
-
-
-
-
-
-                audio_bytes = response.parts[0].audio_data # This is a guess
-                mime_type = "audio/wav" # Assuming we requested WAV
+            audio_bytes = None
+            mime_type = None
+
+            if response.parts and hasattr(response.parts[0], 'blob') and response.parts[0].blob.mime_type.startswith("audio/"): # More common for new SDK
+                audio_bytes = response.parts[0].blob.data
+                mime_type = response.parts[0].blob.mime_type
            elif response.candidates and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data: # AlphaTTS way
                inline_data = response.candidates[0].content.parts[0].inline_data
                audio_bytes = inline_data.data
                mime_type = inline_data.mime_type
            else:
                audio_part = None
-
-
-
-
-
+                if response.parts:
+                    for part in response.parts:
+                        if hasattr(part, 'mime_type') and part.mime_type.startswith("audio/"): # Check for mime_type attr
+                            audio_part = part
+                            break
+                if audio_part and hasattr(audio_part, 'data'):
                    audio_bytes = audio_part.data
                    mime_type = audio_part.mime_type
                elif audio_part and hasattr(audio_part, '_blob'):
                    audio_bytes = audio_part._blob.data
                    mime_type = audio_part._blob.mime_type

+            if not audio_bytes:
                _log_tts(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی معتبر دریافت شد.", log_list_ref)
-                _log_tts(f"Response structure: {response}", log_list_ref)
+                _log_tts(f"ساختار پاسخ (Response structure): {response}", log_list_ref)
                continue

-
-
+            if not mime_type: # Safety net if mime_type wasn't extracted
+                _log_tts(f"⚠️ MIME type برای قطعه {i+1} یافت نشد. پیشفرض wav.", log_list_ref)
+                mime_type = "audio/wav"
+
+
+            ext = mimetypes.guess_extension(mime_type) or ".wav"
+            if "audio/L" in mime_type and ext == ".wav":
                audio_bytes = convert_to_wav(audio_bytes, mime_type)
            if not ext.startswith("."): ext = "." + ext

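The removed comments keep circling back to the AlphaTTS call path, where `selected_voice` is carried in a `speech_config` rather than in the prompt text. A hedged reconstruction of that path using the `google-genai` client the comments describe; this is a sketch of the alternative, not the code this commit ships:

```python
# Sketch of the AlphaTTS-style call quoted in the removed comments (google-genai client).
# Assumes api_key, final_text_for_tts, temperature_val, selected_voice, FIXED_MODEL_NAME as in app.py.
from google import genai
from google.genai import types as genai_types

client = genai.Client(api_key=api_key)
contents = [genai_types.Content(role="user",
                                parts=[genai_types.Part.from_text(text=final_text_for_tts)])]
config = genai_types.GenerateContentConfig(
    temperature=temperature_val,
    response_modalities=["AUDIO"],
    speech_config=genai_types.SpeechConfig(
        voice_config=genai_types.VoiceConfig(
            prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice))),
)
response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
audio_bytes = response.candidates[0].content.parts[0].inline_data.data  # raw PCM or WAV bytes
```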
@@ -440,21 +274,9 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va

        except Exception as e:
            _log_tts(f"❌ خطا در تولید قطعه صوتی {i+1} با Gemini: {e}\n{traceback.format_exc()}", log_list_ref)
-            # Try to get more detailed error from Gemini if available
            if hasattr(e, 'response') and e.response:
-                _log_tts(f"Gemini API
-
-            error_message_for_ui = f"خطا در تولید صدا ({type(e).__name__})."
-            if NUM_GEMINI_KEYS > 1:
-                error_message_for_ui += " کلید بعدی امتحان خواهد شد اگر قطعات دیگری وجود داشته باشد." # Not quite, key rotates per call not per chunk failure.
-            # For now, a single chunk failure might stop the whole process if we don't continue.
-            # The loop `continue`s, so other chunks will be tried.
-            # The error message for the UI should be a summary at the end.
-            # This needs to be handled in `gradio_tts_interface`.
-            # Let's have `core_generate_audio` return (None, error_message) on first critical failure.
-            # No, it should try all chunks and return what it could.
-            # The `gradio_tts_interface` will decide the final message.
-            continue # Try next chunk
+                _log_tts(f"جزئیات خطای Gemini API: {e.response}", log_list_ref)
+            continue

        if i < len(text_chunks) - 1 and len(text_chunks) > 1:
            _log_tts(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از پردازش قطعه بعدی...", log_list_ref)
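The kept code falls back to `convert_to_wav` when the response is raw PCM (a mime type like `audio/L16;rate=24000`), and an earlier hunk header references `parse_audio_mime_type`. Those helpers are not shown in this diff; below is a hedged sketch of how such helpers are commonly written, with `_sketch` suffixes to make clear they illustrate the idea rather than reproduce the implementations in app.py:

```python
# Hedged sketch of parse_audio_mime_type / convert_to_wav style helpers for raw PCM responses.
import struct

def parse_audio_mime_type_sketch(mime_type: str) -> dict[str, int]:
    bits, rate = 16, 24000                      # defaults if parameters are missing
    for part in mime_type.split(";"):
        part = part.strip()
        if part.lower().startswith("rate="):
            rate = int(part.split("=", 1)[1])
        elif part.startswith("audio/L"):
            bits = int(part.split("L", 1)[1])
    return {"bits_per_sample": bits, "rate": rate}

def convert_to_wav_sketch(audio_data: bytes, mime_type: str) -> bytes:
    p = parse_audio_mime_type_sketch(mime_type)
    bits, rate, channels = p["bits_per_sample"], p["rate"], 1
    byte_rate = rate * channels * bits // 8
    block_align = channels * bits // 8
    header = struct.pack("<4sI4s4sIHHIIHH4sI",
                         b"RIFF", 36 + len(audio_data), b"WAVE",
                         b"fmt ", 16, 1, channels, rate, byte_rate, block_align, bits,
                         b"data", len(audio_data))
    return header + audio_data                  # 44-byte RIFF/WAVE header + PCM payload
```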
@@ -467,23 +289,22 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
    _log_tts(f"🎉 {len(generated_files)} فایل(های) صوتی با موفقیت تولید شد.", log_list_ref)

    final_audio_file = None
    final_output_path_base = f"{output_base_name}_final"

    if len(generated_files) > 1:
        if PYDUB_AVAILABLE:
            merged_fn = f"{final_output_path_base}.wav"
            if os.path.exists(merged_fn):
                try: os.remove(merged_fn)
                except OSError as e: _log_tts(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e}", log_list_ref)

            if merge_audio_files_func(generated_files, merged_fn, log_list_ref):
                final_audio_file = merged_fn
-                # Clean up individual parts
                for fp in generated_files:
                    if os.path.abspath(fp) != os.path.abspath(merged_fn):
                        try: os.remove(fp)
                        except OSError as e_del: _log_tts(f"⚠️ عدم امکان حذف فایل موقت '{fp}': {e_del}", log_list_ref)
            else:
                _log_tts("⚠️ ادغام فایلهای صوتی ناموفق بود. اولین قطعه ارائه میشود.", log_list_ref)
                if generated_files:
                    try:
@@ -493,16 +314,14 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
                        if os.path.exists(fallback_fn): os.remove(fallback_fn)
                        os.rename(first_chunk_path, fallback_fn)
                        final_audio_file = fallback_fn
-                        # Clean up other parts
                        for i_gf in range(1, len(generated_files)):
                            try: os.remove(generated_files[i_gf])
                            except OSError as e_del: _log_tts(f"⚠️ عدم امکان حذف فایل موقت '{generated_files[i_gf]}': {e_del}", log_list_ref)
                    except Exception as e_rename_fallback:
                        _log_tts(f"خطا در تغییر نام فایل اولین قطعه (fallback): {e_rename_fallback}", log_list_ref)
                        final_audio_file = generated_files[0]
        else:
            _log_tts("⚠️ Pydub برای ادغام در دسترس نیست. اولین قطعه صوتی ارائه میشود.", log_list_ref)
-            # Similar renaming logic for the first part as above for consistency
            if generated_files:
                try:
                    first_chunk_path = generated_files[0]
@@ -511,9 +330,6 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
                    if os.path.exists(single_fallback_fn): os.remove(single_fallback_fn)
                    os.rename(first_chunk_path, single_fallback_fn)
                    final_audio_file = single_fallback_fn
-                    # Clean up other parts (optional, user might want them if no merge)
-                    # For simplicity, let's not clean them up if pydub is missing, they might be useful.
-                    # Or, offer a ZIP. For now, just the first.
                    for i_gf in range(1, len(generated_files)):
                        _log_tts(f"قطعه اضافی موجود: {generated_files[i_gf]} (ادغام نشده)", log_list_ref)

@@ -535,19 +351,16 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
                final_audio_file = final_single_fn
            except Exception as e_rename_single:
                _log_tts(f"خطا در تغییر نام فایل تکی نهایی: {e_rename_single}", log_list_ref)
                final_audio_file = generated_files[0]

    if final_audio_file and not os.path.exists(final_audio_file):
        _log_tts(f"⚠️ فایل صوتی نهایی '{final_audio_file}' پس از پردازش وجود ندارد!", log_list_ref)
        return None, "خطا: فایل صوتی نهایی یافت نشد."

    return final_audio_file, "موفق"

def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature):
-
-    # progress=gr.Progress(track_tqdm=True)
-
-    logs_for_this_run = [] # For this specific run, to potentially show user or debug
+    logs_for_this_run = []
    actual_text = ""
    status_message = "شروع پردازش..."
    final_audio_path = None
@@ -577,7 +390,6 @@ def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_pr
    _log_tts(f"تنظیمات: Speaker={speaker_voice}, Temp={temperature}, Prompt='{speech_prompt[:30]}...'", logs_for_this_run)

    try:
-        # Call the core audio generation function
        final_audio_path, generation_status_msg = core_generate_audio(
            actual_text, speech_prompt, speaker_voice, temperature, logs_for_this_run
        )
@@ -586,11 +398,11 @@ def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_pr
            status_message = "✅ تبدیل متن به گفتار با موفقیت انجام شد."
            _log_tts(status_message, logs_for_this_run)
            return final_audio_path, status_message
        elif final_audio_path and generation_status_msg != "موفق":
            status_message = f"⚠️ {generation_status_msg}. فایل صوتی ممکن است ناقص باشد: {final_audio_path}"
            _log_tts(status_message, logs_for_this_run)
            return final_audio_path, status_message
        else:
            status_message = f"❌ {generation_status_msg}"
            _log_tts(status_message, logs_for_this_run)
            return None, status_message
@@ -605,22 +417,21 @@ def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_pr
# --- START: بخش UI و Gradio (Adapted from Alpha Translator, content from Alpha TTS) ---
FLY_PRIMARY_COLOR_HEX = "#4F46E5"
FLY_SECONDARY_COLOR_HEX = "#10B981"
FLY_ACCENT_COLOR_HEX = "#D97706"
FLY_TEXT_COLOR_HEX = "#1F2937"
FLY_SUBTLE_TEXT_HEX = "#6B7280"
FLY_LIGHT_BACKGROUND_HEX = "#F9FAFB"
FLY_WHITE_HEX = "#FFFFFF"
FLY_BORDER_COLOR_HEX = "#D1D5DB"
FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
FLY_PANEL_BG_SIMPLE = "#E0F2FE"

app_theme_outer = gr.themes.Base(
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
).set(
    body_background_fill=FLY_LIGHT_BACKGROUND_HEX,
)

-# Using CSS from Alpha Translator (first code)
custom_css = f"""
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
@@ -632,7 +443,7 @@ custom_css = f"""
--fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
--fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
--font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif;
--font-english: 'Poppins', 'Inter', system-ui, sans-serif;
--radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
--shadow-sm: 0 1px 2px 0 rgba(0,0,0,0.05); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -2px rgba(0,0,0,0.1);
--shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
@@ -649,38 +460,29 @@ body {{font-family:var(--font-global);direction:rtl;background-color:var(--fly-b
footer,.gradio-footer,.flagging-container,.flex.row.gap-2.absolute.bottom-2.right-2.gr-compact.gr-box.gr-text-gray-500,div[data-testid="flag"],button[title="Flag"],button[aria-label="Flag"],.footer-utils {{display:none !important;visibility:hidden !important;}}
.main-content-area {{flex-grow:1;padding:0.75rem;width:100%;margin:0 auto;box-sizing:border-box;}}
.content-panel-simple {{background-color:var(--fly-bg-white);padding:1rem;border-radius:var(--radius-xl);box-shadow:var(--shadow-xl);margin-top:-2rem;position:relative;z-index:10;margin-bottom:2rem;width:100%;box-sizing:border-box;}}
-/* Main button styling from Alpha Translator */
.content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{background:var(--fly-accent) !important;margin-top:1rem !important;padding:12px 20px !important;transition:all 0.25s ease-in-out !important;color:white !important;font-weight:600 !important;border-radius:10px !important;border:none !important;box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;width:100% !important;font-size:1em !important;display:flex;align-items:center;justify-content:center;}}
.content-panel-simple .gr-button.lg.primary:hover,.content-panel-simple button[variant="primary"]:hover {{background:#B45309 !important;transform:translateY(-1px) !important;box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;}}
-/* Input styling from Alpha Translator */
.content-panel-simple .gr-input > label + div > textarea,.content-panel-simple .gr-dropdown > label + div > div > input,.content-panel-simple .gr-dropdown > label + div > div > select,.content-panel-simple .gr-textbox > label + div > textarea, .content-panel-simple .gr-file > label + div {{border-radius:8px !important;border:1.5px solid var(--fly-border-color) !important;font-size:0.95em !important;background-color:var(--fly-input-bg-simple) !important;padding:10px 12px !important;color:var(--fly-text-primary) !important;}}
.content-panel-simple .gr-input > label + div > textarea:focus,.content-panel-simple .gr-dropdown > label + div > div > input:focus,.content-panel-simple .gr-dropdown > label + div > div > select:focus,.content-panel-simple .gr-textbox > label + div > textarea:focus, .content-panel-simple .gr-file > label + div:focus-within {{border-color:var(--fly-primary) !important;box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;background-color:var(--fly-bg-white) !important;}}
.content-panel-simple .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
.content-panel-simple .gr-dropdown select {{font-family:var(--font-global) !important;width:100%;cursor:pointer;}}
-/* Output text area styling (if we add one for status messages) */
.content-panel-simple .gr-textbox[label*="وضعیت"] > label + div > textarea {{background-color:var(--fly-panel-bg-simple) !important;border-color:#A5D5FE !important;min-height:80px;font-family:var(--font-global);font-size:0.9em !important;line-height:1.5;padding:10px !important;}}
-/* Panel and Accordion styling (not using accordion here, but good to have) */
.content-panel-simple .gr-panel,.content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > .gr-panel {{border-radius:8px !important;border:1px solid var(--fly-border-color) !important;background-color:var(--fly-input-bg-simple) !important;padding:0.8rem 1rem !important;margin-top:0.6rem;box-shadow:none;}}
.content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > button.gr-button {{font-weight:500 !important;padding:8px 10px !important;border-radius:6px !important;background-color:#E5E7EB !important;color:var(--fly-text-primary) !important;border:1px solid #D1D5DB !important;}}
-/* Label styling */
.content-panel-simple label > span.label-text {{font-weight:500 !important;color:#4B5563 !important;font-size:0.88em !important;margin-bottom:6px !important;display:inline-block;}}
-/* Slider label styling */
.content-panel-simple .gr-slider label span {{font-size:0.82em !important;color:var(--fly-text-secondary);}}
-/* Temperature description class from AlphaTTS, adapted */
.temp-description-tts {{ font-size: 0.82em !important; color: var(--fly-text-secondary) !important; margin-top: -0.5rem; margin-bottom: 1rem; padding-right: 5px; }}
-/* Examples styling from Alpha Translator */
.content-panel-simple div[label*="نمونه"] {{margin-top:1.5rem;}}
.content-panel-simple div[label*="نمونه"] .gr-button.gr-button-tool,.content-panel-simple div[label*="نمونه"] .gr-sample-button {{background-color:#E0E7FF !important;color:var(--fly-primary) !important;border-radius:6px !important;font-size:0.78em !important;padding:4px 8px !important;}}
.content-panel-simple .custom-hr {{height:1px;background-color:var(--fly-border-color);margin:1.5rem 0;border:none;}}
.api-warning-message {{background-color:#FFFBEB !important;color:#92400E !important;padding:10px 12px !important;border-radius:8px !important;border:1px solid #FDE68A !important;text-align:center !important;margin:0 0.2rem 1rem 0.2rem !important;font-size:0.85em !important;}}
-/* Audio player styling */
.content-panel-simple #output_audio_tts audio {{ width: 100%; border-radius: var(--radius-md); margin-top:0.5rem; }}
@media (min-width:640px) {{.main-content-area {{padding:1.5rem;max-width:700px;}} .content-panel-simple {{padding:1.5rem;}} .app-title-card h1 {{font-size:2.5em !important;}} .app-title-card p {{font-size:1.05em !important;}} }}
@media (min-width:768px) {{
.main-content-area {{max-width:780px;}} .content-panel-simple {{padding:2rem;}}
.content-panel-simple .main-content-row {{display:flex !important;flex-direction:row !important;gap:1.5rem !important;}}
.content-panel-simple .main-content-row > .gr-column:nth-child(1) {{flex-basis:60%; min-width:0;}}
.content-panel-simple .main-content-row > .gr-column:nth-child(2) {{flex-basis:40%; min-width:0;}}
.content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{width:auto !important;align-self:flex-start;}}
.app-title-card h1 {{font-size:2.75em !important;}} .app-title-card p {{font-size:1.1em !important;}}
}}
@@ -711,11 +513,10 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
        status_message_output = gr.Textbox(label="وضعیت پردازش", interactive=False, lines=1, placeholder="پیامهای وضعیت اینجا نمایش داده میشوند...")

        with gr.Row(elem_classes=["main-content-row"]):
            with gr.Column(scale=3):
                use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False)
-                # Label for file input is handled by checkbox visibility change
                uploaded_file_input = gr.File(
                    label="آپلود فایل متنی",
                    file_types=['.txt'],
                    visible=False
                )
@@ -728,24 +529,23 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
                speech_prompt_tb = gr.Textbox(
                    label="🗣️ سبک و زمینه گفتار (اختیاری)",
                    placeholder="مثال: با لحنی شاد و پرانرژی",
                    value="با لحنی دوستانه و رسا صحبت کن.",
                    lines=2
                )
            with gr.Column(scale=2):
                speaker_voice_dd = gr.Dropdown(
                    SPEAKER_VOICES,
                    label="🎤 انتخاب گوینده",
                    value="Charon"
                )
                temperature_slider = gr.Slider(
                    minimum=0.1, maximum=1.5, step=0.05, value=0.9,
                    label="🌡️ میزان خلاقیت صدا (دما)"
                )
                gr.Markdown("<p class='temp-description-tts'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایینتر = یکنواختی بیشتر.</p>", elem_classes=["temp-description-tts-container"])

        output_audio = gr.Audio(label="🎧 فایل صوتی خروجی", type="filepath", elem_id="output_audio_tts")

-        # Button below the columns
        generate_button = gr.Button("🚀 تولید و پخش صدا", variant="primary", elem_classes=["lg"])

        gr.HTML("<hr class='custom-hr'>")
@@ -757,7 +557,7 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
                [False, None, "آیا میتوانم یک پیتزای پپرونی سفارش دهم؟", "پرسشی و مودبانه.", "Achird", 0.75],
            ],
            inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
            outputs=[output_audio, status_message_output],
            fn=gradio_tts_interface,
            cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true",
            label="💡 نمونههای کاربردی"
@@ -765,10 +565,9 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d

    gr.Markdown("<p class='app-footer-fly'>Alpha TTS © 2024</p>")

-    # --- Event Handlers ---
    def toggle_file_input(use_file):
        if use_file:
            return gr.update(visible=True, label=" "), gr.update(visible=False)
        else:
            return gr.update(visible=False), gr.update(visible=True, label="📝 متن فارسی برای تبدیل به گفتار")

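The `.change()` wiring for `toggle_file_input` sits outside the changed hunks; presumably it looks roughly like the sketch below, where the output order matches the two `gr.update` values returned above (this is an assumption, not code from the commit):

```python
# Assumed wiring for the checkbox handler; the actual call is outside these hunks.
use_file_input_cb.change(
    fn=toggle_file_input,
    inputs=[use_file_input_cb],
    outputs=[uploaded_file_input, text_to_speak_tb],  # (file input, text box), matching the returns
)
```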
@@ -782,18 +581,16 @@ with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as d
        generate_button.click(
            fn=gradio_tts_interface,
            inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
            outputs=[output_audio, status_message_output]
        )
    else:
        logging.error("دکمه تولید صدا (generate_button) به درستی مقداردهی اولیه نشده است.")


if __name__ == "__main__":
-
-    if os.getenv("AUTO_RESTART_ENABLED", "true").lower() == "true": # Optional via env var
+    if os.getenv("AUTO_RESTART_ENABLED", "true").lower() == "true":
        restart_scheduler_thread = threading.Thread(target=auto_restart_service, daemon=True)
        restart_scheduler_thread.start()
-    # --- END: شروع ترد ریاستارت خودکار ---

    demo.launch(
        server_name="0.0.0.0",