Spaces:

Hamed744
/

Ttspro

Running

App Files Community

Hamed744 committed on Jun 4

Commit

1cae8c0

verified ·

1 Parent(s): 3f42665

Update app.py

Browse files

Files changed (1) hide show

app.py +236 -284

app.py CHANGED Viewed

@@ -8,9 +8,9 @@ import time
 import zipfile
 from google import genai
 from google.genai import types
-# Attempt to load API key from Hugging Face Secrets
-# The Space's runtime will inject this environment variable if the secret is set.
 HF_GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 try:
@@ -18,10 +18,9 @@ try:
     PYDUB_AVAILABLE = True
 except ImportError:
     PYDUB_AVAILABLE = False
-    print("⚠️ pydub is not available. Audio file merging will be disabled.")
-    print("If merging is desired, ensure pydub is in requirements.txt and ffmpeg is available in the environment.")
-# --- Constants ---
 SPEAKER_VOICES = [
     "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
     "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux", "Pulcherrima",
@@ -30,145 +29,128 @@ SPEAKER_VOICES = [
     "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
 ]
 MODELS = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
-# --- Helper functions ---
 def save_binary_file(file_name, data):
     abs_file_name = os.path.abspath(file_name)
     try:
         with open(abs_file_name, "wb") as f:
             f.write(data)
-        print(f"✅ File saved at: {abs_file_name}")
         return abs_file_name
     except Exception as e:
-        print(f"❌ Error saving file {abs_file_name}: {e}")
         return None
 def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
     parameters = parse_audio_mime_type(mime_type)
     bits_per_sample = parameters["bits_per_sample"]
     sample_rate = parameters["rate"]
-    num_channels = 1 # Gemini TTS seems to output mono
     data_size = len(audio_data)
     bytes_per_sample = bits_per_sample // 8
     block_align = num_channels * bytes_per_sample
     byte_rate = sample_rate * block_align
-    chunk_size = 36 + data_size # Size of the 'fmt ' and 'data' chunks and their headers
     header = struct.pack(
         "<4sI4s4sIHHIIHH4sI",
-        b"RIFF", chunk_size, b"WAVE", b"fmt ", 16,  # 16 for PCM
-        1,  # PCM format
-        num_channels, sample_rate, byte_rate, block_align, bits_per_sample,
-        b"data", data_size
     )
     return header + audio_data
 def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
-    bits_per_sample = 16  # Default
-    rate = 24000          # Default for Gemini TTS
     if mime_type:
         mime_type_lower = mime_type.lower()
         parts = mime_type_lower.split(";")
         for param in parts:
             param = param.strip()
             if param.startswith("rate="):
                 try:
-                    rate_str = param.split("=", 1)[1]
-                    rate = int(rate_str)
-                except (ValueError, IndexError): pass
-            elif param.startswith("audio/l"): # e.g., audio/L16 or audio/L24
-                try:
-                    # Attempt to parse bits from "L<bits>"
                     potential_bits = param.split("l", 1)[1]
-                    if potential_bits.isdigit():
-                         bits_per_sample = int(potential_bits)
-                except (ValueError, IndexError): pass
     return {"bits_per_sample": bits_per_sample, "rate": rate}
 def load_text_from_gr_file(file_obj):
     if file_obj is None:
-        return "", "No file provided for text input."
     try:
         with open(file_obj.name, 'r', encoding='utf-8') as f:
             content = f.read().strip()
         if not content:
-            return "", "Text file is empty."
-        return content, f"Successfully loaded {len(content)} chars from {os.path.basename(file_obj.name)}."
     except Exception as e:
-        return "", f"Error reading text file: {e}"
 def smart_text_split(text, max_size=3800):
-    if len(text) <= max_size:
-        return [text]
-    chunks = []
-    current_chunk = ""
-    sentences = re.split(r'(?<=[.!?])\s+', text) # Split by sentences
     for sentence in sentences:
         if not sentence: continue
-        # If adding the current sentence exceeds max_size
-        if len(current_chunk) + len(sentence) + 1 > max_size: # +1 for space
-            if current_chunk: # If there's something in current_chunk, add it
-                chunks.append(current_chunk.strip())
-                current_chunk = "" # Reset current_chunk
-            # If the sentence itself is too long, split it by words or even characters
             if len(sentence) > max_size:
-                words = sentence.split(' ')
-                temp_sentence_part = ""
                 for word in words:
-                    if len(temp_sentence_part) + len(word) + 1 > max_size:
-                        if temp_sentence_part: chunks.append(temp_sentence_part.strip())
-                        # If word itself is too long (rare for TTS practical limits)
                         if len(word) > max_size:
-                            for i in range(0, len(word), max_size):
-                                chunks.append(word[i:i+max_size])
-                            temp_sentence_part = ""
-                        else:
-                            temp_sentence_part = word
-                    else:
-                        temp_sentence_part += (" " if temp_sentence_part else "") + word
-                if temp_sentence_part: chunks.append(temp_sentence_part.strip())
-                # current_chunk remains empty as the long sentence was fully processed
-            else: # Sentence is not too long itself, start a new chunk with it
-                current_chunk = sentence
-        else: # Sentence fits, add to current_chunk
-            current_chunk += (" " if current_chunk else "") + sentence
-    if current_chunk: # Add any remaining part
-        chunks.append(current_chunk.strip())
     return chunks
 def merge_audio_files_func(file_paths, output_path):
-    if not PYDUB_AVAILABLE:
-        return False, "pydub is not available. Cannot merge files.", None
-    if not file_paths:
-        return False, "No audio files to merge.", None
     try:
         combined = AudioSegment.empty()
         for i, file_path in enumerate(file_paths):
             if os.path.exists(file_path):
                 try:
-                    # Explicitly state format if known, otherwise pydub tries to guess
-                    # Assuming all inputs are WAV due to our conversion logic
-                    audio = AudioSegment.from_file(file_path, format="wav")
                     combined += audio
-                    if i < len(file_paths) - 1:
-                        combined += AudioSegment.silent(duration=200) # Small silence
                 except Exception as e_load:
-                    print(f"⚠️ Error loading audio file {file_path} with pydub: {e_load}")
-                    return False, f"Error loading audio file {os.path.basename(file_path)}: {e_load}", None
             else:
-                print(f"⚠️ File not found for merging: {file_path}")
-                # Decide if this is critical; for now, we'll say it is.
-                return False, f"File not found for merging: {os.path.basename(file_path)}", None
         abs_output_path = os.path.abspath(output_path)
         combined.export(abs_output_path, format="wav")
-        return True, f"Merged file saved: {os.path.basename(abs_output_path)}", abs_output_path
     except Exception as e:
-        print(f"❌ Error merging files: {e}")
-        return False, f"Error merging files: {e}", None
 def create_zip_file(file_paths, zip_name):
     abs_zip_name = os.path.abspath(zip_name)
@@ -177,85 +159,75 @@ def create_zip_file(file_paths, zip_name):
             for file_path in file_paths:
                 if os.path.exists(file_path):
                     zipf.write(file_path, os.path.basename(file_path))
-        return True, f"ZIP file created: {os.path.basename(abs_zip_name)}", abs_zip_name
     except Exception as e:
-        return False, f"Error creating ZIP file: {e}", None
-# --- Main generation function (modified for Gradio & HF Secrets) ---
 def generate_audio_for_gradio(
-    # api_key_input_field is removed, will use HF_GEMINI_API_KEY
     use_file_input_checkbox, text_file_obj,
     speech_prompt_input, text_to_speak_input,
     max_chunk_slider, sleep_slider, temperature_slider,
-    model_dropdown, speaker_dropdown, output_filename_base_input,
     merge_checkbox, delete_partials_checkbox,
-    # Progress for Gradio (optional but good for long tasks)
     progress=gr.Progress(track_tqdm=True)
 ):
     status_messages = []
-    status_messages.append("🚀 Starting Text-to-Speech process...")
-    progress(0, desc="Initializing...")
-    # 1. API Key Validation (from HF Secrets)
     api_key_to_use = HF_GEMINI_API_KEY
     if not api_key_to_use:
-        # Fallback if user provides one in a field (though we removed the field)
-        # This part can be removed if you *only* want to use secrets
-        # For now, let's assume if HF_GEMINI_API_KEY is None, we raise an error.
-        status_messages.append("❌ Error: GEMINI_API_KEY not found in Hugging Face Secrets.")
-        status_messages.append("➡️ Please set it in your Space's Settings > Secrets.")
         return None, None, "\n".join(status_messages)
-    os.environ["GEMINI_API_KEY"] = api_key_to_use # Set for genai library
-    status_messages.append("🔑 API Key loaded from Secrets.")
-    # 2. Determine Text Input
     actual_text_input = ""
     if use_file_input_checkbox:
         if text_file_obj is None:
-            status_messages.append("❌ Error: 'Use Text File' is checked, but no file was uploaded.")
             return None, None, "\n".join(status_messages)
         actual_text_input, msg = load_text_from_gr_file(text_file_obj)
         status_messages.append(msg)
-        if not actual_text_input:
-            return None, None, "\n".join(status_messages)
     else:
         actual_text_input = text_to_speak_input
-        status_messages.append("⌨️ Using manually entered text.")
     if not actual_text_input or actual_text_input.strip() == "":
-        status_messages.append("❌ Error: Text input is empty.")
         return None, None, "\n".join(status_messages)
-    # 3. Initialize GenAI Client
     try:
-        status_messages.append("🛠️ Initializing Gemini client...")
-        progress(0.1, desc="Initializing Gemini Client...")
         client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
-        status_messages.append("✅ Gemini client initialized.")
     except Exception as e:
-        status_messages.append(f"❌ Error initializing Gemini client: {e}")
         return None, None, "\n".join(status_messages)
-    # 4. Split text
     text_chunks = smart_text_split(actual_text_input, int(max_chunk_slider))
-    status_messages.append(f"📊 Text split into {len(text_chunks)} chunk(s).")
-    for i, chunk_text in enumerate(text_chunks): # Renamed 'chunk' to 'chunk_text'
-        status_messages.append(f"  📝 Chunk {i+1}: {len(chunk_text)} chars")
-    # 5. Generate audio for each chunk
     generated_audio_files = []
     run_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
     temp_output_dir = f"temp_audio_{run_id}"
     os.makedirs(temp_output_dir, exist_ok=True)
-    output_base_name_safe = re.sub(r'[\s\\\/\:\*\?\"\<\>\|\%]+', '_', output_filename_base_input) # More robust sanitize
     total_chunks = len(text_chunks)
     for i, chunk_text_content in enumerate(text_chunks):
-        progress_val = 0.1 + (0.7 * (i / total_chunks)) # Progress from 10% to 80% during generation
-        progress(progress_val, desc=f"Generating chunk {i+1}/{total_chunks}...")
-        status_messages.append(f"\n🔊 Generating audio for chunk {i+1}/{total_chunks}...")
         final_text_for_api = f'"{speech_prompt_input}"\n{chunk_text_content}' if speech_prompt_input.strip() else chunk_text_content
         contents_for_api = [types.Content(role="user", parts=[types.Part.from_text(text=final_text_for_api)])]
@@ -264,17 +236,16 @@ def generate_audio_for_gradio(
             response_modalities=["audio"],
             speech_config=types.SpeechConfig(
                 voice_config=types.VoiceConfig(
-                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=speaker_dropdown)
                 )
             )
         )
         try:
             chunk_filename_base = f"{output_base_name_safe}_part_{i+1:03d}"
             chunk_filepath_prefix = os.path.join(temp_output_dir, chunk_filename_base)
             audio_data_received = False
             for stream_response_chunk in client.models.generate_content_stream(
-                model=model_dropdown, contents=contents_for_api, config=generate_content_config,
             ):
                 if (stream_response_chunk.candidates and stream_response_chunk.candidates[0].content and
                     stream_response_chunk.candidates[0].content.parts and
@@ -284,161 +255,136 @@ def generate_audio_for_gradio(
                     data_buffer = inline_data.data
                     api_mime_type = inline_data.mime_type
                     audio_data_received = True
-                    status_messages.append(f"ℹ️ API returned MIME type: {api_mime_type}")
-                    # Determine file extension and convert if necessary
-                    file_extension = ".wav" # Default to .wav and convert
                     if api_mime_type and ("mp3" in api_mime_type.lower() or "mpeg" in api_mime_type.lower()):
                         file_extension = ".mp3"
-                        # For MP3, data_buffer is already MP3. No conversion needed for saving.
-                        # pydub will need ffmpeg to read MP3 for merging.
-                        status_messages.append(f"ℹ️ Saving as MP3 based on MIME: {api_mime_type}")
                     elif api_mime_type and "wav" in api_mime_type.lower() and \
                          not ("audio/l16" in api_mime_type.lower() or "audio/l24" in api_mime_type.lower()):
                         file_extension = ".wav"
-                        # API says WAV and it's not raw L16/L24, trust it.
-                        status_messages.append(f"ℹ️ Saving as WAV based on MIME: {api_mime_type}")
-                    else: # Raw PCM (like audio/L16), unknown, or .bin -> convert to WAV
                         file_extension = ".wav"
-                        status_messages.append(f"ℹ️ Converting to WAV for MIME: {api_mime_type or 'Unknown'}")
                         data_buffer = convert_to_wav(data_buffer, api_mime_type)
-                    status_messages.append(f"ℹ️ Determined file extension: {file_extension}")
                     generated_file_path = save_binary_file(f"{chunk_filepath_prefix}{file_extension}", data_buffer)
                     if generated_file_path:
                         generated_audio_files.append(generated_file_path)
-                        status_messages.append(f"✅ Chunk {i+1} saved: {os.path.basename(generated_file_path)}")
                     else:
-                        status_messages.append(f"❌ Failed to save chunk {i+1}.")
-                    break # Processed this audio data from stream
                 elif stream_response_chunk.text:
-                     status_messages.append(f"ℹ️ API Text Message (during stream): {stream_response_chunk.text}")
             if not audio_data_received:
-                status_messages.append(f"❌ No audio data received in stream for chunk {i+1}.")
-                # Check for errors in the stream response if available
                 if stream_response_chunk and stream_response_chunk.prompt_feedback and stream_response_chunk.prompt_feedback.block_reason:
-                    status_messages.append(f"🛑 API Block Reason: {stream_response_chunk.prompt_feedback.block_reason_message or stream_response_chunk.prompt_feedback.block_reason}")
         except types.BlockedPromptException as bpe:
-            status_messages.append(f"❌ Content blocked for chunk {i+1}: {bpe}")
-            status_messages.append(f"  Feedback: {bpe.response.prompt_feedback}")
         except types.StopCandidateException as sce:
-            status_messages.append(f"❌ Generation stopped for chunk {i+1}: {sce}")
-            status_messages.append(f"  Feedback: {sce.response.prompt_feedback}")
         except Exception as e:
-            status_messages.append(f"❌ Error generating/processing chunk {i+1}: {e}")
-            import traceback
-            status_messages.append(traceback.format_exc()) # More detailed error
-            continue
-        if i < total_chunks - 1:
-            status_messages.append(f"⏱️ Waiting {sleep_slider}s...")
             time.sleep(float(sleep_slider))
-    progress(0.85, desc="Processing generated files...")
-    # 6. Handle output files
     if not generated_audio_files:
-        status_messages.append("❌ No audio files were successfully generated or saved!")
         final_status = "\n".join(status_messages)
         print(final_status)
-        progress(1, desc="Finished with errors.")
         return None, None, final_status
-    status_messages.append(f"\n🎉 {len(generated_audio_files)} audio file(s) generated!")
-    output_audio_path_for_player = None # For gr.Audio, ideally a single WAV
-    output_path_for_download = None    # For gr.File, can be WAV or ZIP
-    if merge_checkbox and len(generated_audio_files) > 1:
-        if not PYDUB_AVAILABLE:
-            status_messages.append("⚠️ pydub not available. Cannot merge. Returning ZIP of parts.")
             success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
             status_messages.append(msg_zip)
             if success_zip: output_path_for_download = zip_p
-        else:
-            status_messages.append(f"🔗 Merging {len(generated_audio_files)} files (all should be WAVs now)...")
-            # Ensure all files for merging are WAV, convert if any MP3s were saved and pydub is used
-            # For simplicity, our save logic now tries to make them WAV if not MP3 from API.
-            # If an MP3 was saved and PYDUB_AVAILABLE, it should handle it.
-            merged_filename_path = os.path.join(temp_output_dir, f"{output_base_name_safe}_merged.wav")
-            success_merge, msg_merge, merged_p = merge_audio_files_func(generated_audio_files, merged_filename_path)
-            status_messages.append(msg_merge)
-            if success_merge:
-                output_audio_path_for_player = merged_p
-                output_path_for_download = merged_p
-                if delete_partials_checkbox:
-                    status_messages.append("🗑️ Deleting partial files...")
-                    for file_p in generated_audio_files:
-                        try: os.remove(file_p); status_messages.append(f"  🗑️ Deleted: {os.path.basename(file_p)}")
-                        except Exception as e_del: status_messages.append(f"  ⚠️ Could not delete {os.path.basename(file_p)}: {e_del}")
-            else:
-                status_messages.append("⚠️ Merge failed. Providing ZIP of parts.")
-                success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
-                status_messages.append(msg_zip)
-                if success_zip: output_path_for_download = zip_p
     elif len(generated_audio_files) == 1:
-        # Single file, should be WAV due to our conversion logic or MP3 if API sent that
         single_file_path = generated_audio_files[0]
-        if single_file_path.lower().endswith(".mp3") and PYDUB_AVAILABLE:
-            # Convert MP3 to WAV for Gradio player if it prefers WAV
-            # Or, gr.Audio might handle MP3 directly. Let's test.
-            # For now, assume gr.Audio handles common types.
-            output_audio_path_for_player = single_file_path
-            status_messages.append(f"🎵 Single MP3 file: {os.path.basename(single_file_path)}")
-        else: # Assume WAV
-             output_audio_path_for_player = single_file_path
-             status_messages.append(f"🎵 Single WAV file: {os.path.basename(single_file_path)}")
         output_path_for_download = single_file_path
-    else: # Multiple files, no merge requested
-        status_messages.append("📦 Multiple parts generated. Creating ZIP file.")
         success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
         status_messages.append(msg_zip)
         if success_zip: output_path_for_download = zip_p
     final_status = "\n".join(status_messages)
     print(final_status)
-    print(f"DEBUG: output_audio_path_for_player: {output_audio_path_for_player}")
-    print(f"DEBUG: output_path_for_download: {output_path_for_download}")
-    progress(1, desc="Finished!")
     return output_audio_path_for_player, output_path_for_download, final_status
-# --- Gradio Interface Definition ---
-with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
-    gr.Markdown("# 🎵 Gemini Text-to-Speech UI 🗣️")
     if not HF_GEMINI_API_KEY:
         gr.Warning(
-            "GEMINI_API_KEY not found in Hugging Face Secrets. "
-            "Please add it in your Space's 'Settings' > 'Secrets' tab for the app to work. "
-            "Name the secret `GEMINI_API_KEY`."
         )
     else:
-        gr.Info("Gemini API Key loaded successfully from Space Secrets. Ready to generate!")
     gr.Markdown(
-        "Convert text to speech using Google's Gemini API. "
-        "Your Gemini API Key must be set as a Secret named `GEMINI_API_KEY` in this Space's settings."
-        "\n\nGet your API Key from [Google AI Studio](https://aistudio.google.com/app/apikey)."
     )
     with gr.Row():
-        with gr.Column(scale=2): # Wider column for text inputs
-            use_file = gr.Checkbox(label="📁 Use Text File Input (.txt)", value=False)
             text_file = gr.File(
-                label="Upload Text File", # Simpler label
                 file_types=['.txt'],
-                visible=False # Initially hidden
             )
             text_to_speak = gr.Textbox(
-                label="📝 Text to Speak (or use file above)",
                 lines=10,
-                placeholder="Enter text here...",
-                visible=True # Initially visible
             )
             use_file.change(
                 lambda x: (gr.update(visible=x), gr.update(visible=not x)),
@@ -446,105 +392,111 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary
                 [text_file, text_to_speak]
             )
             speech_prompt = gr.Textbox(
-                label="🗣️ Speech Prompt (Optional)",
-                placeholder="e.g., 'As an energetic YouTuber speaking to an audience'",
-                info="Influences style, emotion, and voice characteristics."
             )
-        with gr.Column(scale=1):
-            model_name = gr.Dropdown(
-                MODELS, label="🤖 Model", value=MODELS[0]
             )
-            speaker_voice = gr.Dropdown(
-                SPEAKER_VOICES, label="🎤 Speaker Voice", value="Charon"
             )
-            temperature = gr.Slider(
-                minimum=0.0, maximum=1.0, step=0.05, value=0.7, # Gemini TTS often uses temp <= 1
-                label="🌡️ Temperature",
-                info="Controls randomness (0.0-1.0). Higher for more variation."
             )
-            max_chunk_size = gr.Slider(
                 minimum=1000, maximum=4000, step=100, value=3800,
-                label="🧩 Max Characters per Chunk",
-                info="Text is split for API. Max 4096 per request for some models."
             )
-            sleep_between_requests = gr.Slider(
-                minimum=1, maximum=15, step=0.5, value=2, # Reduced default sleep
-                label="⏱️ Sleep Between Chunks (sec)",
-                info="Helps manage API rate limits (e.g. Gemini Flash has 60 RPM limit)."
             )
-            output_filename_base = gr.Textbox(
-                label="💾 Output Filename Base", value="gemini_tts_audio"
             )
-            with gr.Group(visible=PYDUB_AVAILABLE):
-                merge_audio = gr.Checkbox(label="🔗 Merge Audio Chunks (if >1)", value=True)
-                delete_partials = gr.Checkbox(label="🗑️ Delete Chunks After Merge", value=True, visible=True) # Default visible
-                merge_audio.change(lambda x: gr.update(visible=x), [merge_audio], [delete_partials])
-            if not PYDUB_AVAILABLE:
-                 gr.Markdown("<small>⚠️ Merging disabled: `pydub` library not found. Install if needed.</small>")
-    submit_button = gr.Button("✨ Generate Audio ✨", variant="primary", scale=2) # Centered button
     with gr.Row():
         with gr.Column(scale=1):
-            output_audio_player = gr.Audio(label="🎧 Generated Audio Output", type="filepath", format="wav") # Specify format if known
         with gr.Column(scale=1):
-            output_file_download = gr.File(label="📥 Download Output File", type="filepath")
-    status_textbox = gr.Textbox(label="📊 Status Log", lines=10, interactive=False, max_lines=20)
     submit_button.click(
         fn=generate_audio_for_gradio,
         inputs=[
             use_file, text_file, speech_prompt, text_to_speak,
-            max_chunk_size, sleep_between_requests, temperature,
-            model_name, speaker_voice, output_filename_base,
-            merge_audio, delete_partials # Even if not visible, pass them
         ],
-        outputs=[output_audio_player, output_file_download, status_textbox]
     )
     gr.Markdown("---")
-    # The encoded text part:
-    encoded_text = "Q3JlYXRlIGJ5IDogYWlnb2xkZW4=" # "Created by : aigolden"
     try:
-        decoded_text = base64.b64decode(encoded_text.encode('utf-8')).decode('utf-8')
-        gr.Markdown(f"<p style='text-align:center; font-size:small;'><em>{decoded_text}</em></p>")
-    except Exception as e_decode:
-        print(f"Error decoding/displaying credit: {e_decode}")
-        pass
     gr.Examples(
         examples=[
-            [False, None, "A friendly and informative narrator.", "Hello world, this is a test of the Gemini text to speech API using Gradio. I hope this works well!", 3800, 2, 0.7, MODELS[0], "Charon", "example_hello", True, True],
-            [False, None, "An excited news reporter.", "Breaking news! Artificial intelligence can now generate human-like speech. This technology is rapidly evolving!", 3000, 2, 0.8, MODELS[1], "Achernar", "example_news", True, True],
-            [True, "sample_text.txt", "A calm storyteller.", "", 3500, 3, 0.6, MODELS[0], "Vindemiatrix", "example_from_file", True, False]
         ],
-        fn=generate_audio_for_gradio, # Ensure example fn is the same as main
-        inputs=[ # Ensure these match the function's inputs exactly (order and number)
             use_file, text_file, speech_prompt, text_to_speak,
-            max_chunk_size, sleep_between_requests, temperature,
-            model_name, speaker_voice, output_filename_base,
-            merge_audio, delete_partials
         ],
-        outputs=[output_audio_player, output_file_download, status_textbox],
-        cache_examples=False # API calls, so don't cache results based on static inputs
     )
-    gr.Markdown("<small>To use the 'example_from_file', please create a `sample_text.txt` file in the root of this Space with some text content, or upload your own text file.</small>")
 if __name__ == "__main__":
     if not PYDUB_AVAILABLE:
-        print("WARNING: pydub library is not installed or working. Audio file merging will be disabled.")
     if not HF_GEMINI_API_KEY:
-        print("WARNING: GEMINI_API_KEY environment variable not set. The app might not work in local if it relies on this for API key.")
-    # For local testing, you might want to provide a way to input the API key
-    # or set the GEMINI_API_KEY environment variable before running.
-    # e.g., export GEMINI_API_KEY="your_key_here"
-    # then run python app.py
-    demo.launch(debug=True, share=False) # share=False for local, HF Spaces handles public link

 import zipfile
 from google import genai
 from google.genai import types
+import traceback # برای نمایش خطاهای دقیق‌تر
+# خواندن کلید API از Hugging Face Secrets
 HF_GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 try:
     PYDUB_AVAILABLE = True
 except ImportError:
     PYDUB_AVAILABLE = False
+    print("⚠️ کتابخانه pydub در دسترس نیست. قابلیت ادغام فایل‌های صوتی غیرفعال خواهد بود.")
+# --- ثابت‌ها ---
 SPEAKER_VOICES = [
     "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
     "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux", "Pulcherrima",
     "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
 ]
 MODELS = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
+MODEL_NAMES_FARSI = {
+    "gemini-2.5-flash-preview-tts": "جمینای ۲.۵ فلش (سریع‌تر، اقتصادی‌تر)",
+    "gemini-2.5-pro-preview-tts": "جمینای ۲.۵ پرو (کیفیت بالاتر)"
+}
+SPEAKER_VOICES_FARSI_SAMPLE = { # می‌توانید برای همه گوینده‌ها نام فارسی تعریف کنید
+    "Charon": "شارون (پیش‌فرض)",
+    "Achernar": "آخرالنهر",
+    "Vindemiatrix": "vindemiatrix (ستاره‌شناس)",
+    # ... بقیه گوینده‌ها
+}
+# --- توابع کمکی (بدون تغییر زیاد در منطق، فقط پیام‌ها فارسی می‌شوند) ---
 def save_binary_file(file_name, data):
     abs_file_name = os.path.abspath(file_name)
     try:
         with open(abs_file_name, "wb") as f:
             f.write(data)
+        print(f"✅ فایل در مسیر ذخیره شد: {abs_file_name}")
         return abs_file_name
     except Exception as e:
+        print(f"❌ خطا در ذخیره فایل {abs_file_name}: {e}")
         return None
 def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
     parameters = parse_audio_mime_type(mime_type)
     bits_per_sample = parameters["bits_per_sample"]
     sample_rate = parameters["rate"]
+    num_channels = 1
     data_size = len(audio_data)
     bytes_per_sample = bits_per_sample // 8
     block_align = num_channels * bytes_per_sample
     byte_rate = sample_rate * block_align
+    chunk_size = 36 + data_size
     header = struct.pack(
         "<4sI4s4sIHHIIHH4sI",
+        b"RIFF", chunk_size, b"WAVE", b"fmt ", 16, 1, num_channels,
+        sample_rate, byte_rate, block_align, bits_per_sample, b"data", data_size
     )
     return header + audio_data
 def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
+    bits_per_sample = 16
+    rate = 24000
     if mime_type:
         mime_type_lower = mime_type.lower()
         parts = mime_type_lower.split(";")
         for param in parts:
             param = param.strip()
             if param.startswith("rate="):
+                try: rate = int(param.split("=", 1)[1])
+                except: pass
+            elif param.startswith("audio/l"):
                 try:
                     potential_bits = param.split("l", 1)[1]
+                    if potential_bits.isdigit(): bits_per_sample = int(potential_bits)
+                except: pass
     return {"bits_per_sample": bits_per_sample, "rate": rate}
 def load_text_from_gr_file(file_obj):
     if file_obj is None:
+        return "", "فایلی برای ورودی متن ارائه نشده است."
     try:
         with open(file_obj.name, 'r', encoding='utf-8') as f:
             content = f.read().strip()
         if not content:
+            return "", "فایل متنی خالی است."
+        return content, f"متن با موفقیت از فایل '{os.path.basename(file_obj.name)}' ({len(content)} کاراکتر) بارگذاری شد."
     except Exception as e:
+        return "", f"خطا در خواندن فایل متنی: {e}"
 def smart_text_split(text, max_size=3800):
     """Break ``text`` into chunks of at most ``max_size`` characters.

     Text that already fits is returned as a single-element list. Otherwise
     the text is cut after sentence-ending punctuation (``.``, ``!``, ``?``,
     ``؟``) followed by whitespace, and sentences are packed greedily,
     joined by single spaces. A sentence longer than ``max_size`` is packed
     word by word, and a single word longer than ``max_size`` is sliced
     into fixed-width pieces.

     Args:
         text: The text to split.
         max_size: Maximum number of characters per chunk.

     Returns:
         The list of chunks, in order.
     """
     if len(text) <= max_size:
         return [text]
     pieces = []

     def flush(buffer):
         # Emit a pending buffer, skipping empty ones.
         if buffer:
             pieces.append(buffer.strip())

     pending = ""
     for sentence in re.split(r'(?<=[.!?؟])\s+', text):
         if not sentence:
             continue
         # The +1 reserves room for the joining space.
         if len(pending) + len(sentence) + 1 <= max_size:
             pending = f"{pending} {sentence}" if pending else sentence
             continue
         flush(pending)
         if len(sentence) <= max_size:
             pending = sentence
             continue
         # The sentence alone is too long: fall back to word-level packing.
         partial = ""
         for token in sentence.split(' '):
             if len(partial) + len(token) + 1 <= max_size:
                 partial = f"{partial} {token}" if partial else token
                 continue
             flush(partial)
             if len(token) > max_size:
                 # Even one word is too long: hard-slice it.
                 pieces.extend(
                     token[start:start + max_size]
                     for start in range(0, len(token), max_size)
                 )
                 partial = ""
             else:
                 partial = token
         flush(partial)
         pending = ""
     flush(pending)
     return pieces
 def merge_audio_files_func(file_paths, output_path):
     """Concatenate audio files into a single WAV file.

     Files are appended in the given order with 200 ms of silence between
     consecutive files. Each input is decoded according to its own file
     extension (falling back to WAV when it has none), so chunks saved as
     ``.mp3`` are merged as well. The result is always exported as WAV.

     Args:
         file_paths: Paths of the audio files to merge, in playback order.
         output_path: Destination path of the merged WAV file.

     Returns:
         A ``(success, message, path)`` tuple. ``path`` is the absolute
         path of the merged file on success and ``None`` otherwise. The
         merge is abandoned as soon as one input is missing or unreadable.
     """
     if not PYDUB_AVAILABLE:
         return False, "pydub در دسترس نیست. امکان ادغام فایل‌ها وجود ندارد.", None
     if not file_paths:
         return False, "هیچ فایل صوتی برای ادغام وجود ندارد.", None
     try:
         combined = AudioSegment.empty()
         last_index = len(file_paths) - 1
         for i, file_path in enumerate(file_paths):
             if not os.path.exists(file_path):
                 msg = f"فایل برای ادغام یافت نشد: {os.path.basename(file_path)}"
                 print(f"⚠️ {msg}")
                 return False, msg, None
             # Pick the decoder from the extension instead of assuming WAV.
             audio_format = os.path.splitext(file_path)[1].lstrip(".").lower() or "wav"
             try:
                 audio = AudioSegment.from_file(file_path, format=audio_format)
                 combined += audio
                 # Short pause between chunks, but not after the last one.
                 if i < last_index:
                     combined += AudioSegment.silent(duration=200)
             except Exception as e_load:
                 msg = f"خطا در بارگذاری فایل صوتی '{os.path.basename(file_path)}' با pydub: {e_load}"
                 print(f"⚠️ {msg}")
                 return False, msg, None
         abs_output_path = os.path.abspath(output_path)
         combined.export(abs_output_path, format="wav")
         return True, f"فایل ادغام شده با موفقیت در '{os.path.basename(abs_output_path)}' ذخیره شد.", abs_output_path
     except Exception as e:
         msg = f"خطا در ادغام فایل‌ها: {e}"
         print(f"❌ {msg}")
         return False, msg, None
 def create_zip_file(file_paths, zip_name):
     # Bundle the given files into a ZIP archive.
     # Returns a (success, message, path) tuple: path is the absolute archive
     # path on success and None when an exception was caught.
     abs_zip_name = os.path.abspath(zip_name)
     # NOTE(review): the lines that open the `try:` block and bind `zipf`
     # (presumably a zipfile.ZipFile context manager) are not visible in this
     # view - confirm against the full file.
             for file_path in file_paths:
                 # Paths that no longer exist are skipped.
                 if os.path.exists(file_path):
                     # Store each entry under its base name only.
                     zipf.write(file_path, os.path.basename(file_path))
+        return True, f"فایل ZIP با نام '{os.path.basename(abs_zip_name)}' ایجاد شد.", abs_zip_name
     except Exception as e:
+        return False, f"خطا در ایجاد فایل ZIP: {e}", None
+# --- تابع اصلی تولید صدا ---
 def generate_audio_for_gradio(
     use_file_input_checkbox, text_file_obj,
     speech_prompt_input, text_to_speak_input,
     max_chunk_slider, sleep_slider, temperature_slider,
+    model_dropdown_key, # model key (English identifier)
+    speaker_dropdown, output_filename_base_input,
     merge_checkbox, delete_partials_checkbox,
     progress=gr.Progress(track_tqdm=True)
 ):
     # Gradio callback: turn the input text into audio, chunk by chunk.
     #
     # Steps: validate the API key and the input text, split the text with
     # smart_text_split, request audio for each chunk from the Gemini
     # streaming API, save every chunk to a per-run temp directory, then
     # either merge the chunks, return the single file, or zip them.
     #
     # Returns a 3-tuple (path for the audio player or None, path for the
     # download component or None, newline-joined status log).
     #
     # NOTE(review): `temperature_slider` is not referenced in any line of
     # this function visible here; it may be used in a line missing from
     # this view.
     status_messages = []
+    status_messages.append("🚀 فرآیند تبدیل متن به گفتار آغاز شد...")
+    progress(0, desc="در حال آماده‌سازی...")
     # The key comes from the module-level value read from the environment.
     api_key_to_use = HF_GEMINI_API_KEY
     if not api_key_to_use:
+        status_messages.append("❌ خطا: کلید API جمینای (GEMINI_API_KEY) در تنظیمات Secret این Space یافت نشد.")
+        status_messages.append("⬅️ لطفاً آن را در بخش Settings > Secrets مربوط به این Space تنظیم کنید.")
         return None, None, "\n".join(status_messages)
     # The key is written back to the environment and read again when the
     # client is created below.
+    os.environ["GEMINI_API_KEY"] = api_key_to_use
+    status_messages.append("🔑 کلید API با موفقیت از Secrets بارگذاری شد.")
     # Resolve the text to speak: uploaded file or the manual text box.
     actual_text_input = ""
     if use_file_input_checkbox:
         if text_file_obj is None:
+            status_messages.append("❌ خطا: گزینه 'استفاده از فایل متنی' انتخاب شده، اما هیچ فایلی آپلود نشده است.")
             return None, None, "\n".join(status_messages)
         actual_text_input, msg = load_text_from_gr_file(text_file_obj)
         status_messages.append(msg)
+        if not actual_text_input: return None, None, "\n".join(status_messages)
     else:
         actual_text_input = text_to_speak_input
+        status_messages.append("⌨️ از متن وارد شده به صورت دستی استفاده می‌شود.")
     if not actual_text_input or actual_text_input.strip() == "":
+        status_messages.append("❌ خطا: متن ورودی خالی است.")
         return None, None, "\n".join(status_messages)
     try:
+        status_messages.append("🛠️ در حال مقداردهی اولیه کلاینت جمینای...")
+        progress(0.1, desc="اتصال به جمینای...")
         client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
+        status_messages.append("✅ کلاینت جمینای با موفقیت ایجاد شد.")
     except Exception as e:
+        status_messages.append(f"❌ خطا در ایجاد کلاینت جمینای: {e}")
         return None, None, "\n".join(status_messages)
     text_chunks = smart_text_split(actual_text_input, int(max_chunk_slider))
+    status_messages.append(f"📊 متن به {len(text_chunks)} قطعه تقسیم شد.")
+    for i, chunk_text_content in enumerate(text_chunks):
+        status_messages.append(f"  📝 قطعه {i+1}: {len(chunk_text_content)} کاراکتر")
     generated_audio_files = []
     # Random per-run suffix so each run writes into its own directory.
     # No line visible in this function removes the directory afterwards.
     run_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
     temp_output_dir = f"temp_audio_{run_id}"
     os.makedirs(temp_output_dir, exist_ok=True)
     # Collapse whitespace and characters unsafe in file names into "_".
+    output_base_name_safe = re.sub(r'[\s\\\/\:\*\?\"\<\>\|\%]+', '_', output_filename_base_input)
     total_chunks = len(text_chunks)
     for i, chunk_text_content in enumerate(text_chunks):
         # Chunk generation is mapped onto the 0.1-0.8 progress range.
+        progress_val = 0.1 + (0.7 * (i / total_chunks))
+        progress(progress_val, desc=f"در حال تولید قطعه {i+1} از {total_chunks}...")
+        status_messages.append(f"\n🔊 در حال تولید صدا برای قطعه {i+1}/{total_chunks}...")
         # A non-blank style prompt is quoted and placed on its own line
         # before the chunk text.
         final_text_for_api = f'"{speech_prompt_input}"\n{chunk_text_content}' if speech_prompt_input.strip() else chunk_text_content
         contents_for_api = [types.Content(role="user", parts=[types.Part.from_text(text=final_text_for_api)])]
         # NOTE(review): the line that opens this call (presumably the
         # assignment of `generate_content_config`, which is passed to the
         # API below) is not visible in this view.
             response_modalities=["audio"],
             speech_config=types.SpeechConfig(
                 voice_config=types.VoiceConfig(
+                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=speaker_dropdown) # speaker comes from the input
                 )
             )
         )
         try:
             chunk_filename_base = f"{output_base_name_safe}_part_{i+1:03d}"
             chunk_filepath_prefix = os.path.join(temp_output_dir, chunk_filename_base)
             audio_data_received = False
             for stream_response_chunk in client.models.generate_content_stream(
+                model=model_dropdown_key, contents=contents_for_api, config=generate_content_config,
             ):
                 if (stream_response_chunk.candidates and stream_response_chunk.candidates[0].content and
                     stream_response_chunk.candidates[0].content.parts and
                     # NOTE(review): the rest of this condition and the
                     # assignment of `inline_data` are not visible in this
                     # view.
                     data_buffer = inline_data.data
                     api_mime_type = inline_data.mime_type
                     audio_data_received = True
+                    status_messages.append(f"ℹ️ MIME Type دریافتی از API: {api_mime_type}")
+                    file_extension = ".wav"
                     # MP3/MPEG data and WAV data (other than audio/L16 or
                     # audio/L24) are saved unchanged; everything else is
                     # passed through convert_to_wav first.
                     if api_mime_type and ("mp3" in api_mime_type.lower() or "mpeg" in api_mime_type.lower()):
                         file_extension = ".mp3"
+                        status_messages.append(f"ℹ️ ذخیره با فرمت MP3 بر اساس MIME Type: {api_mime_type}")
                     elif api_mime_type and "wav" in api_mime_type.lower() and \
                          not ("audio/l16" in api_mime_type.lower() or "audio/l24" in api_mime_type.lower()):
                         file_extension = ".wav"
+                        status_messages.append(f"ℹ️ ذخیره با فرمت WAV بر اساس MIME Type: {api_mime_type}")
+                    else:
                         file_extension = ".wav"
+                        status_messages.append(f"ℹ️ تبدیل به فرمت WAV برای MIME Type: {api_mime_type or 'نامشخص'}")
                         data_buffer = convert_to_wav(data_buffer, api_mime_type)
+                    status_messages.append(f"ℹ️ پسوند فایل نهایی: {file_extension}")
                     generated_file_path = save_binary_file(f"{chunk_filepath_prefix}{file_extension}", data_buffer)
                     if generated_file_path:
                         generated_audio_files.append(generated_file_path)
+                        status_messages.append(f"✅ قطعه {i+1} ذخیره شد: {os.path.basename(generated_file_path)}")
                     else:
+                        status_messages.append(f"❌ عدم موفقیت در ذخیره قطعه {i+1}.")
                     # Stop reading the stream once the first audio part has
                     # been handled.
+                    break
                 elif stream_response_chunk.text:
+                     status_messages.append(f"ℹ️ پیام متنی از API (حین استریم): {stream_response_chunk.text}")
             if not audio_data_received:
+                status_messages.append(f"❌ هیچ داده صوتی برای قطعه {i+1} دریافت نشد.")
                 # NOTE(review): `stream_response_chunk` is only bound by the
                 # loop above; if the stream yields nothing, this lookup
                 # raises and is reported by the generic handler below.
                 if stream_response_chunk and stream_response_chunk.prompt_feedback and stream_response_chunk.prompt_feedback.block_reason:
+                    status_messages.append(f"🛑 دلیل مسدود شدن توسط API: {stream_response_chunk.prompt_feedback.block_reason_message or stream_response_chunk.prompt_feedback.block_reason}")
         # NOTE(review): confirm that `types` from google.genai actually
         # exposes BlockedPromptException and StopCandidateException.
         except types.BlockedPromptException as bpe:
+            status_messages.append(f"❌ محتوای قطعه {i+1} توسط API مسدود شد: {bpe}")
+            status_messages.append(f"  بازخورد API: {bpe.response.prompt_feedback}")
         except types.StopCandidateException as sce:
+            status_messages.append(f"❌ تولید صدا برای قطعه {i+1} متوقف شد: {sce}")
+            status_messages.append(f"  بازخورد API: {sce.response.prompt_feedback}")
         except Exception as e:
+            status_messages.append(f"❌ خطا در تولید/پردازش قطعه {i+1}: {type(e).__name__} - {e}")
             # NOTE(review): the import of `traceback` is not visible in this
             # view - confirm it exists at the top of the file.
+            status_messages.append(traceback.format_exc())
             # Skips the inter-request delay below for this chunk.
+            continue
         # Pause between requests, but not after the last chunk.
+        if i < total_chunks - 1 and float(sleep_slider) > 0 :
+            status_messages.append(f"⏱️ انتظار به مدت {sleep_slider} ثانیه...")
             time.sleep(float(sleep_slider))
+    progress(0.85, desc="پردازش فایل‌های نهایی...")
     if not generated_audio_files:
+        status_messages.append("❌ هیچ فایل صوتی با موفقیت تولید یا ذخیره نشد!")
         final_status = "\n".join(status_messages)
         print(final_status)
+        progress(1, desc="پایان با خطا.")
         return None, None, final_status
+    status_messages.append(f"\n🎉 {len(generated_audio_files)} فایل(های) صوتی تولید شد!")
     # Decide what the player and the download component receive.
+    output_audio_path_for_player = None
+    output_path_for_download = None
+    if merge_checkbox and len(generated_audio_files) > 1 and PYDUB_AVAILABLE:
+        status_messages.append(f"🔗 در حال ادغام {len(generated_audio_files)} فایل صوتی...")
+        merged_filename_path = os.path.join(temp_output_dir, f"{output_base_name_safe}_merged.wav")
+        success_merge, msg_merge, merged_p = merge_audio_files_func(generated_audio_files, merged_filename_path)
+        status_messages.append(msg_merge)
+        if success_merge:
+            output_audio_path_for_player = merged_p
+            output_path_for_download = merged_p
             # Partial files are deleted only after a successful merge.
+            if delete_partials_checkbox:
+                status_messages.append("🗑️ در حال حذف فایل‌های جزئی...")
+                for file_p in generated_audio_files:
+                    try: os.remove(file_p); status_messages.append(f"  🗑️ حذف شد: {os.path.basename(file_p)}")
+                    except Exception as e_del: status_messages.append(f"  ⚠️ عدم موفقیت در حذف {os.path.basename(file_p)}: {e_del}")
+        else:
             # Merge failed: offer the individual parts as a ZIP; the player
             # path stays None.
+            status_messages.append("⚠️ ادغام ناموفق بود. فایل ZIP از قطعات ارائه می‌شود.")
             success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
             status_messages.append(msg_zip)
             if success_zip: output_path_for_download = zip_p
     elif len(generated_audio_files) == 1:
         single_file_path = generated_audio_files[0]
+        output_audio_path_for_player = single_file_path
         output_path_for_download = single_file_path
+        status_messages.append(f"🎵 فایل صوتی تکی: {os.path.basename(single_file_path)}")
+    elif len(generated_audio_files) > 1: # No merge or pydub not available
+        if not PYDUB_AVAILABLE and merge_checkbox:
+            status_messages.append("⚠️ pydub در دسترس نیست، امکان ادغام وجود ندارد. فایل ZIP ارائه می‌شود.")
+        status_messages.append("📦 چندین قطعه تولید شد. در حال ایجاد فایل ZIP...")
         success_zip, msg_zip, zip_p = create_zip_file(generated_audio_files, os.path.join(temp_output_dir, f"{output_base_name_safe}_all_parts.zip"))
         status_messages.append(msg_zip)
         if success_zip: output_path_for_download = zip_p
     final_status = "\n".join(status_messages)
     print(final_status)
+    print(f"DEBUG مسیر فایل برای پخش کننده: {output_audio_path_for_player}")
+    print(f"DEBUG مسیر فایل برای دانلود: {output_path_for_download}")
+    progress(1, desc="انجام شد!")
     return output_audio_path_for_player, output_path_for_download, final_status
+# --- تعریف رابط کاربری Gradio ---
+with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky), title="تبدیل متن به گفتار با Gemini") as demo:
     # Gradio layout: an input column and a settings column, a submit button,
     # the output widgets, and example rows - all wired to
     # generate_audio_for_gradio.
+    gr.Markdown("# 🎵 تبدیل متن به گفتار با Gemini API 🗣️", elem_id="app-title")
     # NOTE(review): gr.Warning / gr.Info are called here while the layout is
     # being built rather than inside an event handler - confirm that they
     # are actually shown to the user.
     if not HF_GEMINI_API_KEY:
         gr.Warning(
+            "کلید API جمینای (GEMINI_API_KEY) در Hugging Face Secrets یافت نشد. "
+            "لطفاً آن را در بخش 'Settings' > 'Secrets' این Space با نام `GEMINI_API_KEY` اضافه کنید تا برنامه کار کند."
         )
     else:
+        gr.Info("کلید API جمینای با موفقیت از Secrets بارگذاری شد. آماده تولید صدا!")
     gr.Markdown(
+        "این ابزار متن شما را با استفاده از API قدرتمند Gemini گوگل به گفتار تبدیل می‌کند. "
+        "برای استفاده، باید کلید API جمینای خود را در بخش Secrets این Space تنظیم کرده باشید."
+        "\n\nمی‌توانید کلید API خود را از [استودیوی هوش مصنوعی گوگل (Google AI Studio)](https://aistudio.google.com/app/apikey) دریافت کنید."
     )
     with gr.Row():
+        with gr.Column(scale=3): # text input column
+            gr.Markdown("### ۱. متن ورودی")
+            use_file = gr.Checkbox(label="📁 استفاده از فایل متنی (.txt) به جای ورود دستی", value=False)
             # The file picker starts hidden; the text box starts visible.
             text_file = gr.File(
+                label="آپلود فایل متنی",
                 file_types=['.txt'],
+                visible=False
             )
             text_to_speak = gr.Textbox(
+                label="📝 متنی که می‌خواهید به گفتار تبدیل شود:",
                 lines=10,
+                placeholder="متن خود را در اینجا وارد کنید یا فایل متنی را در بالا آپلود نمایید...",
+                visible=True,
+                text_align="right" # for Persian text
             )
             # The callback returns two visibility updates: the first follows
             # the checkbox value, the second is its inverse.
             # NOTE(review): only the callback and one component list are
             # visible in this call - confirm that `use_file` is passed as
             # the input and this list as the outputs.
             use_file.change(
                 lambda x: (gr.update(visible=x), gr.update(visible=not x)),
                 [text_file, text_to_speak]
             )
             speech_prompt = gr.Textbox(
+                label="🗣️ فرمان سبک گفتار (اختیاری)",
+                placeholder="مثال: «با لحنی دوستانه و پرانرژی، مانند یک مجری پادکست صحبت کن»",
+                info="این فرمان به تنظیم سبک، احساسات و ویژگی‌های صدای خروجی کمک می‌کند.",
+                text_align="right"
             )
+        with gr.Column(scale=2): # settings column
+            gr.Markdown("### ۲. تنظیمات تولید صدا")
+            # Convert the Persian-name dictionary into a list of tuples for the Dropdown
             # NOTE(review): MODEL_NAMES_FARSI and SPEAKER_VOICES_FARSI_SAMPLE
             # are not defined in the visible part of the file.
+            model_choices_farsi = [(MODEL_NAMES_FARSI[key], key) for key in MODELS]
+            model_name_dropdown = gr.Dropdown(
+                choices=model_choices_farsi, # show the Persian name, submit the English key
+                label="🤖 انتخاب مدل Gemini",
+                value=MODELS[0] # default value is the English key
             )
             # Voices without a Persian label fall back to their own name.
+            speaker_choices_farsi = [(SPEAKER_VOICES_FARSI_SAMPLE.get(v, v), v) for v in SPEAKER_VOICES]
+            speaker_voice_dropdown = gr.Dropdown(
+                choices=speaker_choices_farsi,
+                label="🎤 انتخاب گوینده",
+                value="Charon"
             )
+            temperature_slider = gr.Slider(
+                minimum=0.0, maximum=1.0, step=0.05, value=0.7,
+                label="🌡️ دمای مدل (Temperature)",
+                info="میزان خلاقیت و تنوع در خروجی (0.0 تا 1.0). مقادیر بالاتر = تنوع بیشتر."
             )
+            max_chunk_size_slider = gr.Slider(
                 minimum=1000, maximum=4000, step=100, value=3800,
+                label="🧩 حداکثر کاراکتر در هر قطعه",
+                info="متن برای ارسال به API به قطعات کوچکتر تقسیم می‌شود."
             )
+            sleep_between_requests_slider = gr.Slider(
+                minimum=0, maximum=15, step=0.5, value=1, # lower default for faster runs
+                label="⏱️ تاخیر بین درخواست‌ها (ثانیه)",
+                info="برای مدیریت محدودیت‌های API (مثلاً Gemini Flash دارای محدودیت ۶۰ درخواست در دقیقه است)."
             )
+            output_filename_base_input = gr.Textbox(
+                label="💾 نام پایه فایل خروجی", value="gemini_tts_farsi"
             )
+            with gr.Group(elem_id="merge-options"):
+                gr.Markdown("تنظیمات ادغام (در صورت تولید بیش از یک قطعه):")
+                merge_audio_checkbox = gr.Checkbox(label="🔗 ادغام قطعات صوتی", value=True, visible=PYDUB_AVAILABLE)
+                delete_partials_checkbox = gr.Checkbox(label="🗑️ حذف قطعات پس از ادغام", value=True, visible=PYDUB_AVAILABLE and True) # shown when merging is enabled and pydub is available
                 # With pydub present, the delete option is shown only while
                 # the merge option is ticked; otherwise a notice is shown.
+                if PYDUB_AVAILABLE:
+                     merge_audio_checkbox.change(lambda x: gr.update(visible=x), [merge_audio_checkbox], [delete_partials_checkbox])
+                else:
+                    gr.Markdown("<p style='color:orange; font-size:small;'>⚠️ قابلیت ادغام فایل‌ها به دلیل عدم دسترسی به کتابخانه `pydub` غیرفعال است.</p>")
+    submit_button = gr.Button("✨ تولید فایل صوتی ✨", variant="primary", elem_id="submit-button-main")
+    gr.Markdown("### ۳. خروجی")
     with gr.Row():
         with gr.Column(scale=1):
+            output_audio_player_component = gr.Audio(label="🎧 فایل صوتی تولید شده (قابل پخش)", type="filepath")
         with gr.Column(scale=1):
+            output_file_download_component = gr.File(label="📥 دانلود فایل خروجی (صوتی یا ZIP)", type="filepath")
+    status_textbox_component = gr.Textbox(label="📊 گزارش وضعیت و پیام‌ها", lines=10, interactive=False, max_lines=20, text_align="right")
     # The input order must match the positional parameters of
     # generate_audio_for_gradio.
     submit_button.click(
         fn=generate_audio_for_gradio,
         inputs=[
             use_file, text_file, speech_prompt, text_to_speak,
+            max_chunk_size_slider, sleep_between_requests_slider, temperature_slider,
+            model_name_dropdown, speaker_voice_dropdown, output_filename_base_input,
+            merge_audio_checkbox, delete_partials_checkbox
         ],
+        outputs=[output_audio_player_component, output_file_download_component, status_textbox_component]
     )
     gr.Markdown("---")
+    # Creator information
+    encoded_text_creator = "Q3JlYXRlZCBieSA6IEhhbWVkNzQ0IChBSUdPTERFTikgZm9yIEh1Z2dpbmcgRmFjZSBTcGFjZXMu"
+    # "Created by : Hamed744 (AIGOLDEN) for Hugging Face Spaces."
     # If decoding fails, the credit line is silently omitted.
     try:
+        decoded_text_creator = base64.b64decode(encoded_text_creator.encode('utf-8')).decode('utf-8')
+        gr.Markdown(f"<p style='text-align:center; font-size:small; color:grey;'><em>{decoded_text_creator}</em></p>")
+    except Exception: pass
     # Each example row supplies one value per input component, in order.
     gr.Examples(
+        label="چند مثال برای شروع:",
         examples=[
+            [False, None, "یک راوی مهربان و آموزنده.", "سلام دنیا! این یک آزمایش برای تبدیل متن به گفتار با جمینای در گریدیا است. امیدوارم خوب کار کند!", 3800, 1, 0.7, MODELS[0], "Charon", "hello_farsi", True, True],
             # NOTE(review): the text of the next example contains Unicode
             # replacement characters (probably a mis-encoded word) - restore
             # it from the original file.
+            [False, None, "یک گزارشگر خبری هیجان‌زده.", "خبر فوری! هوش مصنوعی اکنون می‌تواند گفتاری شبیه به انسان با وضوح باورنکردنی تولید کند. این فناوری به سر��ت در حال پیشرفت است!", 3000, 1, 0.8, MODELS[1], "Achernar", "news_farsi", True, True],
         ],
+        fn=generate_audio_for_gradio,
+        inputs=[
             use_file, text_file, speech_prompt, text_to_speak,
+            max_chunk_size_slider, sleep_between_requests_slider, temperature_slider,
+            model_name_dropdown, speaker_voice_dropdown, output_filename_base_input,
+            merge_audio_checkbox, delete_partials_checkbox
         ],
+        outputs=[output_audio_player_component, output_file_download_component, status_textbox_component],
+        cache_examples=False # because it calls the API and the inputs are dynamic
     )
+# اجرای برنامه در صورت اجرای مستقیم فایل (برای تست محلی)
 if __name__ == "__main__":
     # Direct-run entry point: print a warning for each missing prerequisite,
     # then start the app without a share link and with debug output on.
     startup_checks = (
         (PYDUB_AVAILABLE, "هشدار: کتابخانه pydub نصب نشده یا کار نمی‌کند. قابلیت ادغام فایل‌های صوتی غیرفعال خواهد بود."),
         (HF_GEMINI_API_KEY, "هشدار: متغیر محیطی GEMINI_API_KEY تنظیم نشده است. اگر برنامه برای کلید API به آن متکی باشد، ممکن است در حالت محلی کار نکند."),
     )
     for is_ready, warning_text in startup_checks:
         if not is_ready:
             print(warning_text)
     # share=False for local runs; Hugging Face manages the public link.
     demo.launch(share=False, debug=True)