Spaces:

Hamed744
/

Ttspro

Running

App Files Files Community

Hamed744 committed on Jun 5

Commit

e4bf7cb

verified ·

1 Parent(s): e4d0150

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -177

app.py CHANGED Viewed

@@ -9,13 +9,14 @@ import zipfile
 from google import genai
 from google.genai import types
 try:
     from pydub import AudioSegment
     PYDUB_AVAILABLE = True
 except ImportError:
     PYDUB_AVAILABLE = False
-# --- Constants ---
 SPEAKER_VOICES = [
     "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
     "Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
@@ -25,7 +26,7 @@ SPEAKER_VOICES = [
 ]
 MODEL_NAMES = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
-# --- Helper functions (Adapted for Gradio logging) ---
 def save_binary_file(file_name, data, log_messages_list):
     try:
@@ -57,7 +58,7 @@ def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
 def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
     bits_per_sample = 16
-    rate = 24000
     parts = mime_type.split(";")
     for param in parts:
         param = param.strip()
@@ -67,7 +68,7 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
                 rate = int(rate_str)
             except (ValueError, IndexError):
                 pass
-        elif param.startswith("audio/L"):
             try:
                 bits_per_sample = int(param.split("L", 1)[1])
             except (ValueError, IndexError):
@@ -79,31 +80,40 @@ def smart_text_split(text, max_size=3800):
         return [text]
     chunks = []
     current_chunk = ""
-    sentences = re.split(r'(?<=[.!?])\s+', text)
     for sentence in sentences:
-        if len(current_chunk) + len(sentence) + 1 > max_size:
-            if current_chunk:
                 chunks.append(current_chunk.strip())
             current_chunk = sentence
-            # If a single sentence is too long, split it by words/chars
-            if len(current_chunk) > max_size:
-                words = current_chunk.split()
-                temp_word_chunk = ""
-                for word in words:
-                    if len(temp_word_chunk) + len(word) + 1 > max_size:
-                        if temp_word_chunk: chunks.append(temp_word_chunk.strip())
-                        temp_word_chunk = word
-                        while len(temp_word_chunk) > max_size: # Force split very long words
-                            chunks.append(temp_word_chunk[:max_size])
-                            temp_word_chunk = temp_word_chunk[max_size:]
-                    else:
-                        temp_word_chunk += (" " if temp_word_chunk else "") + word
-                if temp_word_chunk: chunks.append(temp_word_chunk.strip())
-                current_chunk = "" # Reset current_chunk as it was processed
         else:
             current_chunk += (" " if current_chunk else "") + sentence
-    if current_chunk:
         chunks.append(current_chunk.strip())
     return [c for c in chunks if c] # Ensure no empty chunks
 def merge_audio_files_func(file_paths, output_path, log_messages_list):
@@ -115,11 +125,11 @@ def merge_audio_files_func(file_paths, output_path, log_messages_list):
         combined = AudioSegment.empty()
         for i, file_path in enumerate(file_paths):
             if os.path.exists(file_path):
-                log_messages_list.append(f"📎 اضافه کردن فایل {i+1}: {file_path}")
-                audio = AudioSegment.from_file(file_path)
                 combined += audio
                 if i < len(file_paths) - 1: # Add short silence between segments
-                    combined += AudioSegment.silent(duration=100) # 100ms silence
             else:
                 log_messages_list.append(f"⚠️ فایل پیدا نشد: {file_path}")
         combined.export(output_path, format="wav")
@@ -141,7 +151,7 @@ def create_zip_file(file_paths, zip_name, log_messages_list):
         log_messages_list.append(f"❌ خطا در ایجاد فایل ZIP: {e}")
         return False
-# --- Main generation function (Adapted for Gradio) ---
 def core_generate_audio(
     text_input, prompt_input, selected_voice, output_base_name,
     model, temperature_val,
@@ -150,46 +160,40 @@ def core_generate_audio(
 ):
     log_messages_list.append("🚀 شروع فرآیند تبدیل متن به گفتار...")
-    # API Key Retrieval
     api_key = os.environ.get("GEMINI_API_KEY")
     if not api_key:
         log_messages_list.append("❌ خطا: کلید API جمینای (GEMINI_API_KEY) در Secrets این Space تنظیم نشده است.")
         log_messages_list.append("لطفاً به تنظیمات Space رفته و یک Secret با نام GEMINI_API_KEY و مقدار کلید خود ایجاد کنید.")
         return None, None # No audio path, no download path
-    # Initialize GenAI Client
     try:
         log_messages_list.append("🛠️ در حال ایجاد کلاینت جمینای...")
-        # os.environ["GEMINI_API_KEY"] = api_key # Already set if from secrets
-        client = genai.Client(api_key=api_key) # Pass api_key directly
         log_messages_list.append("✅ کلاینت جمینای با موفقیت ایجاد شد.")
     except Exception as e:
         log_messages_list.append(f"❌ خطا در ایجاد کلاینت جمینای: {e}")
         log_messages_list.append("لطفاً از صحت کلید API خود اطمینان حاصل کنید.")
         return None, None
-    # Validate Text Input (already done in wrapper, but good to double check)
     if not text_input or text_input.strip() == "":
         log_messages_list.append("❌ خطا: متن ورودی برای تبدیل به گفتار خالی است.")
         return None, None
-    # Split text into chunks
     text_chunks = smart_text_split(text_input, max_chunk)
     log_messages_list.append(f"📊 متن به {len(text_chunks)} قطعه تقسیم شد.")
     for i, chunk in enumerate(text_chunks):
         log_messages_list.append(f"📝 قطعه {i+1}: {len(chunk)} کاراکتر")
-        if len(chunk) == 0: # Safety check from smart_text_split
-            log_messages_list.append(f"⚠️ هشدار: قطعه {i+1} خالی است و نادیده گرفته می‌شود.")
-    text_chunks = [c for c in text_chunks if c] # Filter out empty chunks again
     if not text_chunks:
         log_messages_list.append("❌ خطا: پس از تقسیم‌بندی، هیچ قطعه متنی برای پردازش وجود ندارد.")
         return None, None
     generated_files = []
-    # Ensure output directory exists (optional, can write to current dir)
-    # output_dir = "outputs"
-    # os.makedirs(output_dir, exist_ok=True)
     for i, chunk in enumerate(text_chunks):
         log_messages_list.append(f"\n🔊 تولید صدا برای قطعه {i+1}/{len(text_chunks)}...")
@@ -207,12 +211,9 @@ def core_generate_audio(
         )
         current_chunk_filename_base = f"{output_base_name}_part{i+1:03d}"
-        # current_chunk_filename_base = os.path.join(output_dir, f"{output_base_name}_part{i+1:03d}")
         try:
-            # Using generate_content, not stream, for simplicity with single audio part expected
-            response = client.models.generate_content(
                 model=model,
                 contents=contents,
                 config=generate_content_config,
@@ -224,50 +225,51 @@ def core_generate_audio(
                 inline_data = response.candidates[0].content.parts[0].inline_data
                 data_buffer = inline_data.data
                 file_extension = mimetypes.guess_extension(inline_data.mime_type)
-                if file_extension is None or "binary" in inline_data.mime_type: # Fallback for generic mime types
                     file_extension = ".wav"
-                    # Assuming Gemini TTS API now more consistently returns audio/* mimetypes
-                    # but if it's audio/L16; rate=24000, convert_to_wav is needed
-                    if "audio/L" in inline_data.mime_type: # Needs WAV header
                          data_buffer = convert_to_wav(data_buffer, inline_data.mime_type)
-                    # If it's already audio/wav or audio/mpeg, it might be fine.
-                    # Forcing .wav for consistency as pydub handles WAV well.
-                    # If Gemini sends actual WAV, convert_to_wav might not be strictly needed
-                    # but better safe than sorry if mime is generic.
                 generated_file_path = save_binary_file(f"{current_chunk_filename_base}{file_extension}", data_buffer, log_messages_list)
                 if generated_file_path:
                     generated_files.append(generated_file_path)
-                    log_messages_list.append(f"✅ قطعه {i+1} تولید شد: {generated_file_path}")
-            elif response.text: # If API returns text (e.g. error or info)
                  log_messages_list.append(f"ℹ️ پیام متنی از API برای قطعه {i+1}: {response.text}")
-                 if "rate limit" in response.text.lower():
-                    log_messages_list.append(f"⏳ به نظر میرسد به محدودیت تعداد درخواست API رسیده‌اید. لطفاً چند دقیقه صبر کنید و دوباره امتحان کنید، یا فاصله زمانی بین درخواست‌ها را افزایش دهید.")
-            else: # No audio, no text
-                log_messages_list.append(f"⚠️ پاسخ API برای قطعه {i+1} حاوی داده صوتی یا پیام متنی نبود.")
         except types.generation_types.BlockedPromptException as bpe:
             log_messages_list.append(f"❌ محتوای پرامپت برای قطعه {i+1} مسدود شد: {bpe}")
             log_messages_list.append("لطفاً متن ورودی یا پرامپت سبک گفتار را بررسی و اصلاح کنید.")
-            continue # Skip to next chunk
         except types.generation_types.StopCandidateException as sce:
             log_messages_list.append(f"❌ تولید محتوا برای قطعه {i+1} به دلیل نامشخصی متوقف شد: {sce}")
             continue
         except Exception as e:
             log_messages_list.append(f"❌ خطا در تولید قطعه {i+1}: {e}")
-            # Specific check for common API errors
             if "API key not valid" in str(e):
                 log_messages_list.append("خطای کلید API. لطفاً از معتبر بودن کلید و تنظیم صحیح آن در Secrets مطمئن شوید.")
             elif "resource has been exhausted" in str(e).lower() or "quota" in str(e).lower():
                  log_messages_list.append("به نظر میرسد محدودیت استفاده از API (Quota) شما تمام شده است.")
-            continue # Skip to next chunk
-        if i < len(text_chunks) - 1 and len(text_chunks) > 1 : # Only sleep if there are more chunks
             log_messages_list.append(f"⏱️ انتظار {sleep_time} ثانیه...")
             time.sleep(sleep_time)
@@ -282,79 +284,74 @@ def core_generate_audio(
     if merge_files and len(generated_files) > 1:
         if not PYDUB_AVAILABLE:
-            log_messages_list.append("⚠️ pydub برای ادغام در دسترس نیست. فایل‌ها به صورت جداگانه ارائه می‌شوند.")
-            # Offer zip of parts if pydub not available for merging
             zip_filename = f"{output_base_name}_all_parts.zip"
             if create_zip_file(generated_files, zip_filename, log_messages_list):
                 download_file = zip_filename
-            playback_file = generated_files[0] # Play first part
         else:
             merged_filename = f"{output_base_name}_merged.wav"
-            # merged_filename = os.path.join(output_dir, f"{output_base_name}_merged.wav")
             if merge_audio_files_func(generated_files, merged_filename, log_messages_list):
                 playback_file = merged_filename
                 download_file = merged_filename
-                log_messages_list.append(f"🎵 فایل نهایی ادغام شده: {merged_filename}")
                 if delete_partials:
                     for file_path in generated_files:
                         try:
-                            if file_path != merged_filename: # Don't delete the merged file itself if it was in generated_files
                                 os.remove(file_path)
-                                log_messages_list.append(f"🗑️ فایل جزئی حذف شد: {file_path}")
                         except Exception as e:
-                            log_messages_list.append(f"⚠️ خطا در حذف فایل جزئی {file_path}: {e}")
             else:
-                log_messages_list.append("⚠️ ادغام ممکن نبود. فایل‌های جداگانه حفظ شدند.")
-                # Fallback to zip if merging failed
                 zip_filename = f"{output_base_name}_all_parts.zip"
-                # zip_filename = os.path.join(output_dir, f"{output_base_name}_all_parts.zip")
                 if create_zip_file(generated_files, zip_filename, log_messages_list):
                     download_file = zip_filename
-                playback_file = generated_files[0] # Play first part
     elif len(generated_files) == 1:
         playback_file = generated_files[0]
         download_file = generated_files[0]
-    else: # Multiple files, no merge requested
         zip_filename = f"{output_base_name}_all_parts.zip"
-        # zip_filename = os.path.join(output_dir, f"{output_base_name}_all_parts.zip")
         if create_zip_file(generated_files, zip_filename, log_messages_list):
             download_file = zip_filename
-        playback_file = generated_files[0] # Play first part
     if playback_file and not os.path.exists(playback_file):
-        log_messages_list.append(f"⚠️ فایل پخش {playback_file} وجود ندارد!")
         playback_file = None
     if download_file and not os.path.exists(download_file):
-        log_messages_list.append(f"⚠️ فایل دانلود {download_file} وجود ندارد!")
         download_file = None
     return playback_file, download_file
-# --- Gradio Interface Function ---
 def gradio_tts_interface(
     use_file_input, uploaded_file, text_to_speak,
-    speech_prompt, speaker_voice, output_filename_base,
     model_name, temperature,
     max_chunk_size, sleep_between_requests,
-    merge_audio_files, delete_partial_files,
-    progress=gr.Progress(track_tqdm=True)
 ):
-    log_messages = [] # Initialize list for logs for this run
-    # Determine actual text input
     actual_text_input = ""
     if use_file_input:
         if uploaded_file is not None:
             try:
                 with open(uploaded_file.name, 'r', encoding='utf-8') as f:
                     actual_text_input = f.read().strip()
                 log_messages.append(f"✅ متن از فایل '{os.path.basename(uploaded_file.name)}' بارگذاری شد: {len(actual_text_input)} کاراکتر.")
                 log_messages.append(f"📝 نمونه متن فایل: '{actual_text_input[:100]}{'...' if len(actual_text_input) > 100 else ''}'")
                 if not actual_text_input:
-                    log_messages.append("❌ خطا: فایل آپلود شده خالی است.")
                     return None, None, "\n".join(log_messages)
             except Exception as e:
                 log_messages.append(f"❌ خطا در خواندن فایل آپلود شده: {e}")
@@ -370,130 +367,148 @@ def gradio_tts_interface(
         log_messages.append(f"📖 متن ورودی دستی: {len(actual_text_input)} کاراکتر")
         log_messages.append(f"📝 نمونه متن ورودی: '{actual_text_input[:100]}{'...' if len(actual_text_input) > 100 else ''}'")
     if not PYDUB_AVAILABLE:
         log_messages.append("⚠️ کتابخانه pydub در دسترس نیست. امکان ادغام فایل‌های صوتی وجود نخواهد داشت و فایل‌های صوتی به صورت جداگانه (در صورت وجود چند بخش) در یک فایل ZIP ارائه می‌شوند.")
-        merge_audio_files = False # Force disable merge if pydub is not available
-    # Call the core generation logic
     playback_path, download_path = core_generate_audio(
-        actual_text_input,
-        speech_prompt,
-        speaker_voice,
-        output_filename_base if output_filename_base else "gemini_tts_output",
-        model_name,
-        temperature,
-        max_chunk_size,
-        sleep_between_requests,
-        merge_audio_files,
-        delete_partial_files,
-        log_messages # Pass the list
     )
-    log_output = "\n".join(log_messages)
-    # Ensure paths are valid before returning
     valid_playback_path = playback_path if playback_path and os.path.exists(playback_path) else None
     valid_download_path = download_path if download_path and os.path.exists(download_path) else None
-    if not valid_playback_path and not valid_download_path and not actual_text_input:
-         # Avoid error message if it was just an empty input from the start
-        pass
-    elif not valid_playback_path and not valid_download_path :
-        log_output += "\n🛑 هیچ فایل صوتی برای پخش یا دانلود در دسترس نیست."
-    return valid_playback_path, valid_download_path, log_output
-# --- Gradio UI Definition ---
 css = """
-body { font-family: 'Arial', sans-serif; }
-.gradio-container { max-width: 800px !important; margin: auto !important; }
 footer { display: none !important; }
-.gr-button { background-color: #007bff !important; color: white !important; }
-.gr-button:hover { background-color: #0056b3 !important; }
-#output_audio .gallery { display: none !important; } /* Hide gallery view for audio if it appears */
-#download_file_output .gallery { display: none !important; } /* Hide gallery view for file if it appears */
 """
-with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🔊 تبدیل متن به گفتار با Gemini API")
-    gr.Markdown("ساخته شده بر اساس کد کولب توسط: [aigolden](https://github.com/aigolden)") # Assuming aigolden is a GitHub user or similar
-    gr.Markdown("---")
-    gr.Markdown(
-        "**راهنما:** برای استفاده از این ابزار، ابتدا باید کلید API جمینای خود را در بخش **Secrets** این Space در Hugging Face اضافه کنید.\n"
-        "1. به صفحه Space خود بروید.\n"
-        "2. روی 'Settings' کلیک کنید.\n"
-        "3. در بخش 'Repository secrets'، روی 'New secret' کلیک کنید.\n"
-        "4. در فیلد 'Name'، عبارت `GEMINI_API_KEY` را وارد کنید.\n"
-        "5. در فیلد 'Value'، کلید API جمینای خود را وارد کنید و 'Save secret' را بزنید.\n"
-        "پس از تنظیم Secret، می‌توانید از این ابزار استفاده کنید."
-    )
-    gr.Markdown("---")
     with gr.Row():
-        with gr.Column(scale=2):
-            gr.Markdown("###  تنظیمات ورودی و پرامپت")
-            use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی ورودی", value=False)
-            # Conditional visibility for text_to_speak_tb vs uploaded_file_input
-            # Gradio handles this by just having both and user interaction defines which is used via the wrapper
-            uploaded_file_input = gr.File(label="📂 آپلود فایل متنی (فقط شامل متن اصلی)", file_types=['.txt'])
             text_to_speak_tb = gr.Textbox(
-                label="📝 متن ورودی (اگر گزینه فایل فعال نیست)",
                 placeholder="متن مورد نظر برای تبدیل به گفتار را اینجا وارد کنید...",
-                lines=7,
-                value="سلام دنیا! این یک آزمایش برای تبدیل متن به گفتار با استفاده از مدل جمینای است."
             )
             speech_prompt_tb = gr.Textbox(
-                label="🗣️ پرامپت برای تنظیم سبک گفتار",
-                placeholder="مثال: از زبان یک یوتوبر پر انرژی و حرفه ای",
-                value="به زبان یک گوینده حرفه‌ای و آرام صحبت کن."
             )
-        with gr.Column(scale=1):
-            gr.Markdown("### تنظیمات مدل و خروجی")
             model_name_dd = gr.Dropdown(
-                MODEL_NAMES, label="🤖 انتخاب مدل", value="gemini-2.5-flash-preview-tts"
             )
             speaker_voice_dd = gr.Dropdown(
                 SPEAKER_VOICES, label="🎤 انتخاب گوینده", value="Charon"
             )
             temperature_slider = gr.Slider(
-                minimum=0, maximum=2, step=0.05, value=1.0, label="🌡️ دمای مدل (تنوع خروجی)"
-            )
             output_filename_base_tb = gr.Textbox(
-                label="📛 نام پایه فایل خروجی (بدون پسوند)", value="gemini_tts_output"
             )
-    gr.Markdown("---")
-    gr.Markdown("### تنظیمات پیشرفته")
-    with gr.Row():
-        max_chunk_size_slider = gr.Slider(
-            minimum=2000, maximum=4000, step=100, value=3800, label="📏 حداکثر کاراکتر در هر قطعه"
-        )
-        sleep_between_requests_slider = gr.Slider(
-            minimum=5, maximum=20, step=0.5, value=14, label="⏱️ فاصله زمانی بین درخواست‌ها (ثانیه)"
-        ) # Increased min sleep a bit
-    with gr.Row():
-        merge_audio_files_cb = gr.Checkbox(label="🔗 ادغام فایل‌های صوتی در یک فایل", value=True)
-        delete_partial_files_cb = gr.Checkbox(label="🗑️ حذف فایل‌های جزئی پس از ادغام (اگر ادغام فعال باشد)", value=False)
-    gr.Markdown("---")
-    generate_button = gr.Button("🎙️ تولید صدا", variant="primary")
-    gr.Markdown("---")
     gr.Markdown("### 🎧 خروجی صوتی و دانلود 📥")
     with gr.Row():
-        output_audio = gr.Audio(label="🔊 فایل صوتی تولید شده", elem_id="output_audio")
-        download_file_output = gr.File(label="💾 دانلود فایل نهایی (WAV یا ZIP)", elem_id="download_file_output")
-    gr.Markdown("### 📜 لاگ‌ها و پیام‌ها")
-    logs_output_tb = gr.Textbox(label=" ", lines=10, interactive=False, autoscroll=True)
-    # Connect button to function
     generate_button.click(
         fn=gradio_tts_interface,
         inputs=[
@@ -506,11 +521,11 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
         outputs=[output_audio, download_file_output, logs_output_tb]
     )
-    # Example texts
     gr.Examples(
         examples=[
-            [False, None, "سلام، این یک تست کوتاه است.", "یک صدای دوستانه و واضح.", "Charon", "gemini_tts_output", "gemini-2.5-flash-preview-tts", 0.9, 3800, 12, True, False],
-            [False, None, "به دنیای هوش مصنوعی خوش آمدید. امیدوارم از این ابزار لذت ببرید.", "با هیجان و انرژی صحبت کن.", "Zephyr", "ai_voice_test", "gemini-2.5-flash-preview-tts", 1.1, 3000, 10, True, True],
         ],
         inputs=[
             use_file_input_cb, uploaded_file_input, text_to_speak_tb,
@@ -519,18 +534,21 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
             max_chunk_size_slider, sleep_between_requests_slider,
             merge_audio_files_cb, delete_partial_files_cb
         ],
-        outputs=[output_audio, download_file_output, logs_output_tb], # outputs are optional for examples
-        fn=gradio_tts_interface, # function to call for examples
-        cache_examples=False # Set to True if you want to precompute and cache example outputs
     )
     gr.Markdown(
-        "<div style='text-align: center; margin-top: 20px; font-size: 0.9em; color: grey;'>"
-        "این ابزار از API شرکت Google Gemini برای تبدیل متن به گفتار استفاده می‌کند. "
-        "لطفاً به محدودیت‌های استفاده و شرایط خدمات Gemini API توجه فرمایید."
         "</div>"
     )
 if __name__ == "__main__":
-    demo.launch(debug=True) # debug=True for local testing

 from google import genai
 from google.genai import types
+# تلاش برای ایمپورت pydub و تنظیم فلگ در دسترس بودن
 try:
     from pydub import AudioSegment
     PYDUB_AVAILABLE = True
 except ImportError:
     PYDUB_AVAILABLE = False
+# --- ثابت‌ها ---
 SPEAKER_VOICES = [
     "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
     "Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
 ]
 MODEL_NAMES = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
+# --- توابع کمکی (سازگار شده برای لاگ‌نویسی در Gradio) ---
 def save_binary_file(file_name, data, log_messages_list):
     try:
 def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
     bits_per_sample = 16
+    rate = 24000 # Default rate for Gemini TTS
     parts = mime_type.split(";")
     for param in parts:
         param = param.strip()
                 rate = int(rate_str)
             except (ValueError, IndexError):
                 pass
+        elif param.startswith("audio/L"): # e.g., audio/L16
             try:
                 bits_per_sample = int(param.split("L", 1)[1])
             except (ValueError, IndexError):
         return [text]
     chunks = []
     current_chunk = ""
+    # Split by sentences, keeping delimiters. Prioritize common Persian sentence enders.
+    sentences = re.split(r'(?<=[.!?؟])\s+', text)
     for sentence in sentences:
+        sentence_with_space = sentence + " " # Add potential space for length calculation
+        if len(current_chunk) + len(sentence_with_space) > max_size:
+            if current_chunk: # Add the current chunk if it's not empty
                 chunks.append(current_chunk.strip())
+            # Now, current_chunk becomes the new sentence.
+            # If this new sentence itself is too long, it needs to be split further.
             current_chunk = sentence
+            while len(current_chunk) > max_size:
+                # Find a good split point (e.g., comma, space) near max_size
+                # Fallback to hard split if no good point found
+                split_idx = -1
+                # Try splitting at Persian/English punctuation within the oversized chunk
+                possible_split_chars = ['،', ',', ';', ':', ' ']
+                for char_idx in range(max_size - 1, max_size // 2, -1): # Search backwards from max_size
+                    if current_chunk[char_idx] in possible_split_chars:
+                        split_idx = char_idx + 1
+                        break
+                if split_idx != -1:
+                    chunks.append(current_chunk[:split_idx].strip())
+                    current_chunk = current_chunk[split_idx:].strip()
+                else: # Hard split
+                    chunks.append(current_chunk[:max_size].strip())
+                    current_chunk = current_chunk[max_size:].strip()
         else:
             current_chunk += (" " if current_chunk else "") + sentence
+    if current_chunk: # Add any remaining part
         chunks.append(current_chunk.strip())
     return [c for c in chunks if c] # Ensure no empty chunks
 def merge_audio_files_func(file_paths, output_path, log_messages_list):
         combined = AudioSegment.empty()
         for i, file_path in enumerate(file_paths):
             if os.path.exists(file_path):
+                log_messages_list.append(f"📎 اضافه کردن فایل {i+1}: {os.path.basename(file_path)}")
+                audio = AudioSegment.from_file(file_path) # pydub usually infers format
                 combined += audio
                 if i < len(file_paths) - 1: # Add short silence between segments
+                    combined += AudioSegment.silent(duration=200) # 200ms silence
             else:
                 log_messages_list.append(f"⚠️ فایل پیدا نشد: {file_path}")
         combined.export(output_path, format="wav")
         log_messages_list.append(f"❌ خطا در ایجاد فایل ZIP: {e}")
         return False
+# --- تابع اصلی تولید (سازگار شده برای Gradio) ---
 def core_generate_audio(
     text_input, prompt_input, selected_voice, output_base_name,
     model, temperature_val,
 ):
     log_messages_list.append("🚀 شروع فرآیند تبدیل متن به گفتار...")
+    # دریافت کلید API
     api_key = os.environ.get("GEMINI_API_KEY")
     if not api_key:
         log_messages_list.append("❌ خطا: کلید API جمینای (GEMINI_API_KEY) در Secrets این Space تنظیم نشده است.")
         log_messages_list.append("لطفاً به تنظیمات Space رفته و یک Secret با نام GEMINI_API_KEY و مقدار کلید خود ایجاد کنید.")
         return None, None # No audio path, no download path
+    # مقداردهی اولیه کلاینت GenAI
     try:
         log_messages_list.append("🛠️ در حال ایجاد کلاینت جمینای...")
+        client = genai.Client(api_key=api_key)
         log_messages_list.append("✅ کلاینت جمینای با موفقیت ایجاد شد.")
     except Exception as e:
         log_messages_list.append(f"❌ خطا در ایجاد کلاینت جمینای: {e}")
         log_messages_list.append("لطفاً از صحت کلید API خود اطمینان حاصل کنید.")
         return None, None
     if not text_input or text_input.strip() == "":
         log_messages_list.append("❌ خطا: متن ورودی برای تبدیل به گفتار خالی است.")
         return None, None
     text_chunks = smart_text_split(text_input, max_chunk)
     log_messages_list.append(f"📊 متن به {len(text_chunks)} قطعه تقسیم شد.")
     for i, chunk in enumerate(text_chunks):
         log_messages_list.append(f"📝 قطعه {i+1}: {len(chunk)} کاراکتر")
+    text_chunks = [c for c in text_chunks if c] # فیلتر کردن قطعات خالی احتمالی
     if not text_chunks:
         log_messages_list.append("❌ خطا: پس از تقسیم‌بندی، هیچ قطعه متنی برای پردازش وجود ندارد.")
         return None, None
     generated_files = []
+    # نامگذاری فایل‌ها بدون مسیر اضافی برای سادگی در محیط Space
+    # فایل‌ها در ریشه فضای کاری Space ذخیره می‌شوند
     for i, chunk in enumerate(text_chunks):
         log_messages_list.append(f"\n🔊 تولید صدا برای قطعه {i+1}/{len(text_chunks)}...")
         )
         current_chunk_filename_base = f"{output_base_name}_part{i+1:03d}"
         try:
+            response = client.models.generate_content( # استفاده از generate_content برای سادگی
                 model=model,
                 contents=contents,
                 config=generate_content_config,
                 inline_data = response.candidates[0].content.parts[0].inline_data
                 data_buffer = inline_data.data
+                # حدس پسوند فایل بر اساس MIME type
                 file_extension = mimetypes.guess_extension(inline_data.mime_type)
+                # اگر پسوند قابل تشخیص نبود یا باینری عمومی بود، WAV را در نظر می‌گیریم
+                # و در صورت نیاز (مثلاً برای audio/L16) هدر WAV اضافه می‌کنیم
+                if file_extension is None or "binary" in inline_data.mime_type or file_extension == ".bin":
                     file_extension = ".wav"
+                    if "audio/L" in inline_data.mime_type: # نیاز به هدر WAV
                          data_buffer = convert_to_wav(data_buffer, inline_data.mime_type)
+                elif inline_data.mime_type == "audio/mpeg":
+                    file_extension = ".mp3" # اگر API مستقیما MP3 داد
+                elif inline_data.mime_type == "audio/wav":
+                    file_extension = ".wav" # اگر API مستقیما WAV داد
                 generated_file_path = save_binary_file(f"{current_chunk_filename_base}{file_extension}", data_buffer, log_messages_list)
                 if generated_file_path:
                     generated_files.append(generated_file_path)
+                    log_messages_list.append(f"✅ قطعه {i+1} تولید شد: {os.path.basename(generated_file_path)}")
+            elif response.text:
                  log_messages_list.append(f"ℹ️ پیام متنی از API برای قطعه {i+1}: {response.text}")
+                 if "rate limit" in response.text.lower() or "quota" in response.text.lower():
+                    log_messages_list.append(f"⏳ به نظر میرسد به محدودیت تعداد درخواست API (Quota) رسیده‌اید. لطفاً چند دقیقه صبر کنید و دوباره امتحان کنید، یا فاصله زمانی بین درخواست‌ها را افزایش دهید.")
+            else:
+                log_messages_list.append(f"⚠️ پاسخ API برای قطعه {i+1} حاوی داده صوتی یا پیام متنی نبود. جزئیات پاسخ: {response.prompt_feedback if response else 'No response'}")
         except types.generation_types.BlockedPromptException as bpe:
             log_messages_list.append(f"❌ محتوای پرامپت برای قطعه {i+1} مسدود شد: {bpe}")
+            log_messages_list.append(f"علت مسدود شدن: {bpe.response.prompt_feedback if bpe.response else 'نامشخص'}")
             log_messages_list.append("لطفاً متن ورودی یا پرامپت سبک گفتار را بررسی و اصلاح کنید.")
+            continue
         except types.generation_types.StopCandidateException as sce:
             log_messages_list.append(f"❌ تولید محتوا برای قطعه {i+1} به دلیل نامشخصی متوقف شد: {sce}")
             continue
         except Exception as e:
             log_messages_list.append(f"❌ خطا در تولید قطعه {i+1}: {e}")
             if "API key not valid" in str(e):
                 log_messages_list.append("خطای کلید API. لطفاً از معتبر بودن کلید و تنظیم صحیح آن در Secrets مطمئن شوید.")
             elif "resource has been exhausted" in str(e).lower() or "quota" in str(e).lower():
                  log_messages_list.append("به نظر میرسد محدودیت استفاده از API (Quota) شما تمام شده است.")
+            continue
+        if i < len(text_chunks) - 1 and len(text_chunks) > 1 :
             log_messages_list.append(f"⏱️ انتظار {sleep_time} ثانیه...")
             time.sleep(sleep_time)
     if merge_files and len(generated_files) > 1:
         if not PYDUB_AVAILABLE:
+            log_messages_list.append("⚠️ pydub برای ادغام در دسترس نیست. فایل‌ها به صورت جداگانه در یک فایل ZIP ارائه می‌شوند.")
             zip_filename = f"{output_base_name}_all_parts.zip"
             if create_zip_file(generated_files, zip_filename, log_messages_list):
                 download_file = zip_filename
+            if generated_files: playback_file = generated_files[0]
         else:
             merged_filename = f"{output_base_name}_merged.wav"
             if merge_audio_files_func(generated_files, merged_filename, log_messages_list):
                 playback_file = merged_filename
                 download_file = merged_filename
+                log_messages_list.append(f"🎵 فایل نهایی ادغام شده: {os.path.basename(merged_filename)}")
                 if delete_partials:
                     for file_path in generated_files:
                         try:
+                            if os.path.abspath(file_path) != os.path.abspath(merged_filename):
                                 os.remove(file_path)
+                                log_messages_list.append(f"🗑️ فایل جزئی حذف شد: {os.path.basename(file_path)}")
                         except Exception as e:
+                            log_messages_list.append(f"⚠️ خطا در حذف فایل جزئی {os.path.basename(file_path)}: {e}")
             else:
+                log_messages_list.append("⚠️ ادغام ممکن نبود. فایل‌ها به صورت جداگانه در یک فایل ZIP ارائه می‌شوند.")
                 zip_filename = f"{output_base_name}_all_parts.zip"
                 if create_zip_file(generated_files, zip_filename, log_messages_list):
                     download_file = zip_filename
+                if generated_files: playback_file = generated_files[0]
     elif len(generated_files) == 1:
         playback_file = generated_files[0]
         download_file = generated_files[0]
+    else: # Multiple files, no merge requested (or PYDUB_AVAILABLE is False and merge_files was True)
         zip_filename = f"{output_base_name}_all_parts.zip"
         if create_zip_file(generated_files, zip_filename, log_messages_list):
             download_file = zip_filename
+        if generated_files: playback_file = generated_files[0]
     if playback_file and not os.path.exists(playback_file):
+        log_messages_list.append(f"⚠️ فایل پخش {os.path.basename(playback_file)} وجود ندارد!")
         playback_file = None
     if download_file and not os.path.exists(download_file):
+        log_messages_list.append(f"⚠️ فایل دانلود {os.path.basename(download_file)} وجود ندارد!")
         download_file = None
     return playback_file, download_file
+# --- Gradio UI handler function ---
 # Gradio click handler: resolves the text to synthesize, sanitizes the output
 # base name, delegates synthesis to core_generate_audio, and returns a tuple of
 # (playback path or None, download path or None, newline-joined log text).
 def gradio_tts_interface(
     use_file_input, uploaded_file, text_to_speak,
+    speech_prompt, speaker_voice, output_filename_base_in,
     model_name, temperature,
     max_chunk_size, sleep_between_requests,
+    merge_audio_files_flag, delete_partial_files_flag,
+    progress=gr.Progress(track_tqdm=True) # track_tqdm for visual progress if using loops with tqdm
 ):
+    log_messages = []
     actual_text_input = ""
     if use_file_input:
         # File mode: read the uploaded file as UTF-8 and strip surrounding whitespace.
         if uploaded_file is not None:
             try:
+                # Gradio file objects have a .name attribute which is the temp path
                 with open(uploaded_file.name, 'r', encoding='utf-8') as f:
                     actual_text_input = f.read().strip()
                 log_messages.append(f"✅ متن از فایل '{os.path.basename(uploaded_file.name)}' بارگذاری شد: {len(actual_text_input)} کاراکتر.")
                 log_messages.append(f"📝 نمونه متن فایل: '{actual_text_input[:100]}{'...' if len(actual_text_input) > 100 else ''}'")
                 # An empty file is logged and ends the request with no audio outputs.
                 if not actual_text_input:
+                    log_messages.append("❌ خطا: فایل آپلود شده خالی است یا قابل خواندن نیست.")
                     return None, None, "\n".join(log_messages)
             except Exception as e:
                 log_messages.append(f"❌ خطا در خواندن فایل آپلود شده: {e}")
         # NOTE(review): the branch header for this manual-text path is not visible in this excerpt — confirm against the full file.
         # Logs the character count and a preview (first 100 characters) of the input text.
         log_messages.append(f"📖 متن ورودی دستی: {len(actual_text_input)} کاراکتر")
         log_messages.append(f"📝 نمونه متن ورودی: '{actual_text_input[:100]}{'...' if len(actual_text_input) > 100 else ''}'")
+    # Sanitize output_filename_base to prevent path traversal or invalid characters
+    output_filename_base = re.sub(r'[^\w\-_]', '', output_filename_base_in if output_filename_base_in else "gemini_tts_output")
+    if not output_filename_base: # If sanitization results in empty string
+        output_filename_base = "gemini_tts_output"
+    log_messages.append(f"🏷️ نام پایه فایل خروجی: {output_filename_base}")
     if not PYDUB_AVAILABLE:
         log_messages.append("⚠️ کتابخانه pydub در دسترس نیست. امکان ادغام فایل‌های صوتی وجود نخواهد داشت و فایل‌های صوتی به صورت جداگانه (در صورت وجود چند بخش) در یک فایل ZIP ارائه می‌شوند.")
+        current_merge_audio_files = False # Force disable merge if pydub is not available
+    else:
+        current_merge_audio_files = merge_audio_files_flag
     # core_generate_audio appends its own progress messages to log_messages.
     playback_path, download_path = core_generate_audio(
+        actual_text_input, speech_prompt, speaker_voice, output_filename_base,
+        model_name, temperature, max_chunk_size, sleep_between_requests,
+        current_merge_audio_files, delete_partial_files_flag, log_messages
     )
+    log_output_str = "\n".join(log_messages)
     # Hand back only paths that exist on disk; anything else becomes None.
     valid_playback_path = playback_path if playback_path and os.path.exists(playback_path) else None
     valid_download_path = download_path if download_path and os.path.exists(download_path) else None
+    if not valid_playback_path and not valid_download_path and not actual_text_input.strip():
+        pass # Avoid error message if it was just an empty input from the start
+    elif not valid_playback_path and not valid_download_path and actual_text_input.strip():
+        # Add this only if there was text input but no output files
+        log_output_str += "\n🛑 هیچ فایل صوتی برای پخش یا دانلود در دسترس نیست."
+    return valid_playback_path, valid_download_path, log_output_str
+# --- Gradio UI definition ---
 # Custom stylesheet handed to gr.Blocks(css=...): right-to-left page direction,
 # container width limits, button/input/panel styling, and a hidden footer.
 # NOTE(review): the embedded CSS comment mentions LTR for text inputs, but the
 # rule that follows it sets `direction: rtl` — confirm which one is intended.
 css = """
+body { font-family: 'Tahoma', 'Arial', sans-serif; direction: rtl; }
+.gradio-container { max-width: 95% !important; margin: auto !important; padding: 10px !important; }
+@media (min-width: 768px) { .gradio-container { max-width: 800px !important; } }
 footer { display: none !important; }
+.gr-button { background-color: #1d67a3 !important; color: white !important; border-radius: 8px !important; }
+.gr-button:hover { background-color: #164f7e !important; }
+.gr-input, .gr-dropdown, .gr-slider, .gr-checkbox, .gr-textbox, .gr-file { border-radius: 6px !important; }
+.gr-panel { padding: 15px !important; border-radius: 8px !important; box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important; }
+h2, h3 { color: #1d67a3; text-align: center; }
+label { font-weight: bold; color: #333; }
+#output_audio .gallery, #download_file_output .gallery { display: none !important; }
+/* Ensure text inputs and textareas are also LTR for code/API keys if needed, but general UI is RTL */
+textarea, input[type="text"] { direction: rtl; text-align: right; }
+/* Override for specific LTR elements if any, e.g. API key input if it were visible */
 """
+with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
     # Page header: title, attribution line, and an initially collapsed accordion
     # with step-by-step instructions for adding the GEMINI_API_KEY secret to the Space.
+    gr.Markdown("## 🔊 تبدیل متن به گفتار با Gemini API (فارسی)")
+    gr.Markdown("<p style='text-align:center;'>ساخته شده بر اساس کد کولب توسط: aigolden</p>")
+    gr.HTML("<hr>") # Using HTML for a styled horizontal rule
+    with gr.Accordion("⚠️ راهنمای مهم: تنظیم کلید API جمینای", open=False):
+        gr.Markdown(
+            "**برای استفاده از این ابزار، ابتدا باید کلید API جمینای خود را در بخش Secrets این Space در Hugging Face اضافه کنید:**\n"
+            "1. به صفحه اصلی این Space بروید (جایی که این اپلیکیشن را می‌بینید).\n"
+            "2. در بالای صفحه، روی نام Space خود و سپس 'Settings' (آیکن چرخ‌دنده ⚙️) کلیک کنید.\n"
+            "3. در منوی سمت چپ صفحه تنظیمات، به بخش 'Secrets' بروید.\n"
+            "4. روی دکمه '+ New secret' کلیک کنید.\n"
+            "5. در فیلد 'Name'، دقیقاً عبارت `GEMINI_API_KEY` را وارد کنید (با حروف بزرگ).\n"
+            "6. در فیلد 'Value (secret)'، کلید API جمینای خود را که از Google AI Studio یا Google Cloud Console دریافت کرده‌اید، وارد کنید.\n"
+            "7. روی 'Save secret' کلیک کنید.\n"
+            "**توجه:** پس از افزودن یا تغییر Secret، ممکن است لازم باشد Space را یکبار Restart کنید. برای این کار، از منوی سه‌نقطه (⋮) در کنار دکمه 'Settings' در صفحه اصلی Space، گزینه 'Restart this Space' را انتخاب کنید."
+        )
+    gr.HTML("<hr>")
     # Input column: source selector (file vs. typed text), the text itself, and the speaking-style prompt.
     with gr.Row():
+        with gr.Column(scale=3, min_width=300):
             # NOTE(review): the heading below contains replacement characters (likely a mangled emoji) — restore the intended glyph.
+            gr.Markdown("### ���� تنظیمات ورودی و پرامپت")
+            use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی ورودی (.txt)", value=False, elem_id="use_file_cb")
+            # The file upload and the text box are shown according to the checkbox (logic in the main function)
+            uploaded_file_input = gr.File(
+                label="📂 آپلود فایل متنی (فقط شامل متن اصلی، UTF-8)",
+                file_types=['.txt'],
+                visible=False # Initially hidden, controlled by checkbox interaction
+            )
             text_to_speak_tb = gr.Textbox(
+                label="⌨️ متن ورودی (اگر گزینه فایل فعال نیست)",
                 placeholder="متن مورد نظر برای تبدیل به گفتار را اینجا وارد کنید...",
+                lines=8,
+                value="سلام دنیا! این یک آزمایش برای تبدیل متن به گفتار با استفاده از مدل جمینای است.",
+                elem_id="text_input_main",
+                visible=True # Initially visible
             )
+            # Toggle visibility from the checkbox: checked shows the file upload and hides the text box, unchecked does the reverse
+            use_file_input_cb.change(
+                fn=lambda x: (gr.update(visible=x), gr.update(visible=not x)),
+                inputs=use_file_input_cb,
+                outputs=[uploaded_file_input, text_to_speak_tb]
+            )
             speech_prompt_tb = gr.Textbox(
+                label="🗣️ پرامپت برای تنظیم سبک گفتار (اختیاری)",
+                placeholder="مثال: از زبان یک یوتوبر پر انرژی و حرفه ای صحبت کن",
+                value="به زبان یک گوینده رادیو با صدای گرم و واضح صحبت کن.",
+                lines=2
             )
+        with gr.Column(scale=2, min_width=250):
             # Settings column: model, voice, temperature, output base name, then chunking, pacing, and merge options.
+            gr.Markdown("### ⚙️ تنظیمات مدل و خروجی")
             model_name_dd = gr.Dropdown(
+                MODEL_NAMES, label="🤖 انتخاب مدل Gemini TTS", value="gemini-2.5-flash-preview-tts"
             )
             speaker_voice_dd = gr.Dropdown(
                 SPEAKER_VOICES, label="🎤 انتخاب گوینده", value="Charon"
             )
             temperature_slider = gr.Slider(
+                minimum=0.0, maximum=2.0, step=0.05, value=0.9, label="🌡️ دمای مدل (تنوع خروجی)"
+            ) # Adjusted default temp
             output_filename_base_tb = gr.Textbox(
+                label="📛 نام پایه فایل خروجی (بدون پسوند)", value="gemini_tts_farsi_output"
             )
+            gr.Markdown("#### تنظیمات پیشرفته")
+            max_chunk_size_slider = gr.Slider(
+                minimum=1500, maximum=4000, step=100, value=3800, label="📏 حداکثر کاراکتر در هر قطعه"
+            ) # Adjusted min chunk size
+            sleep_between_requests_slider = gr.Slider(
+                minimum=3, maximum=25, step=0.5, value=12, label="⏱️ فاصله بین درخواست‌ها (ثانیه)"
+            ) # Adjusted sleep range and default
+            merge_audio_files_cb = gr.Checkbox(label="🔗 ادغام فایل‌های صوتی در یک فایل WAV (نیازمند pydub)", value=True)
+            delete_partial_files_cb = gr.Checkbox(label="🗑️ حذف فایل‌های جزئی پس از ادغام (اگر ادغام فعال باشد)", value=False)
+    generate_button = gr.Button("🎙️ تولید صدا", variant="primary", elem_id="generate_button_main")
+    gr.HTML("<hr>")
     gr.Markdown("### 🎧 خروجی صوتی و دانلود 📥")
     # Two equal-width columns: the audio player and the downloadable file.
     with gr.Row():
+        with gr.Column(scale=1):
+            output_audio = gr.Audio(label="🔊 فایل صوتی تولید شده (قابل پخش)", type="filepath", elem_id="output_audio_player")
+        with gr.Column(scale=1):
+            download_file_output = gr.File(label="💾 دانلود فایل نهایی (WAV یا ZIP)", elem_id="download_file_link")
+    gr.Markdown("### 📜 لاگ‌ها و پیام‌های فرآیند")
+    logs_output_tb = gr.Textbox(label=" ", lines=10, interactive=False, autoscroll=True, elem_id="logs_textbox")
     # Clicking the button runs gradio_tts_interface; its three return values fill the player, the download slot, and the log box.
     generate_button.click(
         fn=gradio_tts_interface,
         inputs=[
         outputs=[output_audio, download_file_output, logs_output_tb]
     )
     # Example rows: each lists twelve values in the same order as gradio_tts_interface's parameters (progress excluded).
     gr.Examples(
         examples=[
+            [False, None, "سلام، این یک تست کوتاه است.", "یک صدای دوستانه و واضح.", "Charon", "test_output_1", "gemini-2.5-flash-preview-tts", 0.9, 3800, 12, True, False],
+            [False, None, "به دنیای شگفت‌انگیز هوش مصنوعی خوش آمدید. امیدوارم از این ابزار لذت ببرید و برایتان مفید باشد.", "با هیجان و انرژی زیاد صحبت کن، انگار که یک خبر فوق‌العاده را اعلام می‌کنی.", "Zephyr", "ai_voice_farsi", "gemini-2.5-flash-preview-tts", 1.1, 3500, 10, True, True],
+            [False, None, "این یک نمونه متن طولانی‌تر است که برای آزمایش تقسیم‌بندی هوشمند به کار می‌رود. باید دید که چگونه به قطعات کوچکتر تقسیم شده و سپس در صورت انتخاب گزینه ادغام، به یک فایل صوتی واحد تبدیل می‌شود. امیدواریم که همه چیز به خوبی کار کند.", "با لحنی آرام و روایی، مانند یک داستان‌گو.", "Achird", "long_text_sample", "gemini-2.5-pro-preview-tts", 0.8, 2500, 15, True, True],
         ],
         inputs=[
             use_file_input_cb, uploaded_file_input, text_to_speak_tb,
             max_chunk_size_slider, sleep_between_requests_slider,
             merge_audio_files_cb, delete_partial_files_cb
         ],
+        outputs=[output_audio, download_file_output, logs_output_tb],
+        fn=gradio_tts_interface,
+        cache_examples=False # Set to True if inputs/outputs are static and pre-computation is desired
     )
     # Footer: centered, grey, small-print HTML note rendered through Markdown.
     gr.Markdown(
+        "<div style='text-align: center; margin-top: 30px; font-size: 0.9em; color: grey;'>"
+        "این ابزار از Google Gemini API برای تبدیل متن به گفتار استفاده می‌کند. "
+        "لطفاً به محدودیت‌های استفاده و شرایط خدمات Gemini API توجه فرمایید.<br>"
+        "برای بهترین نتیجه، از مرورگرهای به‌روز استفاده کنید."
         "</div>"
     )
 if __name__ == "__main__":
+    # For local runs with hot-reload and debug
+    # demo.launch(debug=True, share=False)
+    # For a normal run (e.g. on Hugging Face Spaces this line is usually not needed because Gradio handles it itself)
+    demo.launch()