Spaces:

Hamed744
/

Ttspro

Running

App Files Files Community

Hamed744 committed on Jun 5

Commit

eb51a4d

verified ·

1 Parent(s): 8f4a387

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -253

app.py CHANGED Viewed

@@ -24,14 +24,20 @@ SPEAKER_VOICES = [
 ]
 FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts"
-def _log(message, log_list):
     log_list.append(message)
 def save_binary_file(file_name, data, log_list):
     try:
         with open(file_name, "wb") as f:
             f.write(data)
-        _log(f"✅ فایل در مسیر زیر ذخیره شد: {file_name}", log_list)
         return file_name
     except Exception as e:
         _log(f"❌ خطا در ذخیره فایل {file_name}: {e}", log_list)
@@ -61,74 +67,51 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
     for param in parts:
         param = param.strip()
         if param.lower().startswith("rate="):
-            try:
-                rate_str = param.split("=", 1)[1]
-                rate = int(rate_str)
-            except (ValueError, IndexError): pass
         elif param.startswith("audio/L"):
-            try:
-                bits_per_sample = int(param.split("L", 1)[1])
-            except (ValueError, IndexError): pass
     return {"bits_per_sample": bits_per_sample, "rate": rate}
 def smart_text_split(text, max_size=3800, log_list=None):
-    if len(text) <= max_size:
-        return [text]
-    chunks = []
-    current_chunk = ""
     sentences = re.split(r'(?<=[.!?؟])\s+', text)
     for sentence in sentences:
-        sentence_with_space = sentence + " "
-        if len(current_chunk) + len(sentence_with_space) > max_size:
-            if current_chunk:
-                chunks.append(current_chunk.strip())
             current_chunk = sentence
             while len(current_chunk) > max_size:
-                split_idx = -1
-                possible_split_chars = ['،', ',', ';', ':', ' ']
-                for char_idx in range(max_size - 1, max_size // 2, -1):
-                    if current_chunk[char_idx] in possible_split_chars:
-                        split_idx = char_idx + 1
-                        break
-                if split_idx != -1:
-                    chunks.append(current_chunk[:split_idx].strip())
-                    current_chunk = current_chunk[split_idx:].strip()
-                else:
-                    chunks.append(current_chunk[:max_size].strip())
-                    current_chunk = current_chunk[max_size:].strip()
         else:
             current_chunk += (" " if current_chunk else "") + sentence
-    if current_chunk:
-        chunks.append(current_chunk.strip())
     final_chunks = [c for c in chunks if c]
-    if log_list:
-        _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
     return final_chunks
 def merge_audio_files_func(file_paths, output_path, log_list):
     if not PYDUB_AVAILABLE:
-        _log("❌ pydub در دسترس نیست. نمی‌توان فایل‌ها را ادغام کرد.", log_list)
         return False
     try:
-        _log(f"🔗 در حال ادغام {len(file_paths)} فایل صوتی...", log_list)
         combined = AudioSegment.empty()
         for i, file_path in enumerate(file_paths):
             if os.path.exists(file_path):
-                audio = AudioSegment.from_file(file_path)
-                combined += audio
-                if i < len(file_paths) - 1:
-                    combined += AudioSegment.silent(duration=200)
-            else:
-                _log(f"⚠️ فایل پیدا نشد: {file_path}", log_list)
         combined.export(output_path, format="wav")
-        _log(f"✅ فایل ادغام شده ذخیره شد: {output_path}", log_list)
         return True
     except Exception as e:
-        _log(f"❌ خطا در ادغام فایل‌ها: {e}", log_list)
         return False
-def create_zip_file(file_paths, zip_name, log_list):
     try:
         with zipfile.ZipFile(zip_name, 'w') as zipf:
             for file_path in file_paths:
@@ -137,245 +120,212 @@ def create_zip_file(file_paths, zip_name, log_list):
         _log(f"📦 فایل ZIP ایجاد شد: {zip_name}", log_list)
         return True
     except Exception as e:
-        _log(f"❌ خطا در ایجاد فایل ZIP: {e}", log_list)
         return False
 def core_generate_audio(
     text_input, prompt_input, selected_voice, output_base_name,
-    temperature_val, max_chunk, sleep_time,
-    log_list
 ):
-    _log("🚀 شروع فرآیند تبدیل متن به گفتار با هوش مصنوعی آلفا...", log_list)
     api_key = os.environ.get("GEMINI_API_KEY")
     if not api_key:
-        _log("❌ خطا: کلید API (GEMINI_API_KEY) تنظیم نشده است.", log_list)
-        return None, None, "خطا: کلید API برای سرویس هوش مصنوعی تنظیم نشده است. لطفاً با مدیر تماس بگیرید یا راهنما را مطالعه کنید."
     try:
-        _log("🛠️ در حال ایجاد کلاینت هوش مصنوعی آلفا...", log_list)
         client = genai.Client(api_key=api_key)
-        _log("✅ کلاینت با موفقیت ایجاد شد.", log_list)
     except Exception as e:
-        _log(f"❌ خطا در ایجاد کلاینت: {e}", log_list)
-        return None, None, "خطا در اتصال به سرویس هوش مصنوعی. لطفاً بعداً تلاش کنید."
-    if not text_input or text_input.strip() == "":
-        _log("❌ خطا: متن ورودی خالی است.", log_list)
-        return None, None, "خطا: لطفاً متنی را برای تبدیل وارد کنید."
     text_chunks = smart_text_split(text_input, max_chunk, log_list)
     if not text_chunks:
-        _log("❌ خطا: هیچ قطعه متنی برای پردازش وجود ندارد.", log_list)
-        return None, None, "خطا: مشکلی در پردازش متن ورودی پیش آمد."
     generated_files = []
     model_to_use = FIXED_MODEL_NAME
     for i, chunk in enumerate(text_chunks):
-        _log(f"\n🔊 تولید صدا برای قطعه {i+1}/{len(text_chunks)}...", log_list)
         final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
         contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
         generate_content_config = types.GenerateContentConfig(
-            temperature=temperature_val,
-            response_modalities=["audio"],
-            speech_config=types.SpeechConfig(
-                voice_config=types.VoiceConfig(
-                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=selected_voice)
-                )
-            ),
         )
         current_chunk_filename_base = f"{output_base_name}_part{i+1:03d}"
         try:
-            response = client.models.generate_content(
-                model=model_to_use, contents=contents, config=generate_content_config,
-            )
             if (response.candidates and response.candidates[0].content and
-                response.candidates[0].content.parts and
-                response.candidates[0].content.parts[0].inline_data):
                 inline_data = response.candidates[0].content.parts[0].inline_data
                 data_buffer = inline_data.data
-                file_extension = mimetypes.guess_extension(inline_data.mime_type)
-                if file_extension is None or "binary" in inline_data.mime_type or file_extension == ".bin":
-                    file_extension = ".wav"
-                    if "audio/L" in inline_data.mime_type:
-                         data_buffer = convert_to_wav(data_buffer, inline_data.mime_type)
-                elif inline_data.mime_type == "audio/mpeg": file_extension = ".mp3"
-                elif inline_data.mime_type == "audio/wav": file_extension = ".wav"
                 generated_file_path = save_binary_file(f"{current_chunk_filename_base}{file_extension}", data_buffer, log_list)
-                if generated_file_path:
-                    generated_files.append(generated_file_path)
-                    _log(f"✅ قطعه {i+1} تولید شد.", log_list)
-            elif response.text:
-                 _log(f"ℹ️ پیام API برای قطعه {i+1}: {response.text}", log_list)
-                 if "rate limit" in response.text.lower() or "quota" in response.text.lower():
-                    _log(f"⏳ محدودیت درخواست API.", log_list)
-            else:
-                _log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی/متنی. بازخورد: {response.prompt_feedback if response else 'No response'}", log_list)
-        except types.generation_types.BlockedPromptException as bpe:
-            _log(f"❌ محتوای قطعه {i+1} مسدود شد: {bpe}", log_list)
-            return None, None, "خطا: محتوای ورودی شما توسط سیستم ایمنی مسدود شد. لطفاً متن را تغییر دهید."
         except Exception as e:
             _log(f"❌ خطا در تولید قطعه {i+1}: {e}", log_list)
-            if "API key not valid" in str(e): return None, None, "خطا: کلید API نامعتبر است."
-            elif "quota" in str(e).lower(): return None, None, "خطا: محدودیت استفاده از سرویس به پایان رسیده است."
             continue
-        if i < len(text_chunks) - 1 and len(text_chunks) > 1 :
-            _log(f"⏱️ انتظار {sleep_time} ثانیه...", log_list)
             time.sleep(sleep_time)
     if not generated_files:
-        _log("❌ هیچ فایل صوتی تولید نشد!", log_list)
-        return None, None, "متاسفانه هیچ فایل صوتی تولید نشد. لطفاً ورودی خود را بررسی کرده و مجدداً تلاش کنید."
-    _log(f"\n🎉 {len(generated_files)} فایل صوتی با موفقیت تولید شد!", log_list)
-    playback_file = None
-    download_file = None
-    user_message = "صدا با موفقیت تولید شد."
     if len(generated_files) > 1:
         if PYDUB_AVAILABLE:
-            merged_filename = f"{output_base_name}_final_audio.wav"
             if merge_audio_files_func(generated_files, merged_filename, log_list):
-                playback_file = merged_filename
-                download_file = merged_filename
-                for file_path in generated_files:
-                    try:
-                        if os.path.abspath(file_path) != os.path.abspath(merged_filename):
-                            os.remove(file_path)
-                    except Exception as e:
-                        _log(f"⚠️ خطا در حذف فایل جزئی {os.path.basename(file_path)}: {e}", log_list)
-            else:
-                user_message = "ادغام فایل‌ها ممکن نبود. فایل ZIP از قطعات صوتی برای دانلود آماده شد."
-                zip_filename = f"{output_base_name}_all_parts.zip"
-                if create_zip_file(generated_files, zip_filename, log_list): download_file = zip_filename
-                if generated_files: playback_file = generated_files[0]
-        else:
-            user_message = "فایل‌های صوتی به صورت جداگانه در یک فایل ZIP آماده شدند (امکان ادغام خودکار فراهم نبود)."
-            zip_filename = f"{output_base_name}_all_parts.zip"
-            if create_zip_file(generated_files, zip_filename, log_list): download_file = zip_filename
-            if generated_files: playback_file = generated_files[0]
     elif len(generated_files) == 1:
-        playback_file = generated_files[0]
-        download_file = generated_files[0]
-    if playback_file and not os.path.exists(playback_file): playback_file = None
-    if download_file and not os.path.exists(download_file): download_file = None
-    if not playback_file and not download_file and generated_files:
-         user_message = "خطا در آماده‌سازی فایل نهایی. ممکن است قطعات جداگانه تولید شده باشند اما ادغام یا فشرده‌سازی ناموفق بوده."
-    return playback_file, download_file, user_message
 def gradio_tts_interface(
     use_file_input, uploaded_file, text_to_speak,
     speech_prompt, speaker_voice, output_filename_base_in,
-    temperature, max_chunk_size, sleep_between_requests,
     progress=gr.Progress(track_tqdm=True)
 ):
-    internal_logs = []
     actual_text_input = ""
     if use_file_input:
         if uploaded_file is not None:
             try:
                 with open(uploaded_file.name, 'r', encoding='utf-8') as f:
                     actual_text_input = f.read().strip()
-                _log(f"✅ متن از فایل '{os.path.basename(uploaded_file.name)}' بارگذاری شد.", internal_logs)
-                if not actual_text_input:
-                    return None, None, "خطا: فایل آپلود شده خالی است."
             except Exception as e:
-                _log(f"❌ خطا در خواندن فایل آپلود شده: {e}", internal_logs)
-                return None, None, f"خطا در خواندن فایل: {e}"
-        else:
-            return None, None, "خطا: گزینه فایل انتخاب شده اما فایلی آپلود نشده."
     else:
         actual_text_input = text_to_speak
-        if not actual_text_input or not actual_text_input.strip():
-            return None, None, "خطا: لطفاً متنی را وارد کنید."
     output_filename_base = re.sub(r'[^\w\-_]', '', output_filename_base_in if output_filename_base_in else "alpha_tts_output")
     if not output_filename_base: output_filename_base = "alpha_tts_output"
-    playback_path, download_path, user_message_from_core = core_generate_audio(
         actual_text_input, speech_prompt, speaker_voice, output_filename_base,
-        temperature, max_chunk_size, sleep_between_requests, internal_logs
     )
-    # for log_entry in internal_logs: # For debugging in HF Spaces console
-        # print(log_entry)
-    return playback_path, download_path, user_message_from_core
-def format_user_message(message_text):
-    if not message_text:
-        return "<div class='user_message_output'></div>"
-    # از gr.utils.escape_html یا معادل آن در نسخه‌های جدیدتر استفاده کنید
-    # در Gradio 3.x به بالا، escape کردن خودکار است اگر از f-string در gr.HTML استفاده نشود.
-    # برای اطمینان، می‌توانیم خودمان escape کنیم اگر از f-string استفاده می‌کنیم.
-    # اما اینجا ما رشته HTML را کامل می‌سازیم، پس نیازی به gr.zwoel نیست.
-    escaped_message = gr.utils.escape_html(message_text)
-    if "خطا:" in message_text or "متاسفانه" in message_text or "مسدود شد" in message_text or "نامعتبر" in message_text:
-        return f"<div class='user_message_output error'>{escaped_message}</div>"
-    elif "موفقیت" in message_text or "آماده شد" in message_text:
-        return f"<div class='user_message_output success'>{escaped_message}</div>"
-    else:
-        return f"<div class='user_message_output info'>{escaped_message}</div>"
 css = """
-body { font-family: 'Tahoma', 'Arial', sans-serif; direction: rtl; background-color: #f0f2f5; }
-.gradio-container { max-width: 95% !important; margin: 20px auto !important; padding: 15px !important; background-color: #ffffff; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }
-@media (min-width: 768px) { .gradio-container { max-width: 700px !important; } }
 footer { display: none !important; }
-.gr-button { font-weight: bold; background-color: #007bff !important; color: white !important; border-radius: 8px !important; padding: 10px 20px !important; transition: background-color 0.3s ease; }
-.gr-button:hover { background-color: #0056b3 !important; }
-.gr-input, .gr-dropdown, .gr-slider, .gr-checkbox, .gr-textbox, .gr-file { border-radius: 6px !important; border: 1px solid #ced4da; }
-.gr-panel { padding: 15px !important; border-radius: 8px !important; background-color: #f8f9fa; border: 1px solid #e9ecef; margin-bottom:15px; }
-h1, h2, h3 { color: #343a40; text-align: center; }
-h1 { font-size: 1.8em; margin-bottom: 5px;}
-h2 { font-size: 1.2em; margin-bottom: 15px; color: #495057;}
-label { font-weight: 500; color: #495057; margin-bottom: 5px; display: block; }
-#output_audio .gallery, #download_file_output .gallery { display: none !important; }
-textarea, input[type="text"] { direction: rtl; text-align: right; padding: 10px; font-size: 1em; }
-.gr-form { gap: 20px !important; }
-.user_message_output { padding: 12px 15px; margin-top: 15px; border-radius: 6px; text-align: center; font-weight: 500; border: 1px solid transparent; }
-.user_message_output.success { background-color: #d1e7dd; color: #0f5132; border-color: #badbcc; }
-.user_message_output.error { background-color: #f8d7da; color: #842029; border-color: #f5c2c7; }
-.user_message_output.info { background-color: #cff4fc; color: #055160; border-color: #b6effb; }
-#api_key_accordion details { border: 1px solid #ddd; border-radius: 6px; margin-bottom: 15px; }
-#api_key_accordion summary { font-weight: bold; padding: 10px; cursor: pointer; background-color: #f7f7f7; border-radius: 6px 6px 0 0;}
-#api_key_accordion div[class^="prose"] { padding: 10px; border-top: 1px solid #ddd;}
 """
-alpha_intro = """
-<div style='text-align:center; padding:10px;'>
-  <img src='https://img.icons8.com/fluency/96/artificial-intelligence.png' alt='AI Icon' style='width:60px; height:60px; margin-bottom:5px;'/>
   <h1>تبدیل متن به صدا با هوش مصنوعی آلفا</h1>
-  <p style='font-size:1.1em; color:#555;'>به سادگی متن خود را وارد کنید و صدای طبیعی و رسا تحویل بگیرید!</p>
 </div>
 """
 with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
-    gr.HTML(alpha_intro)
-    with gr.Accordion("⚠️ راهنمای مهم: تنظیم کلید API", open=False, elem_id="api_key_accordion"):
-        gr.Markdown(
-            "**برای استفاده از این ابزار، نیاز به تنظیم یک کلید API در تنظیمات این Space دارید:**\n"
-            "1. به صفحه اصلی این Space بروید.\n"
-            "2. روی نام Space و سپس 'Settings' (⚙️) کلیک کنید.\n"
-            "3. در منوی سمت چپ، به 'Secrets' بروید.\n"
-            "4. روی '+ New secret' کلیک کنید.\n"
-            "5. نام Secret را `GEMINI_API_KEY` (با حروف بزرگ) وارد کنید.\n"
-            "6. کلید API خود را در فیلد 'Value' وارد کنید.\n"
-            "7. 'Save secret' را بزنید و در صورت نیاز Space را Restart کنید."
-        )
     with gr.Row(elem_classes="gr-form"):
-        with gr.Column(scale=3, min_width=300):
-            gr.Markdown("### ۱. متن و سبک گفتار خود را وارد کنید")
-            use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی ورودی (.txt)", value=False)
             uploaded_file_input = gr.File(label="📂 آپلود فایل متنی (UTF-8)", file_types=['.txt'], visible=False)
             text_to_speak_tb = gr.Textbox(
-                label="⌨️ متن برای تبدیل به گفتار:",
                 placeholder="اینجا بنویسید...",
-                lines=8,
-                value="سلام! من هوش مصنوعی آلفا هستم و می‌توانم متن شما را به صدا تبدیل کنم.",
                 visible=True
             )
             use_file_input_cb.change(
@@ -389,84 +339,56 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue,
                 value="با لحنی دوستانه و واضح صحبت کن.",
                 lines=2
             )
-        with gr.Column(scale=2, min_width=250):
-            gr.Markdown("### ۲. تنظیمات صدا")
             speaker_voice_dd = gr.Dropdown(
-                SPEAKER_VOICES, label="🎤 انتخاب گوینده:", value="Charon"
             )
             temperature_slider = gr.Slider(
-                minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="🌡️ خلاقیت صدا (دما):"
             )
             output_filename_base_tb = gr.Textbox(
-                label="📛 نام پایه فایل خروجی (اختیاری):", value="alpha_audio_output"
-            )
-            gr.Markdown("#### تنظیمات فنی (پیشرفته)")
-            max_chunk_size_slider = gr.Slider(
-                minimum=1500, maximum=4000, step=100, value=3800, label="📏 حداکثر کاراکتر هر بخش:"
-            )
-            sleep_between_requests_slider = gr.Slider(
-                minimum=3, maximum=20, step=0.5, value=10, label="⏱️ تاخیر بین بخش‌ها (ثانیه):"
             )
-    generate_button = gr.Button("🎧 تولید صدا با آلفا", variant="primary", elem_id="generate_button_main")
-    # کامپوننت مخفی برای نگهداری پیام متنی خام
-    raw_user_message_holder = gr.Textbox(visible=False)
-    user_message_display = gr.HTML(value="<div class='user_message_output'>پیام وضعیت اینجا نمایش داده می‌شود...</div>")
-    gr.HTML("<hr style='margin: 20px 0;'>")
-    gr.Markdown("<h3 style='text-align:center; margin-bottom:10px;'>📢 نتیجه و دانلود 📢</h3>")
-    with gr.Row():
-        with gr.Column(scale=1):
-            output_audio = gr.Audio(label="🔊 فایل صوتی تولید شده:", type="filepath", elem_id="output_audio_player")
-        with gr.Column(scale=1):
-            download_file_output = gr.File(label="💾 دانلود فایل نهایی (WAV یا ZIP):", elem_id="download_file_link")
-    # رویداد کلیک دکمه
     generate_button.click(
         fn=gradio_tts_interface,
         inputs=[
             use_file_input_cb, uploaded_file_input, text_to_speak_tb,
             speech_prompt_tb, speaker_voice_dd, output_filename_base_tb,
-            temperature_slider, max_chunk_size_slider, sleep_between_requests_slider
         ],
-        outputs=[output_audio, download_file_output, raw_user_message_holder] # پیام خام به کامپوننت مخفی می‌رود
-    ).then( # سپس، کامپوننت مخفی به عنوان ورودی به تابع قالب‌بندی داده می‌شود
-        fn=format_user_message,
-        inputs=[raw_user_message_holder], # ورودی از کامپوننت مخفی
-        outputs=user_message_display # خروجی به کامپوننت HTML
     )
     gr.Examples(
-        label="✨ نمونه‌های آماده برای امتحان کردن ✨",
         examples=[
-            [False, None, "سلام به همه دوستان! امروز می‌خواهیم درباره آخرین دستاوردهای هوش مصنوعی صحبت کنیم.", "با لحنی پر انرژی و هیجان‌زده، مانند یک مجری برنامه علمی.", "Zephyr", "alpha_demo_1", 0.95, 3800, 8],
-            [False, None, "داستان از آنجا شروع شد که در یک شب تاریک و طوفانی، قهرمان ما به کلبه‌ای مرموز رسید.", "با صدایی آرام و داستانی، مناسب برای قصه‌گویی شبانه.", "Achird", "alpha_story_1", 0.8, 3500, 12],
-            [False, None, "آخرین اخبار ورزشی: تیم ملی فوتبال کشورمان با یک بازی درخشان به پیروزی رسید!", "مانند یک گزارشگر ورزشی هیجان‌زده و سریع.", "Orus", "alpha_news_1", 1.0, 3000, 7],
-            [False, None, "آموزش پخت کیک شکلاتی: ابتدا فر را با دمای ۱۸۰ درجه سانتی‌گراد گرم کنید. سپس آرد، شکر و کاکائو را با هم مخلوط نمایید.", "با صدایی واضح، آموزشی و کمی آهسته‌تر از حد معمول.", "Vindemiatrix", "alpha_recipe_1", 0.75, 3800, 10],
-            [False, None, "به پادکست هفتگی ما خوش آمدید. این هفته به بررسی عمیق تاثیرات فناوری بر زندگی روزمره خواهیم پرداخت.", "مانند یک میزبان پادکست، صمیمی و متفکر.", "Laomedeia", "alpha_podcast_1", 0.85, 3600, 11],
         ],
-        inputs=[
             use_file_input_cb, uploaded_file_input, text_to_speak_tb,
             speech_prompt_tb, speaker_voice_dd, output_filename_base_tb,
-            temperature_slider, max_chunk_size_slider, sleep_between_requests_slider
         ],
-        # خروجی Examples باید با خروجی تابع اصلی مطابقت داشته باشد
-        outputs=[output_audio, download_file_output, raw_user_message_holder],
         fn=gradio_tts_interface,
-        # برای Examples، نمی‌توانیم به سادگی .then را زنجیر کنیم تا user_message_display آپدیت شود.
-        # پیام وضعیت برای Examples نمایش داده نخواهد شد مگر اینکه یک wrapper پیچیده‌تر بنویسیم.
-        # فعلا برای سادگی، پیام وضعیت برای Examples آپدیت نمی‌شود.
         cache_examples=False
     )
     gr.Markdown(
-        "<div style='text-align: center; margin-top: 30px; padding-top:15px; border-top: 1px solid #eee; font-size: 0.9em; color: #6c757d;'>"
         "قدرت گرفته از فناوری پیشرفته هوش مصنوعی آلفا.<br>"
-        "لطفاً از این ابزار به صورت مسئولانه استفاده کنید."
         "</div>"
     )

 ]
 FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts"
+# مقادیر پیش‌فرض برای تنظیمات فنی که از UI حذف می‌شوند
+DEFAULT_MAX_CHUNK_SIZE = 3800
+DEFAULT_SLEEP_BETWEEN_REQUESTS = 8 # کمی کاهش داده شد چون دیگر قابل تنظیم نیست
+def _log(message, log_list): # برای دیباگ داخلی
+    # print(message) # برای نمایش در کنسول Hugging Face Spaces
     log_list.append(message)
 def save_binary_file(file_name, data, log_list):
     try:
         with open(file_name, "wb") as f:
             f.write(data)
+        _log(f"✅ فایل ذخیره شد: {file_name}", log_list)
         return file_name
     except Exception as e:
         _log(f"❌ خطا در ذخیره فایل {file_name}: {e}", log_list)
     for param in parts:
         param = param.strip()
         if param.lower().startswith("rate="):
+            try: rate = int(param.split("=", 1)[1])
+            except: pass
         elif param.startswith("audio/L"):
+            try: bits_per_sample = int(param.split("L", 1)[1])
+            except: pass
     return {"bits_per_sample": bits_per_sample, "rate": rate}
 def smart_text_split(text, max_size=3800, log_list=None):
+    if len(text) <= max_size: return [text]
+    chunks, current_chunk = [], ""
     sentences = re.split(r'(?<=[.!?؟])\s+', text)
     for sentence in sentences:
+        if len(current_chunk) + len(sentence) + 1 > max_size:
+            if current_chunk: chunks.append(current_chunk.strip())
             current_chunk = sentence
             while len(current_chunk) > max_size:
+                split_idx = next((i for i in range(max_size - 1, max_size // 2, -1) if current_chunk[i] in ['،', ',', ';', ':', ' ']), -1)
+                part, current_chunk = (current_chunk[:split_idx+1], current_chunk[split_idx+1:]) if split_idx != -1 else (current_chunk[:max_size], current_chunk[max_size:])
+                chunks.append(part.strip())
         else:
             current_chunk += (" " if current_chunk else "") + sentence
+    if current_chunk: chunks.append(current_chunk.strip())
     final_chunks = [c for c in chunks if c]
+    if log_list: _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
     return final_chunks
 def merge_audio_files_func(file_paths, output_path, log_list):
     if not PYDUB_AVAILABLE:
+        _log("❌ pydub در دسترس نیست.", log_list)
         return False
     try:
+        _log(f"🔗 ادغام {len(file_paths)} فایل صوتی...", log_list)
         combined = AudioSegment.empty()
         for i, file_path in enumerate(file_paths):
             if os.path.exists(file_path):
+                combined += AudioSegment.from_file(file_path) + (AudioSegment.silent(duration=150) if i < len(file_paths) - 1 else AudioSegment.empty())
+            else: _log(f"⚠️ فایل پیدا نشد: {file_path}", log_list)
         combined.export(output_path, format="wav")
+        _log(f"✅ فایل ادغام شده: {output_path}", log_list)
         return True
     except Exception as e:
+        _log(f"❌ خطا در ادغام: {e}", log_list)
         return False
+def create_zip_file(file_paths, zip_name, log_list): # این تابع دیگر استفاده نمی‌شود چون بخش دانلود مجزا حذف شده
     try:
         with zipfile.ZipFile(zip_name, 'w') as zipf:
             for file_path in file_paths:
         _log(f"📦 فایل ZIP ایجاد شد: {zip_name}", log_list)
         return True
     except Exception as e:
+        _log(f"❌ خطا در ایجاد ZIP: {e}", log_list)
         return False
 def core_generate_audio(
     text_input, prompt_input, selected_voice, output_base_name,
+    temperature_val,
+    log_list # فقط برای لاگ‌های داخلی
 ):
+    max_chunk = DEFAULT_MAX_CHUNK_SIZE
+    sleep_time = DEFAULT_SLEEP_BETWEEN_REQUESTS
+    _log("🚀 شروع فرآیند...", log_list)
     api_key = os.environ.get("GEMINI_API_KEY")
     if not api_key:
+        _log("❌ کلید API تنظیم نشده.", log_list)
+        # چون پیام وضعیت حذف شده، کاربر فقط خروجی خالی دریافت می‌کند.
+        # بهتر است در README.md تاکید زیادی روی تنظیم کلید شود.
+        return None # فقط فایل صوتی برگردانده می‌شود
     try:
         client = genai.Client(api_key=api_key)
     except Exception as e:
+        _log(f"❌ خطا در کلاینت: {e}", log_list)
+        return None
+    if not text_input or not text_input.strip():
+        _log("❌ متن ورودی خالی.", log_list)
+        return None
     text_chunks = smart_text_split(text_input, max_chunk, log_list)
     if not text_chunks:
+        _log("❌ متن قابل پردازش نیست.", log_list)
+        return None
     generated_files = []
     model_to_use = FIXED_MODEL_NAME
     for i, chunk in enumerate(text_chunks):
+        _log(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)}...", log_list)
         final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
         contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
         generate_content_config = types.GenerateContentConfig(
+            temperature=temperature_val, response_modalities=["audio"],
+            speech_config=types.SpeechConfig(voice_config=types.VoiceConfig(
+                prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=selected_voice)))
         )
         current_chunk_filename_base = f"{output_base_name}_part{i+1:03d}"
         try:
+            response = client.models.generate_content(model=model_to_use, contents=contents, config=generate_content_config)
             if (response.candidates and response.candidates[0].content and
+                response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data):
                 inline_data = response.candidates[0].content.parts[0].inline_data
                 data_buffer = inline_data.data
+                file_extension = mimetypes.guess_extension(inline_data.mime_type) or ".wav"
+                if "audio/L" in inline_data.mime_type and file_extension == ".wav":
+                    data_buffer = convert_to_wav(data_buffer, inline_data.mime_type)
+                # اطمینان از پسوند مناسب برای pydub
+                if not file_extension.startswith("."): file_extension = "." + file_extension
+                if file_extension not in [".wav", ".mp3", ".ogg", ".flac"]: # اگر فرمت ناشناخته بود، به wav تبدیل می‌کنیم اگر ممکن باشد
+                    if PYDUB_AVAILABLE and file_extension != ".wav": # سعی در تبدیل به wav
+                        try:
+                            temp_path = f"{current_chunk_filename_base}{file_extension}"
+                            save_binary_file(temp_path, data_buffer, log_list)
+                            audio_seg = AudioSegment.from_file(temp_path)
+                            # پاک کردن فایل موقت با پسوند اصلی
+                            if os.path.exists(temp_path): os.remove(temp_path)
+                            file_extension = ".wav" # تغییر پسوند به wav
+                            # فایل را با پسوند wav ذخیره می‌کنیم
+                            # data_buffer حالا باید بایت‌های wav باشد
+                            # این بخش نیاز به بازبینی دارد که چگونه بایت‌های wav را از audio_seg بگیریم یا مستقیما ذخیره کنیم
+                            # برای سادگی، اگر فرمت اولیه توسط pydub خوانا باشد، همان را ذخیره می‌کنیم
+                            # و اگر قرار است ادغام شود، pydub خودش هندل می‌کند.
+                            # اگر فرمت اولیه mp3 و ... باشد، ذخیره و بعدا توسط pydub خوانده میشود.
+                            # فعلا فرض میکنیم فرمت دریافتی از API توسط pydub قابل خواندن است.
+                            pass # ادامه با file_extension اصلی
+                        except Exception as e_conv:
+                             _log(f"⚠️ خطا در تبدیل فرمت {file_extension} به wav برای قطعه {i+1}: {e_conv}", log_list)
+                             # اگر تبدیل ناموفق بود، با همان فرمت اولیه ادامه می‌دهیم و امیدواریم pydub آن را بخواند
+                    else: # اگر pydub نباشد و فرمت هم wav نباشد، ممکن است در ادغام مشکل پیش بیاید
+                         _log(f"⚠️ فرمت ناشناخته {file_extension} برای قطعه {i+1} و pydub در دسترس نیست یا فرمت wav نیست.", log_list)
+                         # اگر فرمت شناخته شده‌ای برای pydub نباشد و pydub هم نباشد، فقط wav ذخیره می‌کنیم
+                         if file_extension not in [".wav",".mp3"]: file_extension = ".wav"
                 generated_file_path = save_binary_file(f"{current_chunk_filename_base}{file_extension}", data_buffer, log_list)
+                if generated_file_path: generated_files.append(generated_file_path)
+            else: _log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی.", log_list)
         except Exception as e:
             _log(f"❌ خطا در تولید قطعه {i+1}: {e}", log_list)
+            # اگر خطایی در یک قطعه رخ دهد، ادامه می‌دهیم تا بقیه تولید شوند
+            # کاربر در نهایت هرچه تولید شده را دریافت می‌کند.
             continue
+        if i < len(text_chunks) - 1 and len(text_chunks) > 1:
             time.sleep(sleep_time)
     if not generated_files:
+        _log("❌ هیچ فایلی تولید نشد.", log_list)
+        return None
+    _log(f"🎉 {len(generated_files)} فایل(های) صوتی تولید شد.", log_list)
+    final_audio_file = None
     if len(generated_files) > 1:
         if PYDUB_AVAILABLE:
+            merged_filename = f"{output_base_name}_final_audio.wav" # همیشه WAV برای ادغام شده
             if merge_audio_files_func(generated_files, merged_filename, log_list):
+                final_audio_file = merged_filename
+                for file_path in generated_files: # حذف فایل‌های جزئی
+                    if os.path.abspath(file_path) != os.path.abspath(merged_filename):
+                        try: os.remove(file_path)
+                        except: pass
+            else: # اگر ادغام ناموفق بود، اولین فایل را برمی‌گردانیم
+                final_audio_file = generated_files[0] if generated_files else None
+        else: # اگر pydub نباشد، فقط اولین فایل را برمی‌گردانیم
+            _log("⚠️ pydub برای ادغام در دسترس نیست. فقط اولین قطعه ارائه می‌شود.", log_list)
+            final_audio_file = generated_files[0] if generated_files else None
     elif len(generated_files) == 1:
+        final_audio_file = generated_files[0]
+    if final_audio_file and not os.path.exists(final_audio_file):
+        _log(f"⚠️ فایل نهایی {final_audio_file} وجود ندارد!", log_list)
+        return None
+    return final_audio_file
 def gradio_tts_interface(
     use_file_input, uploaded_file, text_to_speak,
     speech_prompt, speaker_voice, output_filename_base_in,
+    temperature,
     progress=gr.Progress(track_tqdm=True)
 ):
+    # Gradio callback: choose the text source (uploaded file when
+    # use_file_input is truthy, otherwise the textbox), sanitize the output
+    # base name, and delegate to core_generate_audio.
+    # Returns what core_generate_audio returns, or None when no usable text
+    # was supplied (no file, empty/unreadable file, blank textbox).
+    # `progress` is not referenced anywhere in this body.
+    internal_logs = [] # for internal debugging
     actual_text_input = ""
     if use_file_input:
         if uploaded_file is not None:
             try:
                 with open(uploaded_file.name, 'r', encoding='utf-8') as f:
                     actual_text_input = f.read().strip()
+                if not actual_text_input: return None # error: empty file
             except Exception as e:
+                _log(f"❌ خطا در خواندن فایل: {e}", internal_logs)
+                return None # error
+        else: return None # error: no file selected
     else:
         actual_text_input = text_to_speak
+        if not actual_text_input or not actual_text_input.strip(): return None
+    # Drop every character that is not a word character, hyphen or underscore;
+    # fall back to the default name when nothing is left.
     output_filename_base = re.sub(r'[^\w\-_]', '', output_filename_base_in if output_filename_base_in else "alpha_tts_output")
     if not output_filename_base: output_filename_base = "alpha_tts_output"
+    # core_generate_audio returns only the path of the final audio file
+    final_audio_path = core_generate_audio(
         actual_text_input, speech_prompt, speaker_voice, output_filename_base,
+        temperature, internal_logs
     )
+    # for log_entry in internal_logs: print(log_entry) # for debugging in the HF console
+    return final_audio_path
 css = """
+body { font-family: 'Tahoma', 'Arial', sans-serif; direction: rtl; background-color: #f4f7f6; color: #333; }
+.gradio-container { max-width: 90% !important; margin: 20px auto !important; padding: 20px !important; background-color: #ffffff; border-radius: 15px; box-shadow: 0 5px 15px rgba(0,0,0,0.08); }
+@media (min-width: 768px) { .gradio-container { max-width: 650px !important; } }
 footer { display: none !important; }
+.gr-button { font-weight: bold; background: linear-gradient(135deg, #007bff, #0056b3) !important; color: white !important; border:none !important; border-radius: 8px !important; padding: 12px 25px !important; transition: all 0.3s ease; box-shadow: 0 2px 5px rgba(0,0,0,0.15); }
+.gr-button:hover { transform: translateY(-2px); box-shadow: 0 4px 8px rgba(0,0,0,0.2); }
+.gr-input, .gr-dropdown, .gr-slider, .gr-checkbox, .gr-textbox, .gr-file { border-radius: 8px !important; border: 1px solid #d1d5db; transition: border-color 0.2s ease, box-shadow 0.2s ease; }
+.gr-input:focus-within, .gr-textbox:focus-within { border-color: #007bff !important; box-shadow: 0 0 0 2px rgba(0,123,255,0.25) !important; }
+h1 { font-size: 1.9em; margin-bottom: 8px; color: #2c3e50; }
+h2 { font-size: 1.1em; margin-bottom: 18px; color: #555;}
+h3 { font-size: 1.3em; color: #0056b3; margin-top: 25px; margin-bottom:15px; border-bottom: 2px solid #007bff30; padding-bottom: 8px;}
+label { font-weight: 600; color: #4a5568; margin-bottom: 6px; display: block; font-size: 0.95em; }
+textarea, input[type="text"] { direction: rtl; text-align: right; padding: 12px; font-size: 1em; }
+.gr-form > div { margin-bottom: 15px !important; } /* فاصله بین ردیف‌های فرم */
+#output_audio_player audio { width: 100%; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
+.temperature_description { font-size: 0.85em; color: #666; margin-top: -8px; margin-bottom: 10px; padding-right: 5px; }
+.main_title_container {text-align:center; padding-bottom:15px; border-bottom: 1px solid #eee; margin-bottom: 20px;}
+.main_title_container img {width:60px; height:60px; margin-bottom:5px;}
 """
+# `css` above is the page stylesheet handed to gr.Blocks(css=css) (right-to-left body, hidden footer);
+# `alpha_intro_html` below is the title banner markup rendered through gr.HTML.
+alpha_intro_html = """
+<div class='main_title_container'>
+  <img src='https://img.icons8.com/fluency/96/artificial-intelligence.png' alt='AI Icon'/>
   <h1>تبدیل متن به صدا با هوش مصنوعی آلفا</h1>
+  <p style='font-size:1.1em; color:#555;'>متن خود را به صدای طبیعی و رسا تبدیل کنید!</p>
 </div>
 """
 with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
+    # Page layout, top to bottom: intro banner, input controls, generate
+    # button, audio player, example rows, footer note.
+    gr.HTML(alpha_intro_html)
     with gr.Row(elem_classes="gr-form"):
+        with gr.Column(scale=3): # main column for the inputs
+            gr.Markdown("### ۱. متن و تنظیمات صدا")
+            use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False)
             uploaded_file_input = gr.File(label="📂 آپلود فایل متنی (UTF-8)", file_types=['.txt'], visible=False)
             text_to_speak_tb = gr.Textbox(
+                label="⌨️ متنی که می‌خواهید به صدا تبدیل شود:",
                 placeholder="اینجا بنویسید...",
+                lines=7,
+                value="سلام! من هوش مصنوعی آلفا هستم.",
                 visible=True
             )
             use_file_input_cb.change(
+                # NOTE(review): the diff view elides unchanged lines at this point. The arguments
+                # of .change(...) and the opening of the widget that owns the two keyword
+                # arguments below are not visible; presumably that widget is speech_prompt_tb,
+                # which is referenced later but never defined in the visible lines - confirm
+                # against the full file.
                 value="با لحنی دوستانه و واضح صحبت کن.",
                 lines=2
             )
             speaker_voice_dd = gr.Dropdown(
+                SPEAKER_VOICES, label="🎤 انتخاب نوع صدا (گوینده):", value="Charon"
             )
             temperature_slider = gr.Slider(
+                minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="🌡️ خلاقیت و تنوع صدا:"
             )
+            gr.Markdown("<p class='temperature_description'>مقادیر بالاتر صدایی خلاقانه‌تر و متنوع‌تر، و مقادیر پایین‌تر صدایی قابل پیش‌بینی‌تر و یکنواخت‌تر ایجاد می‌کنند.</p>",
+                        elem_classes="temperature_description_container")
             output_filename_base_tb = gr.Textbox(
+                label="📛 نام فایل خروجی (اختیاری، بدون پسوند):", value="alpha_audio"
             )
+    generate_button = gr.Button("🎧 تولید و پخش صدا", variant="primary", elem_id="generate_button_main")
+    gr.Markdown("### 🔊 نتیجه تولید صدا")
+    output_audio = gr.Audio(label=" ", type="filepath", elem_id="output_audio_player") # label intentionally left blank
     generate_button.click(
         fn=gradio_tts_interface,
         inputs=[
             use_file_input_cb, uploaded_file_input, text_to_speak_tb,
             speech_prompt_tb, speaker_voice_dd, output_filename_base_tb,
+            temperature_slider
         ],
+        outputs=[output_audio]
     )
     gr.Examples(
+        label="✨ چند نمونه برای شروع ✨",
         examples=[
+            [False, None, "به نام خداوند بخشنده مهربان. سلام بر شما شنوندگان عزیز.", "با لحنی آرام و معنوی.", "Achird", "quran_intro_sample", 0.7],
+            [False, None, "خبر فوری! قیمت‌ها در بازار طلا و سکه با نوسانات شدیدی همراه بوده است.", "مانند یک گوینده خبر اقتصادی، سریع و دقیق.", "Orus", "news_flash_sample", 1.0],
+            [False, None, "در این ویدیو قصد داریم به شما آموزش دهیم چگونه یک وبسایت ساده با پایتون بسازید.", "آموزشی، واضح و با سرعت متوسط.", "Vindemiatrix", "tutorial_sample", 0.8],
+            [False, None, "کتاب صوتی «بوف کور» اثر صادق هدایت. فصل اول.", "روایی، با احساس و کمی غمگین.", "Alnilam", "audiobook_sample", 0.85],
         ],
+        inputs=[ # the inputs must match the main function's inputs
             use_file_input_cb, uploaded_file_input, text_to_speak_tb,
             speech_prompt_tb, speaker_voice_dd, output_filename_base_tb,
+            temperature_slider
         ],
+        outputs=[output_audio], # the Examples output is likewise only the audio player
         fn=gradio_tts_interface,
         cache_examples=False
     )
     gr.Markdown(
+        "<div style='text-align: center; margin-top: 40px; padding-top:20px; border-top: 1px solid #eee; font-size: 0.9em; color: #6c757d;'>"
         "قدرت گرفته از فناوری پیشرفته هوش مصنوعی آلفا.<br>"
+        "لطفاً به قوانین و مقررات مربوط به تولید محتوا احترام بگذارید."
         "</div>"
     )