Hamed744 committed on
Commit
7d73f6a
·
verified ·
1 Parent(s): 26b9387

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +340 -333
app.py CHANGED
@@ -7,9 +7,7 @@ import struct
7
  import time
8
  # import zipfile # Not used in your original core logic
9
  from google import genai
10
- from google.genai import types as genai_types # Aliased to avoid conflict with built-in 'types'
11
-
12
- import logging # Standard Python logging
13
 
14
  try:
15
  from pydub import AudioSegment
@@ -17,10 +15,7 @@ try:
17
  except ImportError:
18
  PYDUB_AVAILABLE = False
19
 
20
- # --- Basic Logging Setup (Kept simple as it's not the focus) ---
21
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
22
-
23
- # --- START: Core TTS Logic from YOUR AlphaTTS_Original (UNCHANGED) ---
24
  SPEAKER_VOICES = [
25
  "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
26
  "Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
@@ -28,14 +23,14 @@ SPEAKER_VOICES = [
28
  "Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
29
  "Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
30
  ]
31
- FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts"
32
  DEFAULT_MAX_CHUNK_SIZE = 3800
33
  DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
34
  DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
35
 
36
- def _log(message, log_list):
37
  log_list.append(message)
38
- logging.info(f"[AlphaTTS_LOG] {message}")
39
 
40
  def save_binary_file(file_name, data, log_list):
41
  try:
@@ -67,196 +62,163 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
67
  except ValueError: pass
68
  return {"bits_per_sample": bits, "rate": rate}
69
 
70
- def smart_text_split(text, max_size=3800, log_list=None):
71
  if len(text) <= max_size: return [text]
72
  chunks, current_chunk = [], ""
73
- sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
74
  for sentence in sentences:
75
  if len(current_chunk) + len(sentence) + 1 > max_size:
76
  if current_chunk: chunks.append(current_chunk.strip())
77
  current_chunk = sentence
78
  while len(current_chunk) > max_size:
79
- split_idx = -1
80
- for punc in ['،', ',', ';', ':', ' ']:
81
- try:
82
- idx = current_chunk.rindex(punc, max_size // 2, max_size)
83
- if idx > split_idx:
84
- split_idx = idx
85
- except ValueError:
86
- pass
87
- if split_idx != -1 :
88
- part, current_chunk = current_chunk[:split_idx+1], current_chunk[split_idx+1:]
89
- else:
90
- part, current_chunk = current_chunk[:max_size], current_chunk[max_size:]
91
  chunks.append(part.strip())
92
- else: current_chunk += (" " if current_chunk and sentence else "") + sentence
93
  if current_chunk: chunks.append(current_chunk.strip())
94
  final_chunks = [c for c in chunks if c]
95
  if log_list: _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
96
  return final_chunks
97
 
98
- def merge_audio_files_func(file_paths, output_path, log_list):
99
- if not PYDUB_AVAILABLE:
100
- _log("❌ pydub در دسترس نیست. ادغام انجام نشد.", log_list)
101
- return False
102
  try:
103
  _log(f"🔗 ادغام {len(file_paths)} فایل صوتی...", log_list)
104
  combined = AudioSegment.empty()
105
  for i, fp in enumerate(file_paths):
106
  if os.path.exists(fp):
107
- try:
108
  segment = AudioSegment.from_file(fp)
109
  combined += segment
110
  if i < len(file_paths) - 1:
111
- combined += AudioSegment.silent(duration=150)
112
  except Exception as e_pydub:
113
  _log(f"⚠️ خطای Pydub در پردازش فایل '{fp}': {e_pydub}. از این فایل صرف نظر می شود.", log_list)
114
  continue
115
- else:
116
- _log(f"⚠️ فایل پیدا نشد: {fp}", log_list)
117
- if len(combined) == 0:
118
  _log("❌ هیچ قطعه صوتی برای ادغام وجود ندارد.", log_list)
119
  return False
120
  combined.export(output_path, format="wav")
121
- _log(f"✅ فایل ادغام شده: {output_path}", log_list)
122
- return True
123
- except Exception as e:
124
- _log(f"❌ خطا در ادغام: {e}", log_list)
125
- return False
126
 
127
- def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list):
128
  output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
129
  max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS
130
  _log(f"🚀 شروع فرآیند با مدل: {FIXED_MODEL_NAME}...", log_list)
131
- api_key = os.environ.get("GEMINI_API_KEY")
132
- if not api_key:
133
- _log("❌ کلید API با نام GEMINI_API_KEY در متغیرهای محیطی تنظیم نشده.", log_list)
134
- return None
135
- try:
136
- client = genai.Client(api_key=api_key)
137
- _log(f"کلاینت Gemini با کلید API برای مدل {FIXED_MODEL_NAME} مقداردهی اولیه شد.", log_list)
138
- except Exception as e:
139
- _log(f"❌ خطا در مقداردهی اولیه کلاینت Gemini: {e}", log_list)
140
- return None
141
- if not text_input or not text_input.strip():
142
- _log("❌ متن ورودی خالی.", log_list)
143
- return None
144
  text_chunks = smart_text_split(text_input, max_chunk, log_list)
145
- if not text_chunks:
146
- _log("❌ متن قابل پردازش نیست.", log_list)
147
- return None
148
  generated_files = []
149
  for i, chunk in enumerate(text_chunks):
150
  _log(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)} (صدا: {selected_voice}, دما: {temperature_val})...", log_list)
151
  final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
 
152
  contents = [genai_types.Content(role="user", parts=[genai_types.Part.from_text(text=final_text)])]
153
- config = genai_types.GenerateContentConfig(
154
- temperature=temperature_val,
155
- response_modalities=["audio"],
156
- speech_config=genai_types.SpeechConfig(
157
- voice_config=genai_types.VoiceConfig(
158
- prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)
159
- )
160
- )
161
- )
162
- _log(f"کانفیگ API برای قطعه {i+1}: دما={temperature_val}, صدا={selected_voice}, مدالیته=['audio']", log_list)
163
  fname_base = f"{output_base_name}_part{i+1:03d}"
164
  try:
 
165
  response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
166
  if response.candidates and response.candidates[0].content and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data:
167
  inline_data = response.candidates[0].content.parts[0].inline_data
168
  data_buffer = inline_data.data
169
- mime_type = inline_data.mime_type
170
- _log(f"داده صوتی در candidate.part[0].inline_data برای قطعه {i+1} یافت شد.", log_list)
171
  ext = mimetypes.guess_extension(mime_type) or ".wav"
172
  if "audio/L" in mime_type and ext == ".wav":
173
- _log(f"تبدیل صدای خام PCM (MIME: {mime_type}) به WAV برای قطعه {i+1}.", log_list)
174
- data_buffer = convert_to_wav(data_buffer, mime_type)
175
  if not ext.startswith("."): ext = "." + ext
176
  fpath = save_binary_file(f"{fname_base}{ext}", data_buffer, log_list)
177
  if fpath: generated_files.append(fpath)
178
- else:
179
- _log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی در مسیر مورد انتظار.", log_list)
180
- _log(f"ساختار کامل پاسخ (اولین 500 کاراکتر): {str(response)[:500]}", log_list)
181
- except Exception as e:
182
- _log(f"❌ خطا در تولید قطعه {i+1}: {e}", log_list)
183
- continue
184
  if i < len(text_chunks) - 1 and len(text_chunks) > 1:
185
- _log(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از قطعه بعدی...", log_list)
186
  time.sleep(sleep_time)
187
- if not generated_files:
188
- _log("❌ هیچ فایلی تولید نشد.", log_list)
189
- return None
190
  _log(f"🎉 {len(generated_files)} فایل(های) صوتی تولید شد.", log_list)
 
191
  final_audio_file = None
192
  final_output_path_base = f"{output_base_name}_final"
 
193
  if len(generated_files) > 1:
194
  if PYDUB_AVAILABLE:
195
  merged_fn = f"{final_output_path_base}.wav"
196
  if os.path.exists(merged_fn):
197
  try: os.remove(merged_fn)
198
- except OSError: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}' (خطای سیستم عامل)", log_list)
199
  except Exception as e_rm: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e_rm}", log_list)
 
200
  if merge_audio_files_func(generated_files, merged_fn, log_list):
201
  final_audio_file = merged_fn
202
- for fp_path in generated_files:
203
- if os.path.abspath(fp_path) != os.path.abspath(merged_fn):
204
- try: os.remove(fp_path)
205
- except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}' (خطای سیستم عامل)", log_list)
206
- except Exception as e_del: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}': {e_del}", log_list)
207
- else:
208
- _log("⚠️ ادغام فایل‌های صوتی ناموفق بود. اولین قطعه ارائه می‌شود.", log_list)
209
- if generated_files:
210
  try:
 
211
  first_chunk_path = generated_files[0]
212
- target_ext = os.path.splitext(first_chunk_path)[1]
213
- fallback_fn = f"{final_output_path_base}{target_ext}"
214
- if os.path.exists(fallback_fn) and os.path.abspath(first_chunk_path) != os.path.abspath(fallback_fn):
215
- os.remove(fallback_fn)
216
- if os.path.abspath(first_chunk_path) != os.path.abspath(fallback_fn):
217
- os.rename(first_chunk_path, fallback_fn)
218
- final_audio_file = fallback_fn
219
- for i_gf in range(1, len(generated_files)):
220
- try: os.remove(generated_files[i_gf])
221
- except: pass
222
- except Exception as e_rename_fb:
223
- _log(f"خطا در تغییر نام فایل اولین قطعه: {e_rename_fb}", log_list)
 
224
  final_audio_file = generated_files[0]
225
  else:
226
  _log("⚠️ pydub نیست. اولین قطعه ارائه می‌شود.", log_list)
227
  if generated_files:
228
  try:
229
  first_chunk_path = generated_files[0]
230
- target_ext = os.path.splitext(first_chunk_path)[1]
231
- single_fallback_fn = f"{final_output_path_base}{target_ext}"
232
- if os.path.exists(single_fallback_fn) and os.path.abspath(first_chunk_path) != os.path.abspath(single_fallback_fn):
233
- os.remove(single_fallback_fn)
234
- if os.path.abspath(first_chunk_path) != os.path.abspath(single_fallback_fn):
235
- os.rename(first_chunk_path, single_fallback_fn)
236
- final_audio_file = single_fallback_fn
237
  for i_gf in range(1, len(generated_files)):
238
  try: os.remove(generated_files[i_gf])
239
  except: pass
240
- except Exception as e_rename_single_npd:
241
- _log(f"خطا در تغییر نام فایل اولین قطعه (بدون pydub): {e_rename_single_npd}", log_list)
242
  final_audio_file = generated_files[0]
243
  elif len(generated_files) == 1:
244
  try:
245
  single_file_path = generated_files[0]
246
- target_ext = os.path.splitext(single_file_path)[1]
247
- final_single_fn = f"{final_output_path_base}{target_ext}"
248
- if os.path.exists(final_single_fn) and os.path.abspath(single_file_path) != os.path.abspath(final_single_fn):
249
- os.remove(final_single_fn)
250
- if os.path.abspath(single_file_path) != os.path.abspath(final_single_fn):
251
- os.rename(single_file_path, final_single_fn)
252
- final_audio_file = final_single_fn
253
- except Exception as e_rename_sgl_final:
254
- _log(f"خطا در تغییر نام فایل تکی نهایی: {e_rename_sgl_final}", log_list)
255
  final_audio_file = generated_files[0]
 
256
  if final_audio_file and not os.path.exists(final_audio_file):
257
  _log(f"⚠️ فایل نهایی '{final_audio_file}' وجود ندارد!", log_list)
258
  return None
259
- return final_audio_file
260
 
261
  def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature, progress=gr.Progress(track_tqdm=True)): # YOUR ORIGINAL SIGNATURE
262
  logs = []
@@ -271,200 +233,236 @@ def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_pr
271
  else:
272
  actual_text = text_to_speak
273
  if not actual_text or not actual_text.strip(): return None
 
274
  final_path = core_generate_audio(actual_text, speech_prompt, speaker_voice, temperature, logs)
275
- # for log_entry in logs: print(log_entry) # Kept commented as in your original
276
- return final_path
277
- # --- END: Core TTS Logic from YOUR AlphaTTS_Original (UNCHANGED) ---
278
 
279
 
280
- # --- START: CSS for AlphaTranslator_Styled Appearance ---
281
  # (Using CSS variables from AlphaTranslator_Styled for colors and fonts)
282
- FLY_PRIMARY_COLOR_HEX = "#4F46E5"
283
- FLY_SECONDARY_COLOR_HEX = "#10B981"
284
- FLY_ACCENT_COLOR_HEX = "#D97706" # Orange from AlphaTranslator_Styled
285
- FLY_TEXT_COLOR_HEX = "#1F2937"
286
- FLY_SUBTLE_TEXT_HEX = "#6B7280"
287
- FLY_LIGHT_BACKGROUND_HEX = "#F9FAFB"
288
- FLY_WHITE_HEX = "#FFFFFF"
289
- FLY_BORDER_COLOR_HEX = "#D1D5DB"
290
- FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
291
- # FLY_PANEL_BG_SIMPLE = "#E0F2FE" # Not directly used if panel structure changes
292
-
293
- # This is the CSS from the "AlphaTranslator_Styled" that you liked.
294
- # It will be applied to your Gradio app structure.
295
- # Some selectors might need slight adjustment if your `elem_id` or class usage is very different,
296
- # but the general look and feel should be achieved.
297
- alphatranslator_inspired_css = f"""
298
- @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
 
 
 
 
 
 
 
 
 
 
 
 
299
  @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
300
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
 
301
  :root {{
302
- --fly-primary: {FLY_PRIMARY_COLOR_HEX}; --fly-secondary: {FLY_SECONDARY_COLOR_HEX};
303
- --fly-accent: {FLY_ACCENT_COLOR_HEX}; --fly-text-primary: {FLY_TEXT_COLOR_HEX};
304
- --fly-text-secondary: {FLY_SUBTLE_TEXT_HEX}; --fly-bg-light: {FLY_LIGHT_BACKGROUND_HEX};
305
- --fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
 
 
 
 
 
306
  --fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE};
307
- /* --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE}; */ /* Specific panel bg, might not apply directly */
308
- --font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif; /* Vazirmatn prioritized */
309
- --font-english: 'Poppins', 'Inter', system-ui, sans-serif;
310
- --radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
311
- --shadow-sm: 0 1px 2px 0 rgba(0,0,0,0.05); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -2px rgba(0,0,0,0.1);
312
  --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
313
  --shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1),0 8px 10px -6px rgba(0,0,0,0.1);
314
- --fly-primary-rgb: 79,70,229; --fly-accent-rgb: 217,119,6;
 
 
 
315
  }}
316
- body {{ /* General body styling */
317
- font-family:var(--font-global);direction:rtl;background-color:var(--fly-bg-light);
318
- color:var(--fly-text-primary);line-height:1.7;font-size:16px;
319
- -webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;
 
 
 
 
 
 
320
  }}
321
- .gradio-container {{ /* Overall container to achieve the gradient background */
322
- max-width:100% !important;width:100% !important;min-height:100vh;
323
- margin:0 auto !important;padding:0 !important;border-radius:0 !important;
324
- box-shadow:none !important;background:linear-gradient(170deg, #E0F2FE 0%, #F3E8FF 100%);
325
- display:flex;flex-direction:column;
 
326
  }}
327
- /* Header styling using a distinct class to avoid conflicts if you run multiple apps */
328
- .app-header-alphatts-styled {{
329
- text-align:center;padding:2.5rem 1rem;margin:0;
330
- background:linear-gradient(135deg,var(--fly-primary) 0%,var(--fly-secondary) 100%);
331
- color:var(--fly-bg-white);border-bottom-left-radius:var(--radius-xl);
332
- border-bottom-right-radius:var(--radius-xl);box-shadow:var(--shadow-lg);
333
- position:relative;overflow:hidden;
 
334
  }}
335
  .app-header-alphatts-styled::before {{
336
- content:'';position:absolute;top:-50px;right:-50px;width:150px;height:150px;
337
- background:rgba(255,255,255,0.1);border-radius:var(--radius-full);
338
- opacity:0.5;transform:rotate(45deg);
339
  }}
340
  .app-header-alphatts-styled h1 {{
341
- font-size:2.25em !important;font-weight:800 !important;margin:0 0 0.5rem 0;
342
- font-family:var(--font-english);letter-spacing:-0.5px;text-shadow:0 2px 4px rgba(0,0,0,0.1);
343
  }}
344
  .app-header-alphatts-styled p {{
345
- font-size:1em !important;margin-top:0.25rem;font-weight:400;
346
  color:rgba(255,255,255,0.85) !important;
347
  }}
348
 
349
- /* Main content area where your Gradio components will reside */
350
- /* Your original AlphaTTS used `main-content-panel-alpha` for its main Column. */
351
- /* We'll try to style that or a new wrapper if needed. */
352
- .main-content-panel-alpha {{ /* Targeting your existing class for the main content block */
353
- background-color:var(--fly-bg-white);padding:1.5rem;border-radius:var(--radius-xl);
354
- box-shadow:var(--shadow-xl);margin:-2rem auto 2rem auto; /* Negative margin for overlap */
355
- position:relative;z-index:10;width:90%;max-width: 680px; /* Max width from your original CSS */
 
356
  }}
357
 
358
- /* Styling inputs within your main panel */
359
- .main-content-panel-alpha .gr-input > label + div > textarea,
360
- .main-content-panel-alpha .gr-dropdown > label + div > div > input,
361
- .main-content-panel-alpha .gr-dropdown > label + div > div > select,
362
- .main-content-panel-alpha .gr-textbox > label + div > textarea,
363
- .main-content-panel-alpha .gr-file > label + div
 
 
364
  {{
365
- border-radius:var(--radius-md) !important; /* Using a consistent radius */
366
  border:1.5px solid var(--fly-border-color) !important;
367
- font-size:0.95em !important;background-color:var(--fly-input-bg-simple) !important;
368
- padding:10px 12px !important;color:var(--fly-text-primary) !important;
 
 
 
369
  }}
370
- .main-content-panel-alpha .gr-input > label + div > textarea:focus,
371
- .main-content-panel-alpha .gr-dropdown > label + div > div > input:focus,
372
- .main-content-panel-alpha .gr-dropdown > label + div > div > select:focus,
373
- .main-content-panel-alpha .gr-textbox > label + div > textarea:focus,
374
- .main-content-panel-alpha .gr-file > label + div:focus-within
 
375
  {{
376
  border-color:var(--fly-primary) !important;
377
  box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;
378
  background-color:var(--fly-bg-white) !important;
379
  }}
380
- .main-content-panel-alpha .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
- /* Button styling: Applying the --fly-accent color */
383
- .main-content-panel-alpha .gr-button[elem_id="generate_button_alpha_v3"] /* Your button ID */
384
  {{
385
  background:var(--fly-accent) !important; /* Orange accent */
386
- margin-top:1.5rem !important;padding:12px 20px !important;
387
- transition:all 0.25s ease-in-out !important;color:white !important;font-weight:600 !important;
388
- border-radius:var(--radius-md) !important;border:none !important; /* Consistent radius */
389
  box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;
390
- width:100% !important;font-size:1em !important; /* Full width on mobile, adjust in media query */
391
- display:flex;align-items:center;justify-content:center;
392
  }}
393
- .main-content-panel-alpha .gr-button[elem_id="generate_button_alpha_v3"]:hover
394
  {{
395
  background:#B45309 !important; /* Darker orange */ transform:translateY(-1px) !important;
396
  box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;
397
  }}
398
 
399
- /* General label styling */
400
- .main-content-panel-alpha label > span.label-text
401
- {{
402
- font-weight:500 !important;color:#4B5563 !important;
403
- font-size:0.90em !important;margin-bottom:8px !important;display:inline-block;
404
- }}
405
-
406
- /* Your specific temperature description class */
407
- .main-content-panel-alpha .temp_description_class_alpha_v3 {{
408
- font-size: 0.85em; color: var(--fly-text-secondary); margin-top: -0.6rem; margin-bottom: 1.2rem;
409
  }}
410
 
411
- /* Audio Player styling */
412
- .main-content-panel-alpha #output_audio_player_alpha_v3 audio
413
  {{
414
  width: 100%; border-radius: var(--radius-md); margin-top:0.8rem;
415
  }}
416
 
417
- /* Examples section button styling */
418
- .main-content-panel-alpha div[label*="نمونه‌های کاربردی"] .gr-button.gr-button-tool,
419
- .main-content-panel-alpha div[label*="نمونه‌های کاربردی"] .gr-sample-button
420
  {{
421
- background-color:#E0E7FF !important;color:var(--fly-primary) !important;
422
- border-radius:var(--radius-sm) !important;font-size:0.8em !important;padding:5px 10px !important;
423
- }}
424
- .main-content-panel-alpha .custom-hr {{ /* If you add a hr manually */
425
- height:1px;background-color:var(--fly-border-color);margin:1.8rem 0;border:none;
426
  }}
 
427
 
428
- /* Footer styling */
 
429
  .app-footer-alphatts-styled {{
430
  text-align:center;font-size:0.85em;color:var(--fly-text-secondary);margin-top:2.5rem;
431
  padding:1rem 0;background-color:rgba(255,255,255,0.3);backdrop-filter:blur(5px);
432
  border-top:1px solid var(--fly-border-color);
433
  }}
434
- footer {{display:none !important;}} /* Hide default Gradio footer */
 
 
435
 
436
- /* Responsive adjustments */
437
  @media (min-width:640px) {{
 
 
438
  .app-header-alphatts-styled h1 {{font-size:2.5em !important;}}
439
  .app-header-alphatts-styled p {{font-size:1.05em !important;}}
440
  }}
441
  @media (min-width:768px) {{
442
- .main-content-panel-alpha {{padding:2rem;}}
443
- .main-content-panel-alpha .gr-button[elem_id="generate_button_alpha_v3"]
 
 
444
  {{
445
- width:auto !important;align-self:flex-start; /* Align to start on larger screens */
446
  }}
447
  .app-header-alphatts-styled h1 {{font-size:2.75em !important;}}
448
  .app-header-alphatts-styled p {{font-size:1.1em !important;}}
449
  }}
450
  """
451
- # --- END: CSS for AlphaTranslator_Styled Appearance ---
452
-
453
-
454
- # --- START: YOUR ORIGINAL GRADIO UI STRUCTURE ---
455
- # The theme `gr.themes.Base(font=[gr.themes.GoogleFont("Vazirmatn")])` is from your original.
456
- # We apply `app_theme_outer_styled` for body background and `alphatranslator_inspired_css` for specifics.
457
- # Note: Your original `custom_css_inspired_by_image` is replaced by `alphatranslator_inspired_css` here.
458
- with gr.Blocks(theme=app_theme_outer_styled, css=alphatranslator_inspired_css, title=f"آلفا TTS ({FIXED_MODEL_NAME.split('-')[1]})") as demo:
459
- # Applying the styled header
460
- # Your original HTML for header was:
461
- # alpha_header_html_v3 = """
462
- # <div class='app-header-alpha'>
463
- # <h1>Alpha TTS</h1>
464
- # <p>جادوی تبدیل متن به صدا در دستان شما</p>
465
- # </div>
466
- # """
467
- # We use the new class for styling:
468
  gr.HTML(f"""
469
  <div class='app-header-alphatts-styled'>
470
  <h1>🚀 Alpha TTS</h1>
@@ -472,108 +470,117 @@ with gr.Blocks(theme=app_theme_outer_styled, css=alphatranslator_inspired_css, t
472
  </div>
473
  """)
474
 
475
- # Your original main Column with its class `main-content-panel-alpha`
476
- # The CSS above will target this class to style the panel.
477
- with gr.Column(elem_classes=["main-content-panel-alpha"]): # YOUR ORIGINAL CLASS NAME
478
-
479
- # Warning if GEMINI_API_KEY is not set (useful addition, styled by CSS)
480
- if not os.environ.get("GEMINI_API_KEY"):
481
- missing_key_msg = (
482
- "⚠️ هشدار: متغیر محیطی GEMINI_API_KEY تنظیم نشده است. "
483
- "قابلیت تبدیل متن به گفتار احتمالاً کار نخواهد کرد."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  )
485
- gr.Markdown(f"<div class='api-warning-message'>{missing_key_msg}</div>") # api-warning-message is styled in AlphaTranslator
486
-
487
- # Your original UI components
488
- use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False, elem_id="use_file_cb_alpha_v3")
489
-
490
- uploaded_file_input = gr.File(
491
- label=" ",
492
- file_types=['.txt'],
493
- visible=False,
494
- elem_id="file_uploader_alpha_main_v3"
495
- )
496
-
497
- text_to_speak_tb = gr.Textbox(
498
- label="متن فارسی برای تبدیل",
499
- placeholder="مثال: سلام، فردا هوا چطور است؟",
500
- lines=5,
501
- value="",
502
- visible=True,
503
- elem_id="text_input_main_alpha_v3"
504
- )
505
-
506
- use_file_input_cb.change( # YOUR ORIGINAL CHANGE HANDLER
507
- fn=lambda x: (gr.update(visible=x, label=" " if x else "متن فارسی برای تبدیل"), gr.update(visible=not x)),
508
- inputs=use_file_input_cb,
509
- outputs=[uploaded_file_input, text_to_speak_tb]
510
- )
511
-
512
- speech_prompt_tb = gr.Textbox(
513
- label="سبک گفتار (اختیاری)",
514
- placeholder="مثال: با لحنی شاد و پرانرژی",
515
- value="با لحنی دوستانه و رسا صحبت کن.",
516
- lines=2, elem_id="speech_prompt_alpha_v3"
517
- )
518
-
519
- speaker_voice_dd = gr.Dropdown(
520
- SPEAKER_VOICES, label="انتخاب گوینده و لهجه", value="Charon", elem_id="speaker_voice_alpha_v3"
521
- )
522
-
523
- temperature_slider = gr.Slider(
524
- minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="میزان خلاقیت صدا",
525
- elem_id="temperature_slider_alpha_v3"
526
- )
527
- # Your original Markdown for temperature description
528
- gr.Markdown("<p class='temp_description_class_alpha_v3'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایین‌تر = یکنواختی بیشتر.</p>")
529
 
530
- generate_button = gr.Button("🚀 تولید و پخش صدا", elem_id="generate_button_alpha_v3") # YOUR ORIGINAL BUTTON
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
 
532
- output_audio = gr.Audio(label=" ", type="filepath", elem_id="output_audio_player_alpha_v3") # YOUR ORIGINAL AUDIO OUTPUT
533
-
534
- # Your original Examples section
535
- gr.HTML("<hr class='custom-hr'>") # Added for visual separation, styled by CSS
536
- gr.Markdown( # YOUR ORIGINAL EXAMPLES TITLE MARKDOWN
537
- "<h3 class='section-title-main-alpha' style='margin-top:2.5rem; text-align:center; border-bottom:none;'>نمونه‌های کاربردی</h3>",
538
- elem_id="examples_section_title_v3"
539
- )
540
- gr.Examples( # YOUR ORIGINAL EXAMPLES
541
- examples=[
542
- [False, None, "سلام بر شما، امیدوارم روز خوبی داشته باشید.", "با لحنی گرم و صمیمی.", "Zephyr", 0.85],
543
- [False, None, "این یک آزمایش برای بررسی کیفیت صدای تولید شده توسط هوش مصنوعی آلفا است.", "با صدایی طبیعی و روان.", "Charon", 0.9],
544
- ],
545
- inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
546
- outputs=[output_audio], # Your original output
547
- fn=gradio_tts_interface,
548
- cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true" # Optional caching
549
- )
550
 
551
- # Applying the styled footer
552
- # Your original footer was: gr.Markdown("<p class='app-footer-final'>Alpha Language Learning © 2024</p>")
553
- # We use the new class for styling:
554
- gr.HTML(f"<p class='app-footer-alphatts-styled'>Alpha TTS © 2024 - Model: {FIXED_MODEL_NAME.split('-')[1]}</p>")
555
 
556
 
557
- # --- Event Handlers (YOUR ORIGINAL CLICK HANDLER) ---
 
 
558
  if generate_button is not None: # Check if button was created
559
  generate_button.click(
560
  fn=gradio_tts_interface,
561
- inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
562
- outputs=[output_audio]
563
  )
564
  else:
565
- logging.error("دکمه generate_button_alpha_v3 به درستی ایجاد نشده است.")
566
- # --- END: YOUR ORIGINAL GRADIO UI STRUCTURE ---
 
567
 
568
  if __name__ == "__main__":
 
569
  if not PYDUB_AVAILABLE:
570
- logging.warning("Pydub (برای ادغام فایل‌های صوتی) یافت نشد. اگر چندین قطعه صوتی تولید شود، ادغام انجام نخواهد شد.")
571
  if not os.environ.get("GEMINI_API_KEY"):
572
- logging.warning("متغیر محیطی GEMINI_API_KEY تنظیم نشده است. TTS احتمالاً با خطا مواجه خواهد شد.")
573
 
574
  demo.launch(
575
- server_name="0.0.0.0",
576
- server_port=int(os.getenv("PORT", 7860)),
577
- debug=os.environ.get("GRADIO_DEBUG", "False").lower() == "true",
578
- show_error=True
579
  )
 
7
  import time
8
  # import zipfile # Not used in your original core logic
9
  from google import genai
10
+ from google.genai import types as genai_types # Aliased your 'types' import
 
 
11
 
12
  try:
13
  from pydub import AudioSegment
 
15
  except ImportError:
16
  PYDUB_AVAILABLE = False
17
 
18
+ # --- START: Your Original Core TTS Logic (UNCHANGED) ---
 
 
 
19
  SPEAKER_VOICES = [
20
  "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
21
  "Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
 
23
  "Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
24
  "Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
25
  ]
26
+ FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts" # YOUR DEFINED MODEL
27
  DEFAULT_MAX_CHUNK_SIZE = 3800
28
  DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
29
  DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
30
 
31
+ def _log(message, log_list): # YOUR _log function
32
  log_list.append(message)
33
+ # print(f"[AlphaTTS_LOG] {message}") # Optional: print to console for live debugging if needed
34
 
35
  def save_binary_file(file_name, data, log_list):
36
  try:
 
62
  except ValueError: pass
63
  return {"bits_per_sample": bits, "rate": rate}
64
 
65
+ def smart_text_split(text, max_size=3800, log_list=None): # YOUR smart_text_split
66
  if len(text) <= max_size: return [text]
67
  chunks, current_chunk = [], ""
68
+ sentences = re.split(r'(?<=[.!?؟۔])\s+', text) # Added Persian full stop from previous attempts for robustness
69
  for sentence in sentences:
70
  if len(current_chunk) + len(sentence) + 1 > max_size:
71
  if current_chunk: chunks.append(current_chunk.strip())
72
  current_chunk = sentence
73
  while len(current_chunk) > max_size:
74
+ # Using your original split_idx logic
75
+ split_idx = next((i for i in range(max_size - 1, max_size // 2, -1) if current_chunk[i] in ['،', ',', ';', ':', ' ']), -1)
76
+ part, current_chunk = (current_chunk[:split_idx+1], current_chunk[split_idx+1:]) if split_idx != -1 else (current_chunk[:max_size], current_chunk[max_size:])
 
 
 
 
 
 
 
 
 
77
  chunks.append(part.strip())
78
+ else: current_chunk += (" " if current_chunk else "") + sentence # Your original logic for adding sentence
79
  if current_chunk: chunks.append(current_chunk.strip())
80
  final_chunks = [c for c in chunks if c]
81
  if log_list: _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
82
  return final_chunks
83
 
84
+ def merge_audio_files_func(file_paths, output_path, log_list): # YOUR merge_audio_files_func
85
+ if not PYDUB_AVAILABLE: _log("❌ pydub در دسترس نیست.", log_list); return False
 
 
86
  try:
87
  _log(f"🔗 ادغام {len(file_paths)} فایل صوتی...", log_list)
88
  combined = AudioSegment.empty()
89
  for i, fp in enumerate(file_paths):
90
  if os.path.exists(fp):
91
+ try: # Added try-except for individual file processing with pydub
92
  segment = AudioSegment.from_file(fp)
93
  combined += segment
94
  if i < len(file_paths) - 1:
95
+ combined += AudioSegment.silent(duration=150) # Your original logic
96
  except Exception as e_pydub:
97
  _log(f"⚠️ خطای Pydub در پردازش فایل '{fp}': {e_pydub}. از این فایل صرف نظر می شود.", log_list)
98
  continue
99
+ else: _log(f"⚠️ فایل پیدا نشد: {fp}", log_list)
100
+ if len(combined) == 0: # Added check if no valid segments were combined
 
101
  _log("❌ هیچ قطعه صوتی برای ادغام وجود ندارد.", log_list)
102
  return False
103
  combined.export(output_path, format="wav")
104
+ _log(f"✅ فایل ادغام شده: {output_path}", log_list); return True
105
+ except Exception as e: _log(f"❌ خطا در ادغام: {e}", log_list); return False
 
 
 
106
 
107
def _first_inline_audio(response):
    """Return the inline_data of the first part of the first candidate, or None."""
    candidates = response.candidates
    if not candidates:
        return None
    content = candidates[0].content
    if not content or not content.parts:
        return None
    return content.parts[0].inline_data or None


def _rename_to_final(src_path, final_base, error_label, log_list):
    """Rename src_path to '<final_base><original extension>'.

    Returns a ``(path, renamed)`` tuple: the final path and True on success,
    or the untouched ``src_path`` and False if the rename failed.
    """
    target_name = f"{final_base}{os.path.splitext(src_path)[1]}"
    try:
        if os.path.abspath(src_path) != os.path.abspath(target_name):
            # os.replace overwrites a stale target left by a previous run.
            os.replace(src_path, target_name)
        return target_name, True
    except OSError as e_rename:
        _log(f"{error_label}: {e_rename}", log_list)
        return src_path, False


def _remove_temp_files(paths, log_list):
    """Best-effort removal of intermediate chunk files; failures are only logged."""
    for path in paths:
        try:
            os.remove(path)
        except OSError as e_rm:
            _log(f"⚠️ عدم امکان حذف فایل موقت '{path}': {e_rm}", log_list)


def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list):
    """Synthesize speech for a text with the Gemini TTS model and return one audio file.

    The text is split into chunks with ``smart_text_split``; each chunk is sent
    to ``FIXED_MODEL_NAME`` and its audio saved to disk. Multiple chunks are
    merged with pydub when available; otherwise (or if merging fails) only the
    first chunk is delivered.

    Args:
        text_input: Text to convert to speech.
        prompt_input: Optional style instruction, prepended in quotes to every chunk.
        selected_voice: Name of the prebuilt voice to use.
        temperature_val: Sampling temperature passed to the model.
        log_list: List that receives progress/error messages via ``_log``.

    Returns:
        Path of the final audio file, or None if the API key is missing, the
        client cannot be created, the text is empty, or no audio was produced.
    """
    output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
    max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS
    _log(f"🚀 شروع فرآیند با مدل: {FIXED_MODEL_NAME}...", log_list)

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        _log("❌ کلید API تنظیم نشده.", log_list)
        return None
    try:
        client = genai.Client(api_key=api_key)
    except Exception as e:
        _log(f"❌ خطا در کلاینت: {e}", log_list)
        return None
    if not text_input or not text_input.strip():
        _log("❌ متن ورودی خالی.", log_list)
        return None

    text_chunks = smart_text_split(text_input, max_chunk, log_list)
    if not text_chunks:
        _log("❌ متن قابل پردازش نیست.", log_list)
        return None

    generated_files = []
    for i, chunk in enumerate(text_chunks):
        _log(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)} (صدا: {selected_voice}, دما: {temperature_val})...", log_list)
        final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
        contents = [genai_types.Content(role="user", parts=[genai_types.Part.from_text(text=final_text)])]
        config = genai_types.GenerateContentConfig(
            temperature=temperature_val,
            response_modalities=["audio"],
            speech_config=genai_types.SpeechConfig(
                voice_config=genai_types.VoiceConfig(
                    prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)
                )
            ),
        )
        fname_base = f"{output_base_name}_part{i+1:03d}"
        try:
            response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
            inline_data = _first_inline_audio(response)
            if inline_data is not None:
                data_buffer = inline_data.data
                mime_type = inline_data.mime_type
                _log(f"داده صوتی در candidate.part[0].inline_data برای قطعه {i+1} یافت شد. MIME: {mime_type}", log_list)
                ext = mimetypes.guess_extension(mime_type) or ".wav"
                # Raw linear PCM ("audio/L16;...") has no container; wrap it in a WAV header.
                if "audio/L" in mime_type and ext == ".wav":
                    _log(f"تبدیل صدای خام PCM (MIME: {mime_type}) به WAV برای قطعه {i+1}.", log_list)
                    data_buffer = convert_to_wav(data_buffer, mime_type)
                if not ext.startswith("."):
                    ext = "." + ext
                fpath = save_binary_file(f"{fname_base}{ext}", data_buffer, log_list)
                if fpath:
                    generated_files.append(fpath)
            else:
                _log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی در مسیر مورد انتظار.", log_list)
        except Exception as e:
            # Deliberately no `continue` here: a failed request must still be
            # followed by the pause below, otherwise the next request is sent
            # immediately after an error.
            _log(f" خطا در تولید قطعه {i+1}: {e}", log_list)

        if i < len(text_chunks) - 1:
            _log(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از قطعه بعدی...", log_list)
            time.sleep(sleep_time)

    if not generated_files:
        _log("❌ هیچ فایلی تولید نشد.", log_list)
        return None
    _log(f"🎉 {len(generated_files)} فایل(های) صوتی تولید شد.", log_list)

    final_output_path_base = f"{output_base_name}_final"

    if len(generated_files) == 1:
        final_audio_file, _ = _rename_to_final(
            generated_files[0], final_output_path_base,
            "خطا در تغییر نام فایل تکی نهایی", log_list,
        )
    else:
        merged_fn = f"{final_output_path_base}.wav"
        merged = False
        if PYDUB_AVAILABLE:
            rename_error_label = "خطا در تغییر نام فایل اولین قطعه"
            if os.path.exists(merged_fn):
                try:
                    os.remove(merged_fn)
                except OSError as e_rm:
                    _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e_rm}", log_list)
            merged = merge_audio_files_func(generated_files, merged_fn, log_list)
        else:
            rename_error_label = "خطا در تغییر نام فایل اولین قطعه (بدون pydub)"
            _log("⚠️ pydub نیست. اولین قطعه ارائه می‌شود.", log_list)

        if merged:
            final_audio_file = merged_fn
            _remove_temp_files(
                [fp for fp in generated_files if os.path.abspath(fp) != os.path.abspath(merged_fn)],
                log_list,
            )
        else:
            # Fallback: deliver only the first chunk under the final name.
            final_audio_file, renamed = _rename_to_final(
                generated_files[0], final_output_path_base, rename_error_label, log_list,
            )
            # The remaining chunks are removed only when the rename succeeded.
            if renamed:
                _remove_temp_files(generated_files[1:], log_list)

    if final_audio_file and not os.path.exists(final_audio_file):
        _log(f"⚠️ فایل نهایی '{final_audio_file}' وجود ندارد!", log_list)
        return None
    return final_audio_file
222
 
223
  def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature, progress=gr.Progress(track_tqdm=True)): # YOUR ORIGINAL SIGNATURE
224
  logs = []
 
233
  else:
234
  actual_text = text_to_speak
235
  if not actual_text or not actual_text.strip(): return None
236
+
237
  final_path = core_generate_audio(actual_text, speech_prompt, speaker_voice, temperature, logs)
238
+ # for log_entry in logs: print(log_entry) # Your original commented-out log printing
239
+ return final_path # YOUR ORIGINAL RETURN
240
+ # --- END: Your Original Core TTS Logic (UNCHANGED) ---
241
 
242
 
243
+ # --- START: Styling and UI (Applying AlphaTranslator_Styled look to YOUR UI structure) ---
244
  # (Using CSS variables from AlphaTranslator_Styled for colors and fonts)
245
# Colour palette (hex strings) interpolated into the CSS custom properties of
# `final_combined_css` below.
FLY_PRIMARY_COLOR_HEX = "#4F46E5"  # --fly-primary: header gradient start, focus border
FLY_SECONDARY_COLOR_HEX = "#10B981"  # --fly-secondary: header gradient end
FLY_ACCENT_COLOR_HEX = "#D97706"  # --fly-accent: background of the generate button
FLY_TEXT_COLOR_HEX = "#1F2937"  # --fly-text-primary: body and input text
FLY_SUBTLE_TEXT_HEX = "#6B7280"  # --fly-text-secondary: labels, hints, footer text
FLY_LIGHT_BACKGROUND_HEX = "#F9FAFB"  # --fly-bg-light: page background (also used by the theme)
FLY_WHITE_HEX = "#FFFFFF"  # --fly-bg-white: content panel background, header text
FLY_BORDER_COLOR_HEX = "#D1D5DB"  # --fly-border-color: input borders, rule, footer border
FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"  # --fly-input-bg-simple: resting input background

# Colours of the earlier AlphaTTS look, kept for comparison or specific overrides.
# NOTE(review): none of these three is referenced by the stylesheet below.
APP_HEADER_GRADIENT_START_IMG_ORIGINAL = "#2980b9"  # blue header gradient start
APP_HEADER_GRADIENT_END_IMG_ORIGINAL = "#2ecc71"  # green header gradient end
BUTTON_BG_IMG_ORIGINAL = "#2979FF"  # blue generate button

# Base Gradio theme: sets only the fallback font stack and the page background.
# The Vazirmatn font is applied through the CSS (`--font-global`) rather than here.
app_theme_outer_styled = gr.themes.Base(
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],  # Base font from AlphaTranslator
).set(
    body_background_fill=FLY_LIGHT_BACKGROUND_HEX,  # Overall page background
)
268
+
269
+ # Combined and adapted CSS
270
+ # Prioritizing AlphaTranslator_Styled look, but using your specific elem_ids for targeting if possible
271
+ # The goal is to make YOUR UI components look like the AlphaTranslator_Styled components.
272
# Stylesheet passed to gr.Blocks(css=...). Literal CSS braces are doubled because
# this is an f-string; single braces interpolate the palette constants.
#
# The generate button and the audio player are targeted with `#<elem_id>`:
# Gradio renders a component's `elem_id` as the HTML `id` attribute, so an
# attribute selector such as `[elem_id="..."]` never matches anything.
final_combined_css = f"""
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;700;800&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');

:root {{
    /* Colors from AlphaTranslator_Styled */
    --fly-primary: {FLY_PRIMARY_COLOR_HEX};
    --fly-secondary: {FLY_SECONDARY_COLOR_HEX};
    --fly-accent: {FLY_ACCENT_COLOR_HEX}; /* Orange for primary actions */
    --fly-text-primary: {FLY_TEXT_COLOR_HEX};
    --fly-text-secondary: {FLY_SUBTLE_TEXT_HEX};
    --fly-bg-light: {FLY_LIGHT_BACKGROUND_HEX};
    --fly-bg-white: {FLY_WHITE_HEX};
    --fly-border-color: {FLY_BORDER_COLOR_HEX};
    --fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE};
    --fly-primary-rgb: 79,70,229;
    --fly-accent-rgb: 217,119,6;

    /* Radii and Shadows from AlphaTranslator_Styled */
    --radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem;
    --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
    --shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1),0 8px 10px -6px rgba(0,0,0,0.1);

    /* Font stack prioritizing Vazirmatn (from your original theme) then AlphaTranslator's */
    --font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif;
    --font-english: 'Poppins', 'Inter', system-ui, sans-serif; /* For English text in header */
}}

body {{
    font-family: var(--font-global);
    direction: rtl;
    background-color: var(--fly-bg-light);
    color: var(--fly-text-primary);
    line-height: 1.7;
    font-size: 16px;
    -webkit-font-smoothing: antialiased;
    -moz-osx-font-smoothing: grayscale;
}}

.gradio-container {{ /* Overall container styling from AlphaTranslator_Styled */
    max-width:100% !important; width:100% !important; min-height:100vh;
    margin:0 auto !important; padding:0 !important; border-radius:0 !important;
    box-shadow:none !important; background:linear-gradient(170deg, #E0F2FE 0%, #F3E8FF 100%);
    display:flex; flex-direction:column;
}}

/* Header: Using AlphaTranslator_Styled header structure and classes */
.app-header-alphatts-styled {{
    text-align:center; padding:2.5rem 1rem; margin:0;
    background:linear-gradient(135deg, var(--fly-primary) 0%, var(--fly-secondary) 100%); /* Blue-Green gradient */
    color:var(--fly-bg-white); border-bottom-left-radius:var(--radius-xl);
    border-bottom-right-radius:var(--radius-xl); box-shadow:var(--shadow-lg);
    position:relative; overflow:hidden;
}}
.app-header-alphatts-styled::before {{
    content:''; position:absolute; top:-50px; right:-50px; width:150px; height:150px;
    background:rgba(255,255,255,0.1); border-radius:9999px; /* full */
    opacity:0.5; transform:rotate(45deg);
}}
.app-header-alphatts-styled h1 {{
    font-size:2.25em !important; font-weight:800 !important; margin:0 0 0.5rem 0;
    font-family:var(--font-english); letter-spacing:-0.5px; text-shadow:0 2px 4px rgba(0,0,0,0.1);
}}
.app-header-alphatts-styled p {{
    font-size:1em !important; margin-top:0.25rem; font-weight:400;
    color:rgba(255,255,255,0.85) !important;
}}

/* Main content panel that will wrap YOUR UI elements */
.main-content-area-alphatts-styled {{
    flex-grow:1; padding:0.75rem; width:100%; margin:0 auto; box-sizing:border-box;
}}
.content-panel-alphatts-styled {{ /* This will wrap your gr.Column */
    background-color:var(--fly-bg-white); padding:1rem; border-radius:var(--radius-xl);
    box-shadow:var(--shadow-xl); margin-top:-2rem; position:relative; z-index:10;
    margin-bottom:2rem; width:100%; box-sizing:border-box;
}}

/* Styling YOUR UI elements to match AlphaTranslator_Styled look */
/* Inputs (Textbox, Dropdown, File, Slider) */
.content-panel-alphatts-styled .gr-input > label + div > textarea,
.content-panel-alphatts-styled .gr-dropdown > label + div > div > input, /* For searchable dropdown */
.content-panel-alphatts-styled .gr-dropdown > label + div > div > select, /* For standard dropdown */
.content-panel-alphatts-styled .gr-textbox > label + div > textarea,
.content-panel-alphatts-styled .gr-file > label + div,
.content-panel-alphatts-styled .gr-slider input[type="range"] /* Basic slider styling */
{{
    border-radius:var(--radius-md) !important; /* Using AlphaTranslator's medium radius */
    border:1.5px solid var(--fly-border-color) !important;
    font-size:0.95em !important;
    background-color:var(--fly-input-bg-simple) !important;
    padding:10px 12px !important;
    color:var(--fly-text-primary) !important;
    box-shadow: none !important; /* Remove default Gradio shadows if any */
}}
.content-panel-alphatts-styled .gr-input > label + div > textarea:focus,
.content-panel-alphatts-styled .gr-dropdown > label + div > div > input:focus,
.content-panel-alphatts-styled .gr-dropdown > label + div > div > select:focus,
.content-panel-alphatts-styled .gr-textbox > label + div > textarea:focus,
.content-panel-alphatts-styled .gr-file > label + div:focus-within,
.content-panel-alphatts-styled .gr-slider input[type="range"]:focus
{{
    border-color:var(--fly-primary) !important;
    box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;
    background-color:var(--fly-bg-white) !important;
}}
.content-panel-alphatts-styled .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
.content-panel-alphatts-styled .gr-dropdown select {{ cursor:pointer; }}

/* Labels: General style from AlphaTranslator_Styled */
.content-panel-alphatts-styled .gr-form > div > .gr-block > label > .label-text, /* Common path for labels */
.content-panel-alphatts-styled .gr-form > .gr-block.gr-input-label-ただ > label > .label-text, /* Another common path */
.content-panel-alphatts-styled label > span.label-text /* General target */
{{
    font-weight:500 !important; color: var(--fly-text-secondary) !important;
    font-size:0.88em !important; margin-bottom:6px !important; display:inline-block;
}}
/* Your specific elem_id label styling from AlphaTTS_Original (icon part) - this is tricky with Gradio's default label structure */
/* The ::before icons might not apply correctly without JS or if Gradio doesn't use `for` attributes on labels properly. */
/* For now, the general label style above will apply. If icons are crucial, it needs more specific handling. */

/* Button: Targeting your button by its elem_id for the AlphaTranslator_Styled primary button look */
.content-panel-alphatts-styled #generate_button_alpha_v3
{{
    background:var(--fly-accent) !important; /* Orange accent */
    margin-top:1.5rem !important; padding:12px 20px !important;
    transition:all 0.25s ease-in-out !important; color:white !important; font-weight:600 !important;
    border-radius:var(--radius-md) !important; border:none !important;
    box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;
    width:100% !important; font-size:1.05em !important; /* Your original font size */
    display:flex; align-items:center; justify-content:center;
}}
.content-panel-alphatts-styled #generate_button_alpha_v3:hover
{{
    background:#B45309 !important; /* Darker orange */ transform:translateY(-1px) !important;
    box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;
}}

/* Temperature description (from your original AlphaTTS CSS) */
.content-panel-alphatts-styled .temp_description_class_alpha_v3 {{
    font-size: 0.85em; color: var(--fly-text-secondary); margin-top: -0.4rem; margin-bottom: 1rem;
}}

/* Audio Player styling (targeting your specific elem_id) */
.content-panel-alphatts-styled #output_audio_player_alpha_v3 audio
{{
    width: 100%; border-radius: var(--radius-md); margin-top:0.8rem;
}}

/* Examples section (using AlphaTranslator_Styled examples button style) */
.content-panel-alphatts-styled .gr-examples .gr-button.gr-button-tool,
.content-panel-alphatts-styled .gr-examples .gr-sample-button
{{
    background-color:#E0E7FF !important; color:var(--fly-primary) !important;
    border-radius:var(--radius-sm) !important; font-size:0.78em !important; padding:4px 8px !important;
}}
.content-panel-alphatts-styled .custom-hr {{height:1px;background-color:var(--fly-border-color);margin:1.5rem 0;border:none;}}


/* Footer: Using AlphaTranslator_Styled footer */
.app-footer-alphatts-styled {{
    text-align:center;font-size:0.85em;color:var(--fly-text-secondary);margin-top:2.5rem;
    padding:1rem 0;background-color:rgba(255,255,255,0.3);backdrop-filter:blur(5px);
    border-top:1px solid var(--fly-border-color);
}}
/* Hide default Gradio watermarks/footers */
footer.svelte-1gfkn6j, .gradio-footer, .flagging-container, .footer-utils {{display:none !important; visibility:hidden !important;}}


/* Responsive adjustments from AlphaTranslator_Styled */
@media (min-width:640px) {{
    .main-content-area-alphatts-styled {{padding:1.5rem;max-width:700px;}}
    .content-panel-alphatts-styled {{padding:1.5rem;}}
    .app-header-alphatts-styled h1 {{font-size:2.5em !important;}}
    .app-header-alphatts-styled p {{font-size:1.05em !important;}}
}}
@media (min-width:768px) {{
    .main-content-area-alphatts-styled {{max-width:780px;}}
    .content-panel-alphatts-styled {{padding:2rem;}}
    /* Make button not full width on larger screens */
    .content-panel-alphatts-styled #generate_button_alpha_v3
    {{
        width:auto !important; align-self:flex-start;
    }}
    .app-header-alphatts-styled h1 {{font-size:2.75em !important;}}
    .app-header-alphatts-styled p {{font-size:1.1em !important;}}
}}
"""
461
+
462
+ # --- Gradio UI Definition (YOUR UI structure, with new CSS applied) ---
463
+ # Using app_theme_outer_styled for base theme, and final_combined_css for specifics
464
+ with gr.Blocks(theme=app_theme_outer_styled, css=final_combined_css, title=f"آلفا TTS ({FIXED_MODEL_NAME.split('-')[1]})") as demo:
465
+ # Header from AlphaTranslator_Styled structure
 
 
 
 
 
 
 
 
 
 
 
 
466
  gr.HTML(f"""
467
  <div class='app-header-alphatts-styled'>
468
  <h1>🚀 Alpha TTS</h1>
 
470
  </div>
471
  """)
472
 
473
+ # Main content area wrapper from AlphaTranslator_Styled structure
474
+ with gr.Column(elem_classes=["main-content-area-alphatts-styled"]):
475
+ # Content panel wrapper from AlphaTranslator_Styled structure
476
+ # Your original UI was inside a gr.Column, which is fine. We wrap it with another for styling.
477
+ with gr.Column(elem_classes=["content-panel-alphatts-styled"]):
478
+
479
+ # Your original UI layout starts here
480
+ # `elem_id`s are from your original AlphaTTS code.
481
+
482
+ # Optional: Warning if GEMINI_API_KEY is not set
483
+ if not os.environ.get("GEMINI_API_KEY"):
484
+ missing_key_msg = (
485
+ "⚠️ هشدار: متغیر محیطی GEMINI_API_KEY تنظیم نشده است. "
486
+ "این برنامه برای کار کردن به آن نیاز دارد."
487
+ )
488
+ gr.Markdown(f"<div style='background-color:#FFFBEB; color:#92400E; padding:10px; border-radius:8px; border:1px solid #FDE68A; text-align:center; margin-bottom:1rem;'>{missing_key_msg}</div>")
489
+
490
+ use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False, elem_id="use_file_cb_alpha_v3")
491
+
492
+ uploaded_file_input = gr.File(
493
+ label=" ",
494
+ file_types=['.txt'],
495
+ visible=False,
496
+ elem_id="file_uploader_alpha_main_v3"
497
+ )
498
+
499
+ text_to_speak_tb = gr.Textbox(
500
+ label="متن فارسی برای تبدیل",
501
+ placeholder="مثال: سلام، فردا هوا چطور است؟",
502
+ lines=5,
503
+ value="",
504
+ visible=True,
505
+ elem_id="text_input_main_alpha_v3"
506
+ )
507
+
508
+ # Your original change function for checkbox
509
+ use_file_input_cb.change(
510
+ fn=lambda x: (gr.update(visible=x, label=" " if x else "متن فارسی برای تبدیل"), gr.update(visible=not x)),
511
+ inputs=use_file_input_cb,
512
+ outputs=[uploaded_file_input, text_to_speak_tb]
513
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
+ speech_prompt_tb = gr.Textbox(
516
+ label="سبک گفتار (اختیاری)",
517
+ placeholder="مثال: با لحنی شاد و پرانرژی",
518
+ value="با لحنی دوستانه و رسا صحبت کن.",
519
+ lines=2, elem_id="speech_prompt_alpha_v3"
520
+ )
521
+
522
+ speaker_voice_dd = gr.Dropdown(
523
+ SPEAKER_VOICES, label="انتخاب گوینده و لهجه", value="Charon", elem_id="speaker_voice_alpha_v3"
524
+ )
525
+
526
+ temperature_slider = gr.Slider(
527
+ minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="میزان خلاقیت صدا",
528
+ elem_id="temperature_slider_alpha_v3"
529
+ )
530
+ # Your original temperature description
531
+ gr.Markdown("<p class='temp_description_class_alpha_v3'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایین‌تر = یکنواختی بیشتر.</p>")
532
+
533
+ # Your original button, CSS will target its elem_id
534
+ generate_button = gr.Button("🚀 تولید و پخش صدا", elem_id="generate_button_alpha_v3")
535
+
536
+ # Your original audio output, CSS will target its elem_id
537
+ output_audio = gr.Audio(label=" ", type="filepath", elem_id="output_audio_player_alpha_v3")
538
 
539
+ # Your original Examples section
540
+ gr.HTML("<hr class='custom-hr'>") # Using the styled HR
541
+ gr.Markdown(
542
+ "<h3 style='text-align:center; font-weight:500; color:var(--fly-text-secondary); margin-top:1.5rem; margin-bottom:1rem;'>نمونه‌های کاربردی</h3>",
543
+ # elem_id="examples_section_title_v3" # Your original elem_id for the examples title
544
+ )
545
+ gr.Examples(
546
+ examples=[
547
+ [False, None, "سلام بر شما، امیدوارم روز خوبی داشته باشید.", "با لحنی گرم و صمیمی.", "Zephyr", 0.85],
548
+ [False, None, "این یک آزمایش برای بررسی کیفیت صدای تولید شده توسط هوش مصنوعی آلفا است.", "با صدایی طبیعی و روان.", "Charon", 0.9],
549
+ ],
550
+ inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
551
+ outputs=[output_audio], # YOUR ORIGINAL OUTPUTS FOR EXAMPLES
552
+ fn=gradio_tts_interface,
553
+ cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true" # Optional caching
554
+ )
 
 
555
 
556
+ # Footer from AlphaTranslator_Styled structure
557
+ gr.Markdown(f"<p class='app-footer-alphatts-styled'>Alpha TTS © 2024 - Model: {FIXED_MODEL_NAME.split('-')[0].upper()} {FIXED_MODEL_NAME.split('-')[1]}</p>")
 
 
558
 
559
 
560
+ # --- Event Handlers (YOUR ORIGINAL EVENT HANDLERS) ---
561
+ # Note: The `generate_button.click` from your original code did not include `progress` in inputs.
562
+ # I'm keeping it that way to match your original exactly.
563
  if generate_button is not None: # Check if button was created
564
  generate_button.click(
565
  fn=gradio_tts_interface,
566
+ inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider], # NO progress
567
+ outputs=[output_audio]
568
  )
569
  else:
570
+ # This should not happen if the UI is defined correctly
571
+ logging.error("دکمه تولید صدا (generate_button_alpha_v3) در UI یافت نشد.")
572
+
573
 
574
if __name__ == "__main__":
    # Collect start-up warnings for missing prerequisites, then print them in order.
    startup_warnings = []
    if not PYDUB_AVAILABLE:
        startup_warnings.append(
            "WARNING: Pydub (for audio merging) not found. Please install with `pip install pydub`. Merging will be disabled."
        )
    if not os.environ.get("GEMINI_API_KEY"):
        startup_warnings.append(
            "WARNING: GEMINI_API_KEY environment variable not set. TTS functionality will likely fail."
        )
    for warning_text in startup_warnings:
        print(warning_text)

    # Launch settings: listen on all interfaces, take the port from $PORT
    # (default 7860), enable debug mode only when GRADIO_DEBUG is "true",
    # and show errors in the browser.
    listen_port = int(os.getenv("PORT", 7860))
    debug_enabled = os.environ.get("GRADIO_DEBUG", "False").lower() == "true"
    demo.launch(
        server_name="0.0.0.0",
        server_port=listen_port,
        debug=debug_enabled,
        show_error=True,
    )