Hamed744 committed on
Commit
d29f80b
·
verified ·
1 Parent(s): 6fd3544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +367 -326
app.py CHANGED
@@ -1,38 +1,28 @@
1
  import gradio as gr
2
- # import base64 # Not used in your core logic
3
  import mimetypes
4
  import os
5
  import re
6
  import struct
7
  import time
8
- # import zipfile # Not used in your core logic
9
  from google import genai
10
- from google.genai import types # Your original import name for genai.types
11
 
12
- # Logging import - this is generally good practice and was in the styled version.
13
- # If you strictly want NO changes, this can be removed, but it doesn't affect core logic.
14
- import logging
15
- # threading and sys were for auto-restart, which is an added feature.
16
- # If you strictly want NO changes from your original Python logic, these can be removed.
17
- # import threading
18
- # import sys
19
- import traceback # Useful for debugging, kept for now.
20
 
21
  try:
22
  from pydub import AudioSegment
23
  PYDUB_AVAILABLE = True
24
  except ImportError:
25
  PYDUB_AVAILABLE = False
 
26
 
27
- # --- START: Logging Configuration (Minimal, can be removed if strictly no additions) ---
28
- # This was part of the styled version for better output.
29
- # If you prefer no logging from this part, remove this block.
30
- if not logging.getLogger().hasHandlers(): # Avoid reconfiguring if already set
31
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
32
- # --- END: Logging Configuration ---
33
 
34
-
35
- # --- START: YOUR ORIGINAL TTS Core Logic (UNCHANGED) ---
36
  SPEAKER_VOICES = [
37
  "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
38
  "Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
@@ -40,15 +30,14 @@ SPEAKER_VOICES = [
40
  "Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
41
  "Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
42
  ]
43
- FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts" # YOUR DEFINED MODEL
44
  DEFAULT_MAX_CHUNK_SIZE = 3800
45
  DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
46
  DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
47
 
48
  def _log(message, log_list): # YOUR _log function
49
  log_list.append(message)
50
- # Optionally, also print to standard logs for better visibility in Hugging Face logs
51
- # logging.info(f"[AlphaTTS_User_Log] {message}") # You can uncomment this if you want
52
 
53
  def save_binary_file(file_name, data, log_list):
54
  try:
@@ -74,27 +63,35 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
74
  param = param.strip()
75
  if param.lower().startswith("rate="):
76
  try: rate = int(param.split("=", 1)[1])
77
- except ValueError: pass # Keep default if parse fails
78
  elif param.startswith("audio/L"):
79
  try: bits = int(param.split("L", 1)[1])
80
- except ValueError: pass # Keep default if parse fails
81
  return {"bits_per_sample": bits, "rate": rate}
82
 
83
  def smart_text_split(text, max_size=3800, log_list=None):
84
  if len(text) <= max_size: return [text]
85
  chunks, current_chunk = [], ""
86
- # Using your original regex for sentence splitting
87
- sentences = re.split(r'(?<=[.!?؟])\s+', text) # Original regex
88
  for sentence in sentences:
89
  if len(current_chunk) + len(sentence) + 1 > max_size:
90
  if current_chunk: chunks.append(current_chunk.strip())
91
  current_chunk = sentence
92
  while len(current_chunk) > max_size:
93
- # Using your original next() based split_idx logic
94
- split_idx = next((i for i in range(max_size - 1, max_size // 2, -1) if current_chunk[i] in ['،', ',', ';', ':', ' ']), -1)
95
- part, current_chunk = (current_chunk[:split_idx+1], current_chunk[split_idx+1:]) if split_idx != -1 else (current_chunk[:max_size], current_chunk[max_size:])
 
 
 
 
 
 
 
 
 
96
  chunks.append(part.strip())
97
- else: current_chunk += (" " if current_chunk and sentence else "") + sentence # Added check for sentence to avoid leading space if sentence is empty
98
  if current_chunk: chunks.append(current_chunk.strip())
99
  final_chunks = [c for c in chunks if c]
100
  if log_list: _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
@@ -102,55 +99,53 @@ def smart_text_split(text, max_size=3800, log_list=None):
102
 
103
  def merge_audio_files_func(file_paths, output_path, log_list):
104
  if not PYDUB_AVAILABLE:
105
- _log("❌ pydub در دسترس نیست.", log_list)
106
  return False
107
  try:
108
  _log(f"🔗 ادغام {len(file_paths)} فایل صوتی...", log_list)
109
  combined = AudioSegment.empty()
110
  for i, fp in enumerate(file_paths):
111
  if os.path.exists(fp):
112
- try: # Added try-except for individual file processing
113
  segment = AudioSegment.from_file(fp)
114
  combined += segment
115
  if i < len(file_paths) - 1:
116
  combined += AudioSegment.silent(duration=150)
117
- except Exception as e_pydub_merge:
118
- _log(f"⚠️ خطای Pydub در پردازش فایل '{fp}' هنگام ادغام: {e_pydub_merge}. از این فایل صرف نظر می شود.", log_list)
119
  continue
120
- else: _log(f"⚠️ فایل پیدا نشد: {fp}", log_list)
121
-
122
- if len(combined) == 0: # Check if anything was combined
123
- _log("❌ هیچ قطعه صوتی برای ادغام موفقیت آمیز نبود.", log_list)
124
  return False
125
-
126
  combined.export(output_path, format="wav")
127
  _log(f"✅ فایل ادغام شده: {output_path}", log_list)
128
  return True
129
  except Exception as e:
130
- _log(f"❌ خطا در ادغام: {e}", log_list)
131
  return False
132
 
133
- def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list):
134
  output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
135
  max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS
136
  _log(f"🚀 شروع فرآیند با مدل: {FIXED_MODEL_NAME}...", log_list)
137
 
138
- api_key = os.environ.get("GEMINI_API_KEY") # YOUR METHOD
139
  if not api_key:
140
- _log("❌ کلید API تنظیم نشده.", log_list)
141
- return None # Return only path as per your original gradio_tts_interface
142
 
143
  try:
144
- client = genai.Client(api_key=api_key) # YOUR METHOD
145
  _log(f"کلاینت Gemini با کلید API برای مدل {FIXED_MODEL_NAME} مقداردهی اولیه شد.", log_list)
146
  except Exception as e:
147
- _log(f"❌ خطا در کلاینت: {e}", log_list)
148
  return None
149
 
150
  if not text_input or not text_input.strip():
151
  _log("❌ متن ورودی خالی.", log_list)
152
  return None
153
-
154
  text_chunks = smart_text_split(text_input, max_chunk, log_list)
155
  if not text_chunks:
156
  _log("❌ متن قابل پردازش نیست.", log_list)
@@ -158,72 +153,56 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
158
 
159
  generated_files = []
160
  for i, chunk in enumerate(text_chunks):
161
- _log(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)}...", log_list)
162
  final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
163
 
164
- # YOUR ORIGINAL API CALL STRUCTURE
165
- contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
166
- config = types.GenerateContentConfig(
 
167
  temperature=temperature_val,
168
  response_modalities=["audio"],
169
- speech_config=types.SpeechConfig(
170
- voice_config=types.VoiceConfig(
171
- prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=selected_voice)
172
  )
173
  )
174
  )
175
  _log(f"کانفیگ API برای قطعه {i+1}: دما={temperature_val}, صدا={selected_voice}, مدالیته=['audio']", log_list)
176
-
177
  fname_base = f"{output_base_name}_part{i+1:03d}"
178
  try:
 
179
  response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
180
 
181
- audio_bytes = None
182
- mime_type = None
183
-
184
  if response.candidates and response.candidates[0].content and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data:
185
  inline_data = response.candidates[0].content.parts[0].inline_data
186
- audio_bytes = inline_data.data
187
  mime_type = inline_data.mime_type
188
  _log(f"داده صوتی در candidate.part[0].inline_data برای قطعه {i+1} یافت شد.", log_list)
 
 
 
 
 
 
 
189
  else:
190
  _log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی در مسیر مورد انتظار.", log_list)
191
- # logging.debug(f"Full response for chunk {i+1}: {response}") # More detailed log if needed
192
- continue
193
-
194
- if not mime_type:
195
- _log(f"⚠️ MIME type برای قطعه {i+1} تعیین نشد. پیش فرض audio/wav.", log_list)
196
- mime_type = "audio/wav"
197
-
198
- ext = mimetypes.guess_extension(mime_type) or ".wav"
199
- if "audio/L" in mime_type and ext == ".wav":
200
- _log(f"تبدیل صدای خام PCM (MIME: {mime_type}) به WAV برای قطعه {i+1}.", log_list)
201
- audio_bytes = convert_to_wav(audio_bytes, mime_type)
202
- if not ext.startswith("."): ext = "." + ext
203
-
204
- fpath = save_binary_file(f"{fname_base}{ext}", audio_bytes, log_list)
205
- if fpath:
206
- generated_files.append(fpath)
207
-
208
- except types.StopCandidateException as e_stop: # YOUR ORIGINAL EXCEPTION HANDLING
209
- _log(f"❌ تولید برای قطعه {i+1} توسط API متوقف شد: {e_stop}", log_list)
210
- _log(f"دلیل توقف: {e_stop.finish_reason if hasattr(e_stop, 'finish_reason') else 'N/A'}", log_list)
211
  continue
212
- except Exception as e: # YOUR ORIGINAL EXCEPTION HANDLING
213
- _log(f"❌ خطا در تولید قطعه {i+1}: {type(e).__name__} - {e}", log_list)
214
- # logging.error(f"Full traceback for error in chunk {i+1}: {traceback.format_exc()}") # More detailed log
215
- continue
216
-
217
  if i < len(text_chunks) - 1 and len(text_chunks) > 1:
218
  _log(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از قطعه بعدی...", log_list)
219
  time.sleep(sleep_time)
220
 
221
  if not generated_files:
222
  _log("❌ هیچ فایلی تولید نشد.", log_list)
223
- return None
224
-
225
  _log(f"🎉 {len(generated_files)} فایل(های) صوتی تولید شد.", log_list)
226
-
227
  final_audio_file = None
228
  final_output_path_base = f"{output_base_name}_final"
229
 
@@ -232,56 +211,54 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
232
  merged_fn = f"{final_output_path_base}.wav"
233
  if os.path.exists(merged_fn):
234
  try: os.remove(merged_fn)
235
- except OSError: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}'.", log_list)
 
236
 
237
  if merge_audio_files_func(generated_files, merged_fn, log_list):
238
  final_audio_file = merged_fn
239
- for fp_path in generated_files: # Corrected variable name
240
  if os.path.abspath(fp_path) != os.path.abspath(merged_fn):
241
  try: os.remove(fp_path)
242
- except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}'.", log_list)
243
- else:
 
244
  _log("⚠️ ادغام فایل‌های صوتی ناموفق بود. اولین قطعه ارائه می‌شود.", log_list)
245
  if generated_files:
246
  try:
247
- # Renaming logic from your original code
248
  first_chunk_path = generated_files[0]
249
- target_final_name = f"{final_output_path_base}{os.path.splitext(first_chunk_path)[1]}"
250
- if os.path.exists(target_final_name) and os.path.abspath(first_chunk_path) != os.path.abspath(target_final_name):
251
- os.remove(target_final_name)
252
- if os.path.abspath(first_chunk_path) != os.path.abspath(target_final_name):
253
- os.rename(first_chunk_path, target_final_name)
254
- final_audio_file = target_final_name
255
- except Exception as e_rename:
256
- _log(f"خطا در تغییر نام فایل اولین قطعه (fallback): {e_rename}", log_list)
 
 
 
 
257
  final_audio_file = generated_files[0]
258
-
259
- if final_audio_file: # Additional cleanup from your original code
260
- for fp_cleanup in generated_files:
261
- if os.path.abspath(fp_cleanup) != os.path.abspath(final_audio_file):
262
- try: os.remove(fp_cleanup)
263
- except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت دیگر '{fp_cleanup}'.", log_list)
264
  else:
265
- _log("⚠️ pydub در دسترس نیست. اولین قطعه ارائه می‌شود.", log_list)
266
  if generated_files:
267
  try:
268
- # Renaming logic from your original code
269
  first_chunk_path = generated_files[0]
270
- target_final_name = f"{final_output_path_base}{os.path.splitext(first_chunk_path)[1]}"
271
- if os.path.exists(target_final_name) and os.path.abspath(first_chunk_path) != os.path.abspath(target_final_name):
272
- os.remove(target_final_name)
273
- if os.path.abspath(first_chunk_path) != os.path.abspath(target_final_name):
274
- os.rename(first_chunk_path, target_final_name)
275
- final_audio_file = target_final_name
 
276
  for i_gf in range(1, len(generated_files)):
277
  try: os.remove(generated_files[i_gf])
278
- except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت دیگر '{generated_files[i_gf]}'.", log_list)
279
- except Exception as e_rename_single:
280
- _log(f"خطا در تغییر نام فایل اولین قطعه (بدون pydub): {e_rename_single}", log_list)
281
  final_audio_file = generated_files[0]
282
  elif len(generated_files) == 1:
283
  try:
284
- # Renaming logic from your original code
285
  single_file_path = generated_files[0]
286
  target_ext = os.path.splitext(single_file_path)[1]
287
  final_single_fn = f"{final_output_path_base}{target_ext}"
@@ -290,44 +267,41 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
290
  if os.path.abspath(single_file_path) != os.path.abspath(final_single_fn):
291
  os.rename(single_file_path, final_single_fn)
292
  final_audio_file = final_single_fn
293
- except Exception as e_rename_single_final:
294
- _log(f"خطا در تغییر نام فایل تکی نهایی: {e_rename_single_final}", log_list)
295
  final_audio_file = generated_files[0]
296
 
297
  if final_audio_file and not os.path.exists(final_audio_file):
298
  _log(f"⚠️ فایل نهایی '{final_audio_file}' وجود ندارد!", log_list)
299
  return None
300
 
301
- return final_audio_file # Return only path as per your original
302
 
303
- # YOUR ORIGINAL gradio_tts_interface (UNCHANGED, except for progress removal for simplicity now)
304
def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature):
    """Gradio callback: resolve the input text and synthesise it to audio.

    Args:
        use_file_input: When truthy, read the text from ``uploaded_file``
            instead of ``text_to_speak``.
        uploaded_file: Upload handed over by the file component. Either an
            object exposing the temp-file path as ``.name`` or the path
            itself as a string (NOTE(review): which one arrives depends on
            the installed Gradio version - confirm against the deployment).
        text_to_speak: Text typed into the textbox.
        speech_prompt: Optional speaking-style prompt.
        speaker_voice: Name of the voice to use.
        temperature: Sampling temperature forwarded to the generator.

    Returns:
        Whatever ``core_generate_audio`` returns for the resolved text, or
        ``None`` when there is no usable input text or the file cannot be
        read.
    """
    logs = []
    if use_file_input:
        if not uploaded_file:
            return None
        file_path = getattr(uploaded_file, "name", uploaded_file)
        try:
            # utf-8-sig reads plain UTF-8 too, but drops a leading BOM that
            # str.strip() would otherwise leave in the text.
            with open(file_path, "r", encoding="utf-8-sig") as f:
                actual_text = f.read().strip()
        except (OSError, ValueError, TypeError) as e:
            _log(f"❌ خطا خواندن فایل: {e}", logs)
            return None
    else:
        actual_text = text_to_speak

    # Nothing to synthesise: empty textbox, whitespace only, or empty file.
    if not actual_text or not actual_text.strip():
        return None

    return core_generate_audio(actual_text, speech_prompt, speaker_voice, temperature, logs)
326
- # --- END: YOUR ORIGINAL TTS Core Logic ---
327
 
328
 
329
  # --- START: Gradio UI with AlphaTranslator_Styled Appearance ---
330
- # (CSS and HTML structure from previous combined version, applied to your original UI elements)
331
  FLY_PRIMARY_COLOR_HEX = "#4F46E5"
332
  FLY_SECONDARY_COLOR_HEX = "#10B981"
333
  FLY_ACCENT_COLOR_HEX = "#D97706"
@@ -339,14 +313,18 @@ FLY_BORDER_COLOR_HEX = "#D1D5DB"
339
  FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
340
  FLY_PANEL_BG_SIMPLE = "#E0F2FE"
341
 
342
- app_theme_outer = gr.themes.Base(
343
  font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
344
  ).set(
345
  body_background_fill=FLY_LIGHT_BACKGROUND_HEX,
346
  )
347
 
348
- custom_css_combined = f"""
 
 
 
349
  @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
 
350
  @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
351
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
352
  :root {{
@@ -355,226 +333,289 @@ custom_css_combined = f"""
355
  --fly-text-secondary: {FLY_SUBTLE_TEXT_HEX}; --fly-bg-light: {FLY_LIGHT_BACKGROUND_HEX};
356
  --fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
357
  --fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
358
- --font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif;
359
  --font-english: 'Poppins', 'Inter', system-ui, sans-serif;
360
  --radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
361
- --shadow-sm: 0 1px 2px 0 rgba(0,0,0,0.05); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -2px rgba(0,0,0,0.1);
362
- --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
363
  --shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1),0 8px 10px -6px rgba(0,0,0,0.1);
364
  --fly-primary-rgb: 79,70,229; --fly-accent-rgb: 217,119,6;
365
-
366
- /* Variables from your original AlphaTTS CSS (custom_css_inspired_by_image) for consistency */
367
- --app-font: 'Vazirmatn', sans-serif;
368
- --app-header-grad-start: #2980b9; /* Blue from your original CSS */
369
- --app-header-grad-end: #2ecc71; /* Green from your original CSS */
370
- --app-panel-bg: #FFFFFF; /* Panel BG from your original CSS */
371
- --app-input-bg: #F7F7F7; /* Input BG from your original CSS */
372
- --app-button-bg: #2979FF; /* Button BG from your original CSS */
373
- --app-main-bg: linear-gradient(170deg, #E0F2FE 0%, #F3E8FF 100%); /* Main BG from your original CSS */
374
- --app-text-primary: #333;
375
- --app-text-secondary: #555;
376
- --app-border-color: #E0E0E0;
377
- --radius-card: 20px;
378
- --radius-input: 8px;
379
- --shadow-card: 0 10px 30px -5px rgba(0,0,0,0.1);
380
- --shadow-button: 0 4px 10px -2px rgba(41,121,255,0.5);
 
 
 
 
 
 
 
 
 
 
381
  }}
382
- body {{font-family:var(--font-global);direction:rtl;background:var(--app-main-bg);color:var(--app-text-primary);line-height:1.7;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;font-size:16px;}}
383
- .gradio-container {{max-width:100% !important;width:100% !important;min-height:100vh;margin:0 auto !important;padding:0 !important;border-radius:0 !important;box-shadow:none !important;display:flex;flex-direction:column;}}
384
-
385
- /* Header styling from your original AlphaTTS CSS (alpha_header_html_v3) */
386
- .app-header-alpha {{
387
- padding: 3rem 1.5rem 4rem 1.5rem; text-align: center;
388
- background-image: linear-gradient(135deg, var(--app-header-grad-start) 0%, var(--app-header-grad-end) 100%);
389
- color: white; border-bottom-left-radius: var(--radius-card); border-bottom-right-radius: var(--radius-card);
390
- box-shadow: 0 6px 20px -5px rgba(0,0,0,0.2);
391
  }}
392
- .app-header-alpha h1 {{ font-size: 2.4em !important; font-weight: 800 !important; margin:0 0 0.5rem 0; text-shadow: 0 2px 4px rgba(0,0,0,0.15); }}
393
- .app-header-alpha p {{ font-size: 1.1em !important; color: rgba(255,255,255,0.9) !important; margin-top:0; opacity: 0.9; }}
394
-
395
- /* Main content panel from your original AlphaTTS CSS (main-content-panel-alpha) */
396
- .main-content-panel-alpha {{
397
- padding: 1.8rem 1.5rem; max-width: 680px; margin: -2.5rem auto 2rem auto;
398
- width: 90%; background-color: var(--app-panel-bg) !important; /* Use var */
399
- border-radius: var(--radius-card) !important; /* Use var */
400
- box-shadow: var(--shadow-card) !important; /* Use var */
401
- position:relative; z-index:10;
402
  }}
403
- @media (max-width: 768px) {{
404
- .main-content-panel-alpha {{ width: 95%; padding: 1.5rem 1rem; margin-top: -2rem; }}
405
- .app-header-alpha h1 {{font-size:2em !important;}}
406
- .app-header-alpha p {{font-size:1em !important;}}
407
  }}
408
 
409
- /* Button styling from your original AlphaTTS CSS (generate-button-final) */
410
- .content-panel-simple .gr-button.generate-button-tts
411
- {{
412
- background: var(--app-button-bg) !important; color: white !important; border:none !important;
413
- border-radius: var(--radius-input) !important; padding: 0.8rem 1.5rem !important;
414
- font-weight: 700 !important; font-size:1.05em !important;
415
- transition: all 0.3s ease; box-shadow: var(--shadow-button) !important; /* Use var */
416
- width:100% !important; margin-top:1.5rem !important;
417
  }}
418
- .content-panel-simple .gr-button.generate-button-tts:hover
419
- {{
420
- filter: brightness(1.1); transform: translateY(-2px);
421
- box-shadow: 0 6px 12px -3px rgba(41,121,255,0.6) !important; /* Use var with specific color */
422
  }}
423
 
424
- /* Input styling from your original AlphaTTS CSS */
425
- .content-panel-simple .gr-input > label + div > textarea,
426
- .content-panel-simple .gr-dropdown > label + div > div > input,
427
- .content-panel-simple .gr-dropdown > label + div > div > select,
428
- .content-panel-simple .gr-textbox > label + div > textarea,
429
- .content-panel-simple .gr-file > label + div
430
- {{
431
- border-radius: var(--radius-input) !important; border: 1px solid var(--app-border-color) !important;
432
- background-color: var(--app-input-bg) !important;
433
- box-shadow: inset 0 1px 2px rgba(0,0,0,0.05); padding: 0.75rem !important;
 
 
434
  }}
435
- .content-panel-simple .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
436
- .content-panel-simple .gr-input > label + div > textarea:focus,
437
- .content-panel-simple .gr-dropdown > label + div > div > input:focus,
438
- .content-panel-simple .gr-textbox > label + div > textarea:focus
439
- {{
440
- border-color: var(--app-button-bg) !important;
441
- box-shadow: 0 0 0 3px rgba(41,121,255,0.2) !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  }}
443
- /* Label text from your original AlphaTTS CSS */
444
- .content-panel-simple label > .label-text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  {{
446
- font-weight: 700 !important; color: var(--app-text-primary) !important;
447
- font-size: 0.95em !important; margin-bottom: 0.5rem !important;
448
  }}
449
- /* Your original icon styling (if elem_ids match) */
450
- label[for*="text_input_main_alpha_v3"] > .label-text::before {{ content: '📝'; margin-left: 8px; vertical-align: middle; opacity: 0.7;}}
451
- label[for*="speech_prompt_alpha_v3"] > .label-text::before {{ content: '🗣️'; margin-left: 8px; vertical-align: middle; opacity: 0.7;}}
452
- label[for*="speaker_voice_alpha_v3"] > .label-text::before {{ content: '🎤'; margin-left: 8px; vertical-align: middle; opacity: 0.7;}}
453
- label[for*="temperature_slider_alpha_v3"] > .label-text::before {{ content: '🌡️'; margin-left: 8px; vertical-align: middle; opacity: 0.7;}}
454
-
455
- /* Output audio player and temperature description from your original AlphaTTS CSS */
456
- #output_audio_player_alpha_v3 audio, .output-audio-player-tts audio {{ width: 100%; border-radius: var(--radius-input); margin-top:0.8rem; }}
457
- .temp_description_class_alpha_v3 {{ font-size: 0.85em; color: #777; margin-top: -0.4rem; margin-bottom: 1rem; }}
458
-
459
- /* Footer from your original AlphaTTS CSS */
460
- .app-footer-final {{
461
- text-align:center;font-size:0.9em;color: var(--app-text-secondary);opacity:0.8;
462
- margin-top:3rem;padding:1.5rem 0; border-top:1px solid var(--app-border-color);
463
  }}
464
- /* Hide default Gradio footer */
465
- footer.svelte-11rogk8, .gradio-footer.svelte-1k000mq {{display:none !important;}}
466
- """
 
 
 
 
 
 
 
467
 
468
- # Header HTML from your original AlphaTTS
469
- alpha_header_html_v3 = """
470
- <div class='app-header-alpha'>
471
- <h1>Alpha TTS</h1>
472
- <p>جادوی تبدیل متن به صدا در دستان شما</p>
473
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
474
  """
475
 
476
- if not PYDUB_AVAILABLE:
477
- logging.warning("Pydub (for audio merging) not found. Merging will be disabled.")
478
- if not os.environ.get("GEMINI_API_KEY"):
479
- logging.warning("GEMINI_API_KEY environment variable not set. TTS will likely fail.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
 
482
- with gr.Blocks(theme=gr.themes.Base(font=[gr.themes.GoogleFont("Vazirmatn")]), css=custom_css_combined, title=f"آلفا TTS ({FIXED_MODEL_NAME.split('-')[1]})") as demo:
483
- gr.HTML(alpha_header_html_v3) # YOUR HEADER
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
 
485
- # Using 'main-content-panel-alpha' class for the main content area as in your original CSS
486
- with gr.Column(elem_classes=["main-content-panel-alpha"]):
487
- # YOUR ORIGINAL UI ELEMENTS (with original elem_ids for CSS)
488
- use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False, elem_id="use_file_cb_alpha_v3")
489
-
490
- uploaded_file_input = gr.File(
491
- label=" ",
492
- file_types=['.txt'],
493
- visible=False,
494
- elem_id="file_uploader_alpha_main_v3" # YOUR ELEM_ID
495
- )
496
-
497
- text_to_speak_tb = gr.Textbox(
498
- label="متن فارسی برای تبدیل",
499
- placeholder="مثال: سلام، فردا هوا چطور است؟",
500
- lines=5,
501
- value="",
502
- visible=True,
503
- elem_id="text_input_main_alpha_v3" # YOUR ELEM_ID
504
- )
505
 
506
- # Event handler from your original code
507
- use_file_input_cb.change(
508
- fn=lambda x: (gr.update(visible=x, label=" " if x else "متن فارسی برای تبدیل"), gr.update(visible=not x)),
509
- inputs=use_file_input_cb,
510
- outputs=[uploaded_file_input, text_to_speak_tb]
511
- )
 
 
 
 
 
 
 
 
 
 
 
512
 
513
- speech_prompt_tb = gr.Textbox(
514
- label="سبک گفتار (اختیاری)",
515
- placeholder="مثال: با لحنی شاد و پرانرژی",
516
- value="با لحنی دوستانه و رسا صحبت کن.",
517
- lines=2, elem_id="speech_prompt_alpha_v3" # YOUR ELEM_ID
518
- )
519
-
520
- speaker_voice_dd = gr.Dropdown(
521
- SPEAKER_VOICES, label="انتخاب گوینده و لهجه", value="Charon", elem_id="speaker_voice_alpha_v3" # YOUR ELEM_ID
522
- )
523
-
524
- temperature_slider = gr.Slider(
525
- minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="میزان خلاقیت صدا",
526
- elem_id="temperature_slider_alpha_v3" # YOUR ELEM_ID
527
- )
528
- gr.Markdown("<p class='temp_description_class_alpha_v3'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایین‌تر = یکنواختی بیشتر.</p>")
529
 
530
- generate_button = gr.Button(
531
- "🚀 تولید و پخش صدا",
532
- elem_classes=["generate-button-tts"], # Applied class for styling from combined CSS
533
- elem_id="generate_button_alpha_v3" # YOUR ELEM_ID
534
- )
535
-
536
- output_audio = gr.Audio(
537
- label=" ", type="filepath",
538
- elem_id="output_audio_player_alpha_v3", # YOUR ELEM_ID
539
- elem_classes=["output-audio-player-tts"] # Added class for CSS consistency
540
- )
541
-
542
- generate_button.click(
543
- fn=gradio_tts_interface, # YOUR INTERFACE FUNCTION
544
- # Your original inputs, progress removed for now as it was not fully integrated
545
- inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
546
- outputs=[output_audio] # Your interface function returns only audio path
547
- )
548
 
549
- # Your original Examples section
550
- gr.Markdown("<h3 class='section-title-main-alpha' style='margin-top:2.5rem; text-align:center; border-bottom:none;'>نمونه‌های کاربردی</h3>", elem_id="examples_section_title_v3")
551
- gr.Examples(
552
- examples=[
553
- [False, None, "سلام بر شما، امیدوارم روز خوبی داشته باشید.", "با لحنی گرم و صمیمی.", "Zephyr", 0.85],
554
- [False, None, "این یک آزمایش برای بررسی کیفیت صدای تولید شده توسط هوش مصنوعی آلفا است.", "با صدایی طبیعی و روان.", "Charon", 0.9],
555
- ],
556
- inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
557
- outputs=[output_audio], # Outputting only to audio as per your original
558
  fn=gradio_tts_interface,
559
- cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true" # Added this for optional caching
 
560
  )
561
-
562
- # Footer from your original AlphaTTS (app-footer-final)
563
- gr.Markdown(f"<p class='app-footer-final'>Alpha Language Learning © 2024 - Model: {FIXED_MODEL_NAME}</p>")
564
-
565
 
566
  if __name__ == "__main__":
567
- # Auto-restart logic can be added here if desired, but kept out for strict "no logic change"
568
- # if os.getenv("AUTO_RESTART_ENABLED", "true").lower() == "true":
569
- # import threading, sys
570
- # def auto_restart_service(): # Minimal auto-restart
571
- # time.sleep(24*60*60)
572
- # os._exit(1)
573
- # threading.Thread(target=auto_restart_service, daemon=True).start()
574
 
575
  demo.launch(
576
- server_name="0.0.0.0", # Good for Docker/HF Spaces
577
- server_port=int(os.getenv("PORT", 7860)), # Standard for HF Spaces
578
- debug=os.environ.get("GRADIO_DEBUG", "False").lower() == "true", # Optional debug
579
  show_error=True
580
  )
 
1
  import gradio as gr
2
+ # import base64 # Not used in your original core logic
3
  import mimetypes
4
  import os
5
  import re
6
  import struct
7
  import time
8
+ # import zipfile # Not used in your original core logic
9
  from google import genai
10
+ from google.genai import types as genai_types # Aliased to avoid conflict with built-in 'types'
11
 
12
+ import logging # Standard Python logging
 
 
 
 
 
 
 
13
 
14
  try:
15
  from pydub import AudioSegment
16
  PYDUB_AVAILABLE = True
17
  except ImportError:
18
  PYDUB_AVAILABLE = False
19
+ # logging.warning("Pydub is not available. Audio merging will be disabled.") # Initialized later
20
 
21
+ # --- Basic Logging Setup ---
22
+ # Using a simpler logging setup if the AlphaTranslator_Styled one is too complex for "no other changes"
23
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
24
 
25
+ # --- START: Core TTS Logic from YOUR AlphaTTS_Original (UNCHANGED) ---
 
26
  SPEAKER_VOICES = [
27
  "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
28
  "Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
 
30
  "Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
31
  "Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
32
  ]
33
+ FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts" # FROM YOUR ORIGINAL CODE
34
  DEFAULT_MAX_CHUNK_SIZE = 3800
35
  DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
36
  DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
37
 
38
  def _log(message, log_list): # YOUR _log function
39
  log_list.append(message)
40
+ logging.info(f"[AlphaTTS_LOG] {message}") # Standard logging also
 
41
 
42
  def save_binary_file(file_name, data, log_list):
43
  try:
 
63
  param = param.strip()
64
  if param.lower().startswith("rate="):
65
  try: rate = int(param.split("=", 1)[1])
66
+ except ValueError: pass
67
  elif param.startswith("audio/L"):
68
  try: bits = int(param.split("L", 1)[1])
69
+ except ValueError: pass
70
  return {"bits_per_sample": bits, "rate": rate}
71
 
72
  def smart_text_split(text, max_size=3800, log_list=None):
73
  if len(text) <= max_size: return [text]
74
  chunks, current_chunk = [], ""
75
+ sentences = re.split(r'(?<=[.!?؟۔])\s+', text) # Added Persian full stop for robustness
 
76
  for sentence in sentences:
77
  if len(current_chunk) + len(sentence) + 1 > max_size:
78
  if current_chunk: chunks.append(current_chunk.strip())
79
  current_chunk = sentence
80
  while len(current_chunk) > max_size:
81
+ split_idx = -1
82
+ for punc in ['،', ',', ';', ':', ' ']:
83
+ try:
84
+ idx = current_chunk.rindex(punc, max_size // 2, max_size)
85
+ if idx > split_idx:
86
+ split_idx = idx
87
+ except ValueError:
88
+ pass
89
+ if split_idx != -1 :
90
+ part, current_chunk = current_chunk[:split_idx+1], current_chunk[split_idx+1:]
91
+ else:
92
+ part, current_chunk = current_chunk[:max_size], current_chunk[max_size:]
93
  chunks.append(part.strip())
94
+ else: current_chunk += (" " if current_chunk and sentence else "") + sentence
95
  if current_chunk: chunks.append(current_chunk.strip())
96
  final_chunks = [c for c in chunks if c]
97
  if log_list: _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
 
99
 
100
  def merge_audio_files_func(file_paths, output_path, log_list):
101
  if not PYDUB_AVAILABLE:
102
+ _log("❌ pydub در دسترس نیست. ادغام انجام نشد.", log_list)
103
  return False
104
  try:
105
  _log(f"🔗 ادغام {len(file_paths)} فایل صوتی...", log_list)
106
  combined = AudioSegment.empty()
107
  for i, fp in enumerate(file_paths):
108
  if os.path.exists(fp):
109
+ try:
110
  segment = AudioSegment.from_file(fp)
111
  combined += segment
112
  if i < len(file_paths) - 1:
113
  combined += AudioSegment.silent(duration=150)
114
+ except Exception as e_pydub:
115
+ _log(f"⚠️ خطای Pydub در پردازش فایل '{fp}': {e_pydub}. از این فایل صرف نظر می شود.", log_list)
116
  continue
117
+ else:
118
+ _log(f"⚠️ فایل پیدا نشد: {fp}", log_list)
119
+ if len(combined) == 0:
120
+ _log("❌ هیچ قطعه صوتی برای ادغام وجود ندارد.", log_list)
121
  return False
 
122
  combined.export(output_path, format="wav")
123
  _log(f"✅ فایل ادغام شده: {output_path}", log_list)
124
  return True
125
  except Exception as e:
126
+ _log(f"❌ خطا در ادغام: {e}", log_list) # traceback.format_exc() removed to keep it closer to original
127
  return False
128
 
129
+ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list): # YOUR core_generate_audio
130
  output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
131
  max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS
132
  _log(f"🚀 شروع فرآیند با مدل: {FIXED_MODEL_NAME}...", log_list)
133
 
134
+ api_key = os.environ.get("GEMINI_API_KEY") # YOUR WAY OF GETTING API KEY
135
  if not api_key:
136
+ _log("❌ کلید API با نام GEMINI_API_KEY در متغیرهای محیطی تنظیم نشده.", log_list)
137
+ return None # Return None only, as per your original AlphaTTS
138
 
139
  try:
140
+ client = genai.Client(api_key=api_key) # YOUR WAY OF CLIENT INSTANTIATION
141
  _log(f"کلاینت Gemini با کلید API برای مدل {FIXED_MODEL_NAME} مقداردهی اولیه شد.", log_list)
142
  except Exception as e:
143
+ _log(f"❌ خطا در مقداردهی اولیه کلاینت Gemini: {e}", log_list)
144
  return None
145
 
146
  if not text_input or not text_input.strip():
147
  _log("❌ متن ورودی خالی.", log_list)
148
  return None
 
149
  text_chunks = smart_text_split(text_input, max_chunk, log_list)
150
  if not text_chunks:
151
  _log("❌ متن قابل پردازش نیست.", log_list)
 
153
 
154
  generated_files = []
155
  for i, chunk in enumerate(text_chunks):
156
+ _log(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)} (صدا: {selected_voice}, دما: {temperature_val})...", log_list)
157
  final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
158
 
159
+ # Using genai_types (aliased) for Content, Part etc. as in your original imports
160
+ contents = [genai_types.Content(role="user", parts=[genai_types.Part.from_text(text=final_text)])]
161
+
162
+ config = genai_types.GenerateContentConfig( # YOUR CONFIG OBJECT
163
  temperature=temperature_val,
164
  response_modalities=["audio"],
165
+ speech_config=genai_types.SpeechConfig(
166
+ voice_config=genai_types.VoiceConfig(
167
+ prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)
168
  )
169
  )
170
  )
171
  _log(f"کانفیگ API برای قطعه {i+1}: دما={temperature_val}, صدا={selected_voice}, مدالیته=['audio']", log_list)
 
172
  fname_base = f"{output_base_name}_part{i+1:03d}"
173
  try:
174
+ # YOUR API CALL
175
  response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
176
 
 
 
 
177
  if response.candidates and response.candidates[0].content and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data:
178
  inline_data = response.candidates[0].content.parts[0].inline_data
179
+ data_buffer = inline_data.data
180
  mime_type = inline_data.mime_type
181
  _log(f"داده صوتی در candidate.part[0].inline_data برای قطعه {i+1} یافت شد.", log_list)
182
+ ext = mimetypes.guess_extension(mime_type) or ".wav"
183
+ if "audio/L" in mime_type and ext == ".wav":
184
+ _log(f"تبدیل صدای خام PCM (MIME: {mime_type}) به WAV برای قطعه {i+1}.", log_list)
185
+ data_buffer = convert_to_wav(data_buffer, mime_type)
186
+ if not ext.startswith("."): ext = "." + ext
187
+ fpath = save_binary_file(f"{fname_base}{ext}", data_buffer, log_list)
188
+ if fpath: generated_files.append(fpath)
189
  else:
190
  _log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی در مسیر مورد انتظار.", log_list)
191
+ _log(f"ساختار کامل پاسخ (اولین 500 کاراکتر): {str(response)[:500]}", log_list)
192
+ # continue # As per your original code, it continues
193
+ except Exception as e: # Catching generic Exception as in your original
194
+ _log(f"❌ خطا در تولید قطعه {i+1}: {e}", log_list)
195
+ # traceback.format_exc() was not in your original core_generate_audio, so removed here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  continue
 
 
 
 
 
197
  if i < len(text_chunks) - 1 and len(text_chunks) > 1:
198
  _log(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از قطعه بعدی...", log_list)
199
  time.sleep(sleep_time)
200
 
201
  if not generated_files:
202
  _log("❌ هیچ فایلی تولید نشد.", log_list)
203
+ return None # Return None only as per your original AlphaTTS
204
+
205
  _log(f"🎉 {len(generated_files)} فایل(های) صوتی تولی�� شد.", log_list)
 
206
  final_audio_file = None
207
  final_output_path_base = f"{output_base_name}_final"
208
 
 
211
  merged_fn = f"{final_output_path_base}.wav"
212
  if os.path.exists(merged_fn):
213
  try: os.remove(merged_fn)
214
+ except OSError: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}' (خطای سیستم عامل)", log_list)
215
+ except Exception as e_rm: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e_rm}", log_list)
216
 
217
  if merge_audio_files_func(generated_files, merged_fn, log_list):
218
  final_audio_file = merged_fn
219
+ for fp_path in generated_files:
220
  if os.path.abspath(fp_path) != os.path.abspath(merged_fn):
221
  try: os.remove(fp_path)
222
+ except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}' (خطای سیستم عامل)", log_list)
223
+ except Exception as e_del: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}': {e_del}", log_list)
224
+ else:
225
  _log("⚠️ ادغام فایل‌های صوتی ناموفق بود. اولین قطعه ارائه می‌شود.", log_list)
226
  if generated_files:
227
  try:
 
228
  first_chunk_path = generated_files[0]
229
+ target_ext = os.path.splitext(first_chunk_path)[1]
230
+ fallback_fn = f"{final_output_path_base}{target_ext}" # Simplified name for fallback
231
+ if os.path.exists(fallback_fn) and os.path.abspath(first_chunk_path) != os.path.abspath(fallback_fn):
232
+ os.remove(fallback_fn)
233
+ if os.path.abspath(first_chunk_path) != os.path.abspath(fallback_fn):
234
+ os.rename(first_chunk_path, fallback_fn)
235
+ final_audio_file = fallback_fn
236
+ for i_gf in range(1, len(generated_files)):
237
+ try: os.remove(generated_files[i_gf])
238
+ except: pass # Keep silent as per your original
239
+ except Exception as e_rename_fb:
240
+ _log(f"خطا در تغییر نام فایل اولین قطعه: {e_rename_fb}", log_list) # Was `e_rename` in your original
241
  final_audio_file = generated_files[0]
 
 
 
 
 
 
242
  else:
243
+ _log("⚠️ pydub نیست. اولین قطعه ارائه می‌شود.", log_list)
244
  if generated_files:
245
  try:
 
246
  first_chunk_path = generated_files[0]
247
+ target_ext = os.path.splitext(first_chunk_path)[1]
248
+ single_fallback_fn = f"{final_output_path_base}{target_ext}" # Simplified name
249
+ if os.path.exists(single_fallback_fn) and os.path.abspath(first_chunk_path) != os.path.abspath(single_fallback_fn):
250
+ os.remove(single_fallback_fn)
251
+ if os.path.abspath(first_chunk_path) != os.path.abspath(single_fallback_fn):
252
+ os.rename(first_chunk_path, single_fallback_fn)
253
+ final_audio_file = single_fallback_fn
254
  for i_gf in range(1, len(generated_files)):
255
  try: os.remove(generated_files[i_gf])
256
+ except: pass # Keep silent
257
+ except Exception as e_rename_single_npd: # Was `e_rename_single` in your original
258
+ _log(f"خطا در تغییر نام فایل اولین قطعه (بدون pydub): {e_rename_single_npd}", log_list)
259
  final_audio_file = generated_files[0]
260
  elif len(generated_files) == 1:
261
  try:
 
262
  single_file_path = generated_files[0]
263
  target_ext = os.path.splitext(single_file_path)[1]
264
  final_single_fn = f"{final_output_path_base}{target_ext}"
 
267
  if os.path.abspath(single_file_path) != os.path.abspath(final_single_fn):
268
  os.rename(single_file_path, final_single_fn)
269
  final_audio_file = final_single_fn
270
+ except Exception as e_rename_sgl_final: # Was `e_rename_single_final` in your original
271
+ _log(f"خطا در تغییر نام فایل تکی نهایی: {e_rename_sgl_final}", log_list)
272
  final_audio_file = generated_files[0]
273
 
274
  if final_audio_file and not os.path.exists(final_audio_file):
275
  _log(f"⚠️ فایل نهایی '{final_audio_file}' وجود ندارد!", log_list)
276
  return None
277
 
278
+ return final_audio_file # Returns only path, as per your original AlphaTTS
279
 
280
+ # Your original gradio_tts_interface
281
def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature, progress=gr.Progress(track_tqdm=True)):
    """Gradio callback: resolve the text to speak and synthesize it.

    Args:
        use_file_input: When truthy, the text is read from ``uploaded_file``
            instead of ``text_to_speak``.
        uploaded_file: Value of the file component. Either an object exposing
            the temporary file path as ``.name`` or the path itself is
            accepted.
        text_to_speak: Text typed into the textbox.
        speech_prompt: Optional speaking-style instruction.
        speaker_voice: Name of the prebuilt voice.
        temperature: Sampling temperature forwarded to the model.
        progress: Gradio progress tracker (not used directly here).

    Returns:
        Path of the generated audio file, or None when there is no usable
        input or generation failed.
    """
    logs = []
    if use_file_input:
        if not uploaded_file:
            _log("❌ فایلی انتخاب نشده.", logs)
            return None
        # Fall back to the value itself when it has no ``.name`` attribute,
        # i.e. when the component handed over a plain path string.
        file_path = getattr(uploaded_file, "name", uploaded_file)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                actual_text = f.read().strip()
        except Exception as e:
            # UI boundary: report any read/decode problem instead of raising.
            _log(f"❌ خطا خواندن فایل: {e}", logs)
            return None
    else:
        actual_text = text_to_speak

    # Single emptiness check shared by both input modes.
    if not actual_text or not actual_text.strip():
        _log("❌ متن ورودی خالی.", logs)
        return None

    return core_generate_audio(actual_text, speech_prompt, speaker_voice, temperature, logs)
300
+ # --- END: Core TTS Logic from YOUR AlphaTTS_Original (UNCHANGED) ---
 
 
 
301
 
302
 
303
  # --- START: Gradio UI with AlphaTranslator_Styled Appearance ---
304
+ # (Using CSS variables from AlphaTranslator_Styled for colors and fonts)
305
  FLY_PRIMARY_COLOR_HEX = "#4F46E5"
306
  FLY_SECONDARY_COLOR_HEX = "#10B981"
307
  FLY_ACCENT_COLOR_HEX = "#D97706"
 
313
  FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
314
  FLY_PANEL_BG_SIMPLE = "#E0F2FE"
315
 
316
# Outer Gradio theme: Inter with system fallbacks, plus the light page
# background colour shared with the custom CSS.
_outer_theme_fonts = [gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
app_theme_outer_styled = gr.themes.Base(font=_outer_theme_fonts).set(
    body_background_fill=FLY_LIGHT_BACKGROUND_HEX,
)
321
 
322
+ # CSS from AlphaTranslator_Styled, adapted slightly for your component names/IDs if needed
323
+ # Your original component IDs are like "use_file_cb_alpha_v3", "file_uploader_alpha_main_v3", etc.
324
+ # The CSS below uses general selectors but can be made more specific if those IDs are kept.
325
+ applied_css_for_alphatts = f"""
326
  @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
327
+ /* Poppins and Inter are from AlphaTranslator_Styled, Vazirmatn from your AlphaTTS_Original theme */
328
  @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
329
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
330
  :root {{
 
333
  --fly-text-secondary: {FLY_SUBTLE_TEXT_HEX}; --fly-bg-light: {FLY_LIGHT_BACKGROUND_HEX};
334
  --fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
335
  --fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
336
+ --font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif; /* Vazirmatn prioritized */
337
  --font-english: 'Poppins', 'Inter', system-ui, sans-serif;
338
  --radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
 
 
339
  --shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1),0 8px 10px -6px rgba(0,0,0,0.1);
340
  --fly-primary-rgb: 79,70,229; --fly-accent-rgb: 217,119,6;
341
+
342
+ /* Variables from your original AlphaTTS CSS if they were different and needed */
343
+ /* For example, if your original had --app-button-bg for the blue button */
344
+ --app-button-bg-original: #2979FF; /* Blue from your original AlphaTTS button */
345
+ --shadow-button-original: 0 4px 10px -2px rgba(41,121,255,0.5);
346
+ --radius-input-original: 8px;
347
+ --app-border-color-original: #E0E0E0;
348
+ --app-input-bg-original: #F7F7F7;
349
+ }}
350
+ body {{
351
+ font-family:var(--font-global); direction:rtl; background-color:var(--fly-bg-light);
352
+ color:var(--fly-text-primary); line-height:1.7; font-size:16px;
353
+ }}
354
+ .gradio-container {{ /* Overall container styling from AlphaTranslator_Styled */
355
+ max-width:100% !important; width:100% !important; min-height:100vh;
356
+ margin:0 auto !important; padding:0 !important; border-radius:0 !important;
357
+ box-shadow:none !important; background:linear-gradient(170deg, #E0F2FE 0%, #F3E8FF 100%);
358
+ display:flex; flex-direction:column;
359
+ }}
360
+ /* Header styling from AlphaTranslator_Styled */
361
+ .app-header-alphatts {{ /* Changed class name slightly to avoid conflict if both apps run */
362
+ text-align:center; padding:2.5rem 1rem; margin:0;
363
+ background:linear-gradient(135deg,var(--fly-primary) 0%,var(--fly-secondary) 100%);
364
+ color:var(--fly-bg-white); border-bottom-left-radius:var(--radius-xl);
365
+ border-bottom-right-radius:var(--radius-xl); box-shadow:var(--shadow-lg);
366
+ position:relative; overflow:hidden;
367
  }}
368
+ .app-header-alphatts::before {{ /* Decorative element from AlphaTranslator_Styled */
369
+ content:''; position:absolute; top:-50px; right:-50px; width:150px; height:150px;
370
+ background:rgba(255,255,255,0.1); border-radius:var(--radius-full);
371
+ opacity:0.5; transform:rotate(45deg);
 
 
 
 
 
372
  }}
373
+ .app-header-alphatts h1 {{ /* h1 from AlphaTranslator_Styled */
374
+ font-size:2.25em !important; font-weight:800 !important; margin:0 0 0.5rem 0;
375
+ font-family:var(--font-english); letter-spacing:-0.5px; text-shadow:0 2px 4px rgba(0,0,0,0.1);
 
 
 
 
 
 
 
376
  }}
377
+ .app-header-alphatts p {{ /* p from AlphaTranslator_Styled */
378
+ font-size:1em !important; margin-top:0.25rem; font-weight:400;
379
+ color:rgba(255,255,255,0.85) !important;
 
380
  }}
381
 
382
+ /* Main content panel styling from AlphaTranslator_Styled */
383
+ .main-content-area-alphatts {{ /* Changed class name slightly */
384
+ flex-grow:1; padding:0.75rem; width:100%; margin:0 auto; box-sizing:border-box;
 
 
 
 
 
385
  }}
386
+ .content-panel-alphatts {{ /* Changed class name slightly */
387
+ background-color:var(--fly-bg-white); padding:1rem; border-radius:var(--radius-xl);
388
+ box-shadow:var(--shadow-xl); margin-top:-2rem; position:relative; z-index:10;
389
+ margin-bottom:2rem; width:100%; box-sizing:border-box;
390
  }}
391
 
392
+ /* Styling for YOUR UI elements, applying AlphaTranslator_Styled aesthetics */
393
+ /* Inputs (Textbox, Dropdown, File) */
394
+ .content-panel-alphatts .gr-input > label + div > textarea,
395
+ .content-panel-alphatts .gr-dropdown > label + div > div > input,
396
+ .content-panel-alphatts .gr-dropdown > label + div > div > select,
397
+ .content-panel-alphatts .gr-textbox > label + div > textarea,
398
+ .content-panel-alphatts .gr-file > label + div /* For file input styling */
399
+ {{
400
+ border-radius:var(--radius-input-original) !important; /* Your original radius */
401
+ border:1.5px solid var(--fly-border-color) !important; /* Border from AlphaTranslator */
402
+ font-size:0.95em !important; background-color:var(--fly-input-bg-simple) !important; /* BG from AlphaTranslator */
403
+ padding:10px 12px !important; color:var(--fly-text-primary) !important;
404
  }}
405
+ .content-panel-alphatts .gr-input > label + div > textarea:focus,
406
+ .content-panel-alphatts .gr-dropdown > label + div > div > input:focus,
407
+ .content-panel-alphatts .gr-dropdown > label + div > div > select:focus,
408
+ .content-panel-alphatts .gr-textbox > label + div > textarea:focus,
409
+ .content-panel-alphatts .gr-file > label + div:focus-within
410
+ {{
411
+ border-color:var(--fly-primary) !important; /* Focus border from AlphaTranslator */
412
+ box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;
413
+ background-color:var(--fly-bg-white) !important;
414
+ }}
415
+ .content-panel-alphatts .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
416
+
417
+ /* Button: Using --fly-accent for consistency with AlphaTranslator's primary action color */
418
+ .content-panel-alphatts .gr-button[elem_id="generate_button_alpha_v3"], /* Your button ID */
419
+ .content-panel-alphatts button[variant="primary"] /* General primary button */
420
+ {{
421
+ background:var(--fly-accent) !important; /* Orange accent from AlphaTranslator */
422
+ margin-top:1.5rem !important; padding:12px 20px !important; /* Adjusted padding */
423
+ transition:all 0.25s ease-in-out !important; color:white !important; font-weight:600 !important;
424
+ border-radius:var(--radius-input-original) !important; /* Your original radius */ border:none !important;
425
+ box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;
426
+ width:100% !important; font-size:1.05em !important; /* Your original font size */
427
+ display:flex; align-items:center; justify-content:center;
428
  }}
429
+ .content-panel-alphatts .gr-button[elem_id="generate_button_alpha_v3"]:hover,
430
+ .content-panel-alphatts button[variant="primary"]:hover
431
+ {{
432
+ background:#B45309 !important; /* Darker orange */ transform:translateY(-1px) !important;
433
+ box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;
434
+ }}
435
+
436
+ /* Labels (using AlphaTranslator_Styled general label style) */
437
+ .content-panel-alphatts label > span.label-text
438
+ {{
439
+ font-weight:500 !important; color:#4B5563 !important;
440
+ font-size:0.88em !important; margin-bottom:6px !important; display:inline-block;
441
+ }}
442
+ /* Your original specific label styling with icons (if you want to keep them) */
443
+ /* You would need to ensure your Gradio labels have the correct `for` attribute linking to input `elem_id`
444
+ or use JavaScript to add these pseudo-elements if Gradio doesn't directly support `for` on labels.
445
+ For simplicity, I'm omitting the ::before icon styles unless you confirm they are essential
446
+ and your Gradio setup can support them easily. The general label style above will apply.
447
+ */
448
+
449
+ /* Temperature description (from your original AlphaTTS CSS) */
450
+ .content-panel-alphatts .temp_description_class_alpha_v3 {{
451
+ font-size: 0.85em; color: #777; margin-top: -0.4rem; margin-bottom: 1rem;
452
+ }}
453
+
454
+ /* Audio Player (general styling, can be targeted by ID if set) */
455
+ .content-panel-alphatts .gr-audio audio, /* General audio player */
456
+ .content-panel-alphatts #output_audio_player_alpha_v3 audio /* Your specific ID */
457
  {{
458
+ width: 100%; border-radius: var(--radius-input-original); margin-top:0.8rem;
 
459
  }}
460
+
461
+ /* Examples (using AlphaTranslator_Styled examples button style) */
462
+ .content-panel-alphatts div[label*="نمونه‌های کاربردی"] .gr-button.gr-button-tool, /* Targetting by label */
463
+ .content-panel-alphatts div[label*="نمونه‌های کاربردی"] .gr-sample-button
464
+ {{
465
+ background-color:#E0E7FF !important; color:var(--fly-primary) !important;
466
+ border-radius:6px !important; font-size:0.78em !important; padding:4px 8px !important;
 
 
 
 
 
 
 
467
  }}
468
+ .content-panel-alphatts .custom-hr {{height:1px;background-color:var(--fly-border-color);margin:1.5rem 0;border:none;}}
469
+
470
+ /* Footer styling from AlphaTranslator_Styled */
471
+ .app-footer-alphatts {{ /* Changed class name slightly */
472
+ text-align:center;font-size:0.85em;color:var(--fly-text-secondary);margin-top:2.5rem;
473
+ padding:1rem 0;background-color:rgba(255,255,255,0.3);backdrop-filter:blur(5px);
474
+ border-top:1px solid var(--fly-border-color);
475
+ }}
476
+ footer {{display:none !important;}} /* Hides default Gradio footer */
477
+
478
 
479
+ /* Responsive adjustments from AlphaTranslator_Styled */
480
+ @media (min-width:640px) {{
481
+ .main-content-area-alphatts {{padding:1.5rem;max-width:700px;}}
482
+ .content-panel-alphatts {{padding:1.5rem;}}
483
+ .app-header-alphatts h1 {{font-size:2.5em !important;}}
484
+ .app-header-alphatts p {{font-size:1.05em !important;}}
485
+ }}
486
+ @media (min-width:768px) {{
487
+ .main-content-area-alphatts {{max-width:780px;}}
488
+ .content-panel-alphatts {{padding:2rem;}}
489
+ .content-panel-alphatts .gr-button[elem_id="generate_button_alpha_v3"],
490
+ .content-panel-alphatts button[variant="primary"]
491
+ {{
492
+ width:auto !important; align-self:flex-start;
493
+ }}
494
+ .app-header-alphatts h1 {{font-size:2.75em !important;}}
495
+ .app-header-alphatts p {{font-size:1.1em !important;}}
496
+ }}
497
  """
498
 
499
+ # Using your original Gradio Blocks structure
500
+ # The theme `gr.themes.Base(font=[gr.themes.GoogleFont("Vazirmatn")])` is from your original.
501
+ # We are applying `app_theme_outer_styled` for the body background and `applied_css_for_alphatts` for specifics.
502
with gr.Blocks(theme=app_theme_outer_styled, css=applied_css_for_alphatts, title=f"آلفا TTS ({FIXED_MODEL_NAME.split('-')[1]})") as demo:
    # Page header banner.
    gr.HTML(f"""
    <div class='app-header-alphatts'>
    <h1>🚀 Alpha TTS</h1>
    <p>جادوی تبدیل متن به صدا در دستان شما (Gemini {FIXED_MODEL_NAME.split('-')[1]})</p>
    </div>
    """)

    with gr.Column(elem_classes=["main-content-area-alphatts"]):
        with gr.Column(elem_classes=["content-panel-alphatts"]):
            # Warn in the UI when the API key is missing; generation would
            # otherwise fail only after the user presses the button.
            if not os.environ.get("GEMINI_API_KEY"):
                missing_key_msg = (
                    "⚠️ هشدار: متغیر محیطی GEMINI_API_KEY تنظیم نشده است. "
                    "قابلیت تبدیل متن به گفتار احتمالاً کار نخواهد کرد. "
                    "لطفاً این متغیر را در بخش Secrets این Space تنظیم کنید."
                )
                gr.Markdown(f"<div class='api-warning-message'>{missing_key_msg}</div>")

            use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False, elem_id="use_file_cb_alpha_v3")

            uploaded_file_input = gr.File(
                label=" ",
                file_types=['.txt'],
                visible=False,
                elem_id="file_uploader_alpha_main_v3"
            )

            text_to_speak_tb = gr.Textbox(
                label="متن فارسی برای تبدیل",
                placeholder="مثال: سلام، فردا هوا چطور است؟",
                lines=5,
                value="",
                visible=True,
                elem_id="text_input_main_alpha_v3"
            )

            # The checkbox toggles between the file uploader and the textbox.
            use_file_input_cb.change(
                fn=lambda x: (gr.update(visible=x, label=" " if x else "متن فارسی برای تبدیل"), gr.update(visible=not x)),
                inputs=use_file_input_cb,
                outputs=[uploaded_file_input, text_to_speak_tb]
            )

            speech_prompt_tb = gr.Textbox(
                label="سبک گفتار (اختیاری)",
                placeholder="مثال: با لحنی شاد و پرانرژی",
                value="با لحنی دوستانه و رسا صحبت کن.",
                lines=2, elem_id="speech_prompt_alpha_v3"
            )

            speaker_voice_dd = gr.Dropdown(
                SPEAKER_VOICES, label="انتخاب گوینده و لهجه", value="Charon", elem_id="speaker_voice_alpha_v3"
            )

            temperature_slider = gr.Slider(
                minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="میزان خلاقیت صدا",
                elem_id="temperature_slider_alpha_v3"
            )
            gr.Markdown("<p class='temp_description_class_alpha_v3'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایین‌تر = یکنواختی بیشتر.</p>")

            generate_button = gr.Button("🚀 تولید و پخش صدا", elem_id="generate_button_alpha_v3")

            output_audio = gr.Audio(label=" ", type="filepath", elem_id="output_audio_player_alpha_v3")

            # One definition of the callback inputs, shared by the examples
            # and the button so the two cannot drift apart. The order matches
            # the positional parameters of gradio_tts_interface.
            tts_inputs = [
                use_file_input_cb,
                uploaded_file_input,
                text_to_speak_tb,
                speech_prompt_tb,
                speaker_voice_dd,
                temperature_slider,
            ]

            gr.HTML("<hr class='custom-hr'>")
            gr.Markdown(
                "<h3 style='text-align:center; font-weight:500; color:var(--fly-text-secondary); margin-top:1.5rem; margin-bottom:1rem;'>نمونه‌های کاربردی</h3>",
            )
            gr.Examples(
                examples=[
                    [False, None, "سلام بر شما، امیدوارم روز خوبی داشته باشید.", "با لحنی گرم و صمیمی.", "Zephyr", 0.85],
                    [False, None, "این یک آزمایش برای بررسی کیفیت صدای تولید شده توسط هوش مصنوعی آلفا است.", "با صدایی طبیعی و روان.", "Charon", 0.9],
                ],
                inputs=tts_inputs,
                outputs=[output_audio],
                fn=gradio_tts_interface,
                # Example caching is opt-in through the environment.
                cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true"
            )

    gr.Markdown(f"<p class='app-footer-alphatts'>Alpha TTS © 2024 - Model: {FIXED_MODEL_NAME}</p>")

    # The button object always exists once gr.Button(...) has returned, so
    # the handler is registered unconditionally.
    generate_button.click(
        fn=gradio_tts_interface,
        inputs=tts_inputs,
        outputs=[output_audio]
    )
606
+ # --- END: Gradio UI ---
 
607
 
608
  if __name__ == "__main__":
609
+ # Removed auto-restart thread to keep it closer to your original AlphaTTS.
610
+ # If you need it, you can re-add the auto_restart_service function and thread start.
611
+
612
+ # Check if PYDUB is available at launch
613
+ if not PYDUB_AVAILABLE:
614
+ logging.warning("Pydub (for audio merging) not found. Please install with `pip install pydub`. Merging will be disabled if multiple audio chunks are generated.")
 
615
 
616
  demo.launch(
617
+ server_name="0.0.0.0",
618
+ server_port=int(os.getenv("PORT", 7860)),
619
+ debug=os.environ.get("GRADIO_DEBUG", "False").lower() == "true",
620
  show_error=True
621
  )