Hamed744 committed on
Commit
b914b1f
·
verified ·
1 Parent(s): 6381681

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -154
app.py CHANGED
@@ -31,7 +31,7 @@ OUTPUT_DIR = "generated_audio"
31
  if not os.path.exists(OUTPUT_DIR):
32
  os.makedirs(OUTPUT_DIR)
33
 
34
- # --- Helper functions (unchanged from previous correct version) ---
35
  def log_message(msg, current_logs):
36
  print(msg)
37
  return f"{current_logs}\n{msg}".strip()
@@ -39,8 +39,7 @@ def log_message(msg, current_logs):
39
  def save_binary_file(file_name, data, log_func, current_logs):
40
  full_path = os.path.join(OUTPUT_DIR, file_name)
41
  try:
42
- with open(full_path, "wb") as f:
43
- f.write(data)
44
  current_logs = log_func(f"✅ فایل در مسیر زیر ذخیره شد: {full_path}", current_logs)
45
  return full_path, current_logs
46
  except Exception as e:
@@ -49,10 +48,8 @@ def save_binary_file(file_name, data, log_func, current_logs):
49
 
50
  def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
51
  parameters = parse_audio_mime_type(mime_type)
52
- bits_per_sample = parameters["bits_per_sample"]
53
- sample_rate = parameters["rate"]
54
- num_channels = 1
55
- data_size = len(audio_data)
56
  bytes_per_sample = bits_per_sample // 8
57
  block_align = num_channels * bytes_per_sample
58
  byte_rate = sample_rate * block_align
@@ -61,9 +58,8 @@ def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
61
  return header + audio_data
62
 
63
  def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
64
- bits_per_sample = 16; rate = 24000
65
- parts = mime_type.split(";")
66
- for param in parts:
67
  param = param.strip()
68
  if param.lower().startswith("rate="):
69
  try: rate = int(param.split("=", 1)[1])
@@ -74,29 +70,24 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
74
  return {"bits_per_sample": bits_per_sample, "rate": rate}
75
 
76
  def load_text_from_file(file_obj, log_func, current_logs):
77
- if file_obj is None:
78
- current_logs = log_func("❌ هیچ فایلی آپلود نشد.", current_logs)
79
- return "", current_logs
80
  file_path = file_obj.name
81
- current_logs = log_func(f"✅ فایل '{os.path.basename(file_path)}' با موفقیت دریافت شد.", current_logs)
82
  try:
83
  with open(file_path, 'r', encoding='utf-8') as f: content = f.read().strip()
84
- current_logs = log_func(f"📖 متن بارگذاری شده: {len(content)} کاراکتر", current_logs)
85
- current_logs = log_func(f"📝 نمونه متن: '{content[:100]}{'...' if len(content) > 100 else ''}'", current_logs)
86
  return content, current_logs
87
- except Exception as e:
88
- current_logs = log_func(f"❌ خطا در خواندن فایل: {e}", current_logs)
89
- return "", current_logs
90
 
91
  def smart_text_split(text, max_size=3800):
92
  if len(text) <= max_size: return [text]
93
- chunks = []; current_chunk = ""
94
  sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
95
  for sentence in sentences:
96
  if len(current_chunk) + len(sentence) + 1 > max_size:
97
  if current_chunk: chunks.append(current_chunk.strip())
98
  if len(sentence) > max_size:
99
- words = sentence.split(); temp_word_chunk = ""
100
  for word in words:
101
  if len(temp_word_chunk) + len(word) + 1 > max_size:
102
  if temp_word_chunk: chunks.append(temp_word_chunk.strip())
@@ -113,33 +104,26 @@ def smart_text_split(text, max_size=3800):
113
  return [c for c in chunks if c]
114
 
115
  def merge_audio_files_func(file_paths, output_filename, log_func, current_logs):
116
- if not PYDUB_AVAILABLE:
117
- current_logs = log_func("❌ pydub در دسترس نیست. نمی‌توان فایل‌ها را ادغام کرد.", current_logs)
118
- return None, current_logs
119
  output_path = os.path.join(OUTPUT_DIR, output_filename)
120
  try:
121
- current_logs = log_func(f"🔗 در حال ادغام {len(file_paths)} فایل صوتی...", current_logs)
122
  combined = AudioSegment.empty()
123
  for i, file_path in enumerate(file_paths):
124
  if os.path.exists(file_path):
125
- current_logs = log_func(f"📎 اضافه کردن فایل {i+1}: {file_path}", current_logs)
126
  try:
127
  audio = AudioSegment.from_file(file_path)
128
  combined += audio
129
  if i < len(file_paths) - 1: combined += AudioSegment.silent(duration=500)
130
  except Exception as e_pydub:
131
- current_logs = log_func(f"⚠️ خطا در خواندن فایل صوتی {file_path} با pydub: {e_pydub}. از این فایل صرف نظر شد.", current_logs)
132
  continue
133
- else: current_logs = log_func(f"⚠️ فایل پیدا نشد: {file_path}", current_logs)
134
- if not combined:
135
- current_logs = log_func("❌ هیچ فایل صوتی معتبری برای ادغام یافت نشد.", current_logs)
136
- return None, current_logs
137
  combined.export(output_path, format="wav")
138
- current_logs = log_func(f"✅ فایل ادغام شده ذخیره شد: {output_path}", current_logs)
139
- return output_path, current_logs
140
- except Exception as e:
141
- current_logs = log_func(f"❌ خطا در ادغام فایل‌ها: {e}", current_logs)
142
- return None, current_logs
143
 
144
  def create_zip_file(file_paths, zip_name_base, log_func, current_logs):
145
  zip_filename = os.path.join(OUTPUT_DIR, f"{zip_name_base}.zip")
@@ -147,89 +131,64 @@ def create_zip_file(file_paths, zip_name_base, log_func, current_logs):
147
  with zipfile.ZipFile(zip_filename, 'w') as zipf:
148
  for file_path in file_paths:
149
  if os.path.exists(file_path): zipf.write(file_path, os.path.basename(file_path))
150
- current_logs = log_func(f"📦 فایل ZIP ایجاد شد: {zip_filename}", current_logs)
151
- return zip_filename, current_logs
152
- except Exception as e:
153
- current_logs = log_func(f"❌ خطا در ایجاد فایل ZIP: {e}", current_logs)
154
- return None, current_logs
155
 
156
- # --- Main generation function for Gradio ---
157
  def generate_audio_from_text_gradio(
158
- api_key_hf_secret,
159
- input_method,
160
- text_to_speak_ui,
161
- uploaded_file_ui,
162
- speech_prompt_ui,
163
- model_name_ui,
164
- speaker_voice_ui,
165
- temperature_ui,
166
- max_chunk_size_ui,
167
- sleep_between_requests_ui,
168
- output_filename_base_ui,
169
- merge_audio_files_ui,
170
- delete_partial_files_ui
171
  ):
172
  logs = "⏳ شروع فرآیند..."
173
-
174
  if not api_key_hf_secret:
175
- logs = log_message("❌ کلید API جمینای (GEMINI_API_KEY) در Secrets این اسپیس تنظیم نشده است. لطفاً آن را اضافه کنید.", logs)
176
- return logs, None, None, gr.update(visible=False)
177
 
178
- # Set API key in environment for genai.Client() to pick up
179
  os.environ["GEMINI_API_KEY"] = api_key_hf_secret
180
- logs = log_message("🔑 کلید API از Hugging Face Secrets بارگذاری و در متغیر محیطی تنظیم شد.", logs)
181
 
182
  client = None
183
  try:
184
- logs = log_message("🛠️ در حال ایجاد کلاینت جمینای با `genai.Client()`...", logs)
185
- client = genai.Client(api_key=api_key_hf_secret) # Explicitly pass API key
186
- logs = log_message("✅ کلاینت جمینای با موفقیت ایجاد شد.", logs)
187
  except Exception as e:
188
- logs = log_message(f"❌ خطا در ایجاد کلاینت جمینای: {type(e).__name__} - {e}", logs)
189
- logs = log_message(" ممکن است نیاز به تنظیم نسخه کتابخانه `google-genai` در `requirements.txt` باشد.", logs)
190
- return logs, None, None, gr.update(visible=False)
191
 
192
  text_input_content = ""
193
  if input_method == "آپلود فایل":
194
- if uploaded_file_ui is None:
195
- logs = log_message("❌ حالت آپلود فایل انتخاب شده اما فایلی آپلود نشده است.", logs)
196
- return logs, None, None, gr.update(visible=False)
197
  text_input_content, logs = load_text_from_file(uploaded_file_ui, log_message, logs)
198
- if not text_input_content:
199
- return logs, None, None, gr.update(visible=False)
200
  else:
201
  text_input_content = text_to_speak_ui
202
 
203
- if not text_input_content or text_input_content.strip() == "":
204
- logs = log_message("❌ خطا: متن ورودی برای تبدیل به گفتار خالی است.", logs)
205
- return logs, None, None, gr.update(visible=False)
206
 
207
  text_chunks = smart_text_split(text_input_content, max_chunk_size_ui)
208
  logs = log_message(f"📊 متن به {len(text_chunks)} قطعه تقسیم شد.", logs)
209
- for i, chunk in enumerate(text_chunks):
210
- logs = log_message(f"📝 قطعه {i+1}: {len(chunk)} کاراکتر", logs)
211
 
212
  generated_files = []
213
- for i, chunk in enumerate(text_chunks):
214
- logs = log_message(f"\n🔊 تولید صدا برای قطعه {i+1}/{len(text_chunks)}...", logs)
215
-
216
- final_text_for_api = f'"{speech_prompt_ui}"\n{chunk}' if speech_prompt_ui and speech_prompt_ui.strip() else chunk
217
 
218
- # Using the structure from Colab for `client.models.generate_content_stream`
 
 
 
219
  api_contents = [
220
- genai_types.Content( # Use the aliased import
221
  role="user",
222
- parts=[
223
- genai_types.Part.from_text(text=final_text_for_api),
224
- ],
225
  ),
226
  ]
227
 
228
  genai_speech_config = genai_types.SpeechConfig(
229
  voice_config=genai_types.VoiceConfig(
230
- prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(
231
- voice_name=speaker_voice_ui
232
- )
233
  )
234
  )
235
 
@@ -241,59 +200,41 @@ def generate_audio_from_text_gradio(
241
 
242
  try:
243
  if not hasattr(client, 'models') or not hasattr(client.models, 'generate_content_stream'): # type: ignore
244
- logs = log_message(f"❌ کلاینت (`{type(client)}`) متد `models.generate_content_stream` را ندارد.", logs)
245
- logs = log_message(" این روش برای مدل‌های preview-tts در Colab استفاده شده بود. ممکن است نسخه کتابخانه متفاوت باشد.", logs)
246
- # As a last resort, try to see if the client itself has generate_content (unlikely for this path)
247
- if hasattr(client, 'generate_content_stream'):
248
- logs = log_message(" تلاش برای استفاده از `client.generate_content_stream`...", logs)
249
- # This would need a different config structure, might fail.
250
- # This part is highly speculative.
251
- stream_iterator = client.generate_content_stream( # type: ignore
252
- model=model_name_ui,
253
- contents=api_contents,
254
- generation_config=stream_generation_config # Older API might use 'config'
255
- )
256
- else:
257
- logs = log_message(" هیچ روش شناخته شده‌ای برای تولید محتوای استریم با این کلاینت یافت نشد.", logs)
258
- continue # Skip to next chunk
259
- else:
260
- # This is the path that matches the Colab notebook structure
261
- stream_iterator = client.models.generate_content_stream( # type: ignore
262
- model=model_name_ui,
263
- contents=api_contents,
264
- config=stream_generation_config, # `config` was used in Colab's `generate_content_stream`
265
- )
266
 
267
  chunk_filename_base = f"{output_filename_base_ui}_part_{i+1:03d}"
268
- audio_data_buffer = b""
269
- mime_type_from_api = "audio/wav"
270
 
271
  for chunk_response in stream_iterator:
272
- if (
273
- chunk_response.candidates
274
- and chunk_response.candidates[0].content
275
- and chunk_response.candidates[0].content.parts
276
- and chunk_response.candidates[0].content.parts[0].inline_data
277
- ):
278
  inline_data = chunk_response.candidates[0].content.parts[0].inline_data
279
  audio_data_buffer += inline_data.data
280
  mime_type_from_api = inline_data.mime_type
281
  elif chunk_response.text:
 
282
  if "error" in chunk_response.text.lower() or "failed" in chunk_response.text.lower():
283
- logs = log_message(f"❌ خطای API در قطعه {i+1}: {chunk_response.text}", logs)
284
  else:
285
- logs = log_message(f"ℹ️ پیام متنی از API: {chunk_response.text}", logs)
 
286
 
287
  if audio_data_buffer:
288
  file_extension = mimetypes.guess_extension(mime_type_from_api)
289
  final_audio_data = audio_data_buffer
290
  if file_extension is None or file_extension.lower() not in ['.wav', '.mp3', '.ogg', '.aac']:
291
  if "audio/L" in mime_type_from_api or "audio/raw" in mime_type_from_api:
292
- logs = log_message(f"ℹ️ API MimeType: {mime_type_from_api}. تبدیل به WAV...", logs)
293
  final_audio_data = convert_to_wav(audio_data_buffer, mime_type_from_api)
294
  file_extension = ".wav"
295
  else:
296
- logs = log_message(f"ℹ️ MimeType ناشناخته: {mime_type_from_api}. ذخیره با پسوند .bin.", logs)
297
  file_extension = ".bin"
298
  if mime_type_from_api == "audio/wav" and (file_extension != ".wav" and file_extension != ".wave"): file_extension = ".wav"
299
  elif mime_type_from_api == "audio/mpeg" and file_extension != ".mp3": file_extension = ".mp3"
@@ -305,17 +246,17 @@ def generate_audio_from_text_gradio(
305
  generated_files.append(saved_file_path)
306
  logs = log_message(f"✅ قطعه {i+1} تولید شد: {saved_file_path}", logs)
307
  else:
308
- if not f"❌ خطای API در قطعه {i+1}" in logs:
309
- logs = log_message(f"❌ قطعه {i+1} بدون داده صوتی بازگردانده شد.", logs)
310
 
311
  except Exception as e:
312
- error_msg = f"❌ خطا در تولید قطعه {i+1}: {type(e).__name__} - {e}"
313
- # logs = log_message(f"Traceback: {traceback.format_exc()}", logs) # Uncomment for detailed debug
314
- if "API_KEY_INVALID" in str(e) or "API key not valid" in str(e): error_msg += "\n🔑 کلید API نامعتبر است."
315
- elif "permission" in str(e).lower() or "access" in str(e).lower() or "403" in str(e): error_msg += f"\n🚫 عدم دسترسی به مدل {model_name_ui}."
316
- elif "429" in str(e) or "rate limit" in str(e).lower() or "quota" in str(e).lower(): error_msg += f"\n🐢 محدودیت تعداد درخواست (Quota)."
317
- elif "DeadlineExceeded" in str(e) or "504" in str(e): error_msg += f"\n⏱️ درخواست Timeout."
318
  logs = log_message(error_msg, logs)
 
319
  continue
320
 
321
  if i < len(text_chunks) - 1 and sleep_between_requests_ui > 0:
@@ -323,46 +264,44 @@ def generate_audio_from_text_gradio(
323
  time.sleep(sleep_between_requests_ui)
324
 
325
  if not generated_files:
326
- logs = log_message("❌ هیچ فایل صوتی تولید نشد!", logs)
327
- return logs, None, None, gr.update(visible=False)
328
 
329
- logs = log_message(f"\n🎉 {len(generated_files)} فایل صوتی با موفقیت تولید شد!", logs)
330
- final_audio_path = None; zip_file_path = None; zip_visible = False
331
 
332
  if merge_audio_files_ui and len(generated_files) > 1:
333
  if not PYDUB_AVAILABLE:
334
- logs = log_message("⚠️ pydub برای ادغام در دسترس نیست...", logs)
335
- zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all_parts", log_message, logs)
336
  if zip_file_path: zip_visible = True
337
  if generated_files: final_audio_path = generated_files[0]
338
  else:
339
- merged_filename = f"{output_filename_base_ui}_merged.wav"
340
- final_audio_path, logs = merge_audio_files_func(generated_files, merged_filename, log_message, logs)
341
  if final_audio_path:
342
- logs = log_message(f"🎵 فایل نهایی ادغام شده: {final_audio_path}", logs)
343
  if delete_partial_files_ui:
344
  for fp_del in generated_files:
345
  if fp_del != final_audio_path:
346
- try: os.remove(fp_del); logs = log_message(f"🗑️ فایل جزئی حذف شد: {fp_del}", logs)
347
- except Exception as e_del: logs = log_message(f"⚠️ خطا در حذف {fp_del}: {e_del}", logs)
348
  else:
349
- logs = log_message("⚠️ ادغام ناموفق. ارائه ZIP...", logs)
350
- zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all_parts", log_message, logs)
351
  if zip_file_path: zip_visible = True
352
  if generated_files: final_audio_path = generated_files[0]
353
  elif len(generated_files) == 1:
354
  final_audio_path = generated_files[0]
355
  logs = log_message(f"🎵 فایل نهایی: {final_audio_path}", logs)
356
- elif len(generated_files) > 1:
357
- zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all_parts", log_message, logs)
358
  if zip_file_path: zip_visible = True
359
- final_audio_path = generated_files[0] if generated_files else None
360
 
361
  if not final_audio_path and not zip_file_path:
362
- logs = log_message("🛑 هیچ خروجی صوتی برای نمایش یا دانلود وجود ندارد.", logs)
363
  return logs, final_audio_path, zip_file_path, gr.update(visible=zip_visible)
364
 
365
- # --- Gradio UI (unchanged from previous correct version) ---
366
  css = """
367
  body { direction: rtl; }
368
  .rtl_override { direction: rtl !important; text-align: right !important; }
@@ -372,17 +311,16 @@ footer { display: none !important; }
372
  .gradio-container { max-width: 800px !important; margin: auto !important; }
373
  """
374
  API_KEY_FROM_ENV = os.environ.get("GEMINI_API_KEY")
 
375
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"), css=css) as demo:
376
  gr.Markdown(
377
  """
378
  <div style='text-align: center; font-family: "Arial", sans-serif;'>
379
  <h1 class='rtl_override'>تبدیل متن به گفتار با Gemini API</h1>
380
- <p class='rtl_override'>این ابزار متن شما را با استفاده از مدل‌های پیشرفته گوگل به گفتار تبدیل می‌کند.</p>
381
- <p class='rtl_override'>بر اساس نوت‌بوک کولب ارائه شده توسط شما، با استفاده از مدل‌های دقیق و تنظیمات مشخص شده.</p>
382
- <p class='rtl_override'>ساخته شده توسط: <a href="https://github.com/aigolden" target="_blank">aigolden</a> (با راهنمایی شما)</p>
383
  </div>
384
  """
385
- )
386
  api_key_status_text = "⚠️ کلید API جمینای (GEMINI_API_KEY) در Secrets این اسپیس تنظیم نشده است."
387
  if API_KEY_FROM_ENV: api_key_status_text = "✅ کلید API جمینای از Secrets بارگذاری شد."
388
  gr.Markdown(f"<p style='text-align:center; color: {'green' if API_KEY_FROM_ENV else 'red'};' class='rtl_override'>{api_key_status_text}</p>")
@@ -393,12 +331,14 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
393
  input_method_radio = gr.Radio(["ورودی متنی", "آپلود فایل"], label="روش ورودی", value="ورودی متنی", elem_classes="rtl_override")
394
  text_to_speak_area = gr.Textbox(label="متن مورد نظر", placeholder="متن خود را اینجا وارد کنید...", lines=5, visible=True, elem_classes="rtl_override")
395
  uploaded_file_input = gr.File(label="فایل متنی (.txt)", file_types=[".txt"], visible=False, elem_classes="rtl_override") # type: ignore
396
- speech_prompt_area = gr.Textbox(label="پرامپت سبک گفتار (اختیاری)", placeholder="مثال: از زبان یک یوتوبر پر انرژی...", lines=2, elem_classes="rtl_override")
 
397
  gr.Markdown("<h3 class='rtl_override'>تنظیمات مدل و خروجی</h3>", elem_classes="rtl_override")
398
  model_name_dropdown = gr.Dropdown(MODELS_LIST, label="مدل", value=MODELS_LIST[0], elem_classes="rtl_override")
399
- speaker_voice_dropdown = gr.Dropdown(SPEAKER_VOICES_LIST, label="گوینده", value="Charon", elem_classes="rtl_override")
400
  temperature_slider = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label="دما", elem_classes="rtl_override")
401
  output_filename_base_input = gr.Textbox(value="gemini_tts_output", label="نام پایه فایل خروجی", elem_classes="rtl_override")
 
402
  with gr.Column(scale=1):
403
  gr.Markdown("<h3 class='rtl_override'>تنظیمات پیشرفته</h3>", elem_classes="rtl_override")
404
  max_chunk_size_slider = gr.Slider(minimum=2000, maximum=4000, step=100, value=3800, label="حداکثر کاراکتر در قطعه", elem_classes="rtl_override")
@@ -411,6 +351,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
411
  submit_button = gr.Button("🎤 تولید صدا", variant="primary", elem_id="submit_button_custom")
412
  gr.Markdown("<h3 class='rtl_override'>خروجی</h3>", elem_classes="rtl_override")
413
  status_output_area = gr.Textbox(label="پیام‌های وضعیت", lines=10, interactive=False, elem_classes="rtl_override")
 
414
  with gr.Row():
415
  audio_player_output = gr.Audio(label="فایل صوتی نهایی/اولین قطعه", type="filepath", elem_classes="rtl_override") # type: ignore
416
  zip_file_output = gr.File(label="دانلود همه قطعات (ZIP)", type="filepath", visible=False, elem_classes="rtl_override") # type: ignore
 
31
  if not os.path.exists(OUTPUT_DIR):
32
  os.makedirs(OUTPUT_DIR)
33
 
34
+ # --- Helper functions (unchanged) ---
35
  def log_message(msg, current_logs):
36
  print(msg)
37
  return f"{current_logs}\n{msg}".strip()
 
39
  def save_binary_file(file_name, data, log_func, current_logs):
40
  full_path = os.path.join(OUTPUT_DIR, file_name)
41
  try:
42
+ with open(full_path, "wb") as f: f.write(data)
 
43
  current_logs = log_func(f"✅ فایل در مسیر زیر ذخیره شد: {full_path}", current_logs)
44
  return full_path, current_logs
45
  except Exception as e:
 
48
 
49
  def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
50
  parameters = parse_audio_mime_type(mime_type)
51
+ bits_per_sample, sample_rate = parameters["bits_per_sample"], parameters["rate"]
52
+ num_channels, data_size = 1, len(audio_data)
 
 
53
  bytes_per_sample = bits_per_sample // 8
54
  block_align = num_channels * bytes_per_sample
55
  byte_rate = sample_rate * block_align
 
58
  return header + audio_data
59
 
60
  def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
61
+ bits_per_sample, rate = 16, 24000
62
+ for param in mime_type.split(";"):
 
63
  param = param.strip()
64
  if param.lower().startswith("rate="):
65
  try: rate = int(param.split("=", 1)[1])
 
70
  return {"bits_per_sample": bits_per_sample, "rate": rate}
71
 
72
  def load_text_from_file(file_obj, log_func, current_logs):
73
+ if file_obj is None: return "", log_func("❌ هیچ فایلی آپلود نشد.", current_logs)
 
 
74
  file_path = file_obj.name
75
+ current_logs = log_func(f"✅ فایل '{os.path.basename(file_path)}' دریافت شد.", current_logs)
76
  try:
77
  with open(file_path, 'r', encoding='utf-8') as f: content = f.read().strip()
78
+ current_logs = log_func(f"📖 متن: {len(content)} کاراکتر. نمونه: '{content[:100]}{'...' if len(content) > 100 else ''}'", current_logs)
 
79
  return content, current_logs
80
+ except Exception as e: return "", log_func(f"❌ خطا در خواندن فایل: {e}", current_logs)
 
 
81
 
82
  def smart_text_split(text, max_size=3800):
83
  if len(text) <= max_size: return [text]
84
+ chunks, current_chunk = [], ""
85
  sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
86
  for sentence in sentences:
87
  if len(current_chunk) + len(sentence) + 1 > max_size:
88
  if current_chunk: chunks.append(current_chunk.strip())
89
  if len(sentence) > max_size:
90
+ words, temp_word_chunk = sentence.split(), ""
91
  for word in words:
92
  if len(temp_word_chunk) + len(word) + 1 > max_size:
93
  if temp_word_chunk: chunks.append(temp_word_chunk.strip())
 
104
  return [c for c in chunks if c]
105
 
106
  def merge_audio_files_func(file_paths, output_filename, log_func, current_logs):
107
+ if not PYDUB_AVAILABLE: return None, log_func("❌ pydub نیست.", current_logs)
 
 
108
  output_path = os.path.join(OUTPUT_DIR, output_filename)
109
  try:
110
+ current_logs = log_func(f"🔗 ادغام {len(file_paths)} فایل...", current_logs)
111
  combined = AudioSegment.empty()
112
  for i, file_path in enumerate(file_paths):
113
  if os.path.exists(file_path):
114
+ current_logs = log_func(f"📎 فایل {i+1}: {file_path}", current_logs)
115
  try:
116
  audio = AudioSegment.from_file(file_path)
117
  combined += audio
118
  if i < len(file_paths) - 1: combined += AudioSegment.silent(duration=500)
119
  except Exception as e_pydub:
120
+ current_logs = log_func(f"⚠️ خطا pydub {file_path}: {e_pydub}. رد شد.", current_logs)
121
  continue
122
+ else: current_logs = log_func(f"⚠️ فایل نیست: {file_path}", current_logs)
123
+ if not combined: return None, log_func("❌ فایل معتبری برای ادغام نبود.", current_logs)
 
 
124
  combined.export(output_path, format="wav")
125
+ return output_path, log_func(f"✅ ادغام شد: {output_path}", current_logs)
126
+ except Exception as e: return None, log_func(f"❌ خطا ادغام: {e}", current_logs)
 
 
 
127
 
128
  def create_zip_file(file_paths, zip_name_base, log_func, current_logs):
129
  zip_filename = os.path.join(OUTPUT_DIR, f"{zip_name_base}.zip")
 
131
  with zipfile.ZipFile(zip_filename, 'w') as zipf:
132
  for file_path in file_paths:
133
  if os.path.exists(file_path): zipf.write(file_path, os.path.basename(file_path))
134
+ return zip_filename, log_func(f"📦 ZIP شد: {zip_filename}", current_logs)
135
+ except Exception as e: return None, log_func(f"❌ خطا ZIP: {e}", current_logs)
 
 
 
136
 
137
+ # --- Main generation function ---
138
  def generate_audio_from_text_gradio(
139
+ api_key_hf_secret, input_method, text_to_speak_ui, uploaded_file_ui,
140
+ speech_prompt_ui, model_name_ui, speaker_voice_ui, temperature_ui,
141
+ max_chunk_size_ui, sleep_between_requests_ui, output_filename_base_ui,
142
+ merge_audio_files_ui, delete_partial_files_ui
 
 
 
 
 
 
 
 
 
143
  ):
144
  logs = "⏳ شروع فرآیند..."
 
145
  if not api_key_hf_secret:
146
+ return log_message("❌ کلید API جمینای در Secrets نیست.", logs), None, None, gr.update(visible=False)
 
147
 
 
148
  os.environ["GEMINI_API_KEY"] = api_key_hf_secret
149
+ logs = log_message("🔑 کلید API از Secrets بارگذاری شد.", logs)
150
 
151
  client = None
152
  try:
153
+ logs = log_message("🛠️ ایجاد کلاینت `genai.Client()`...", logs)
154
+ client = genai.Client(api_key=api_key_hf_secret)
155
+ logs = log_message("✅ کلاینت ایجاد شد.", logs)
156
  except Exception as e:
157
+ return log_message(f"❌ خطا ایجاد کلاینت: {type(e).__name__} - {e}", logs), None, None, gr.update(visible=False)
 
 
158
 
159
  text_input_content = ""
160
  if input_method == "آپلود فایل":
 
 
 
161
  text_input_content, logs = load_text_from_file(uploaded_file_ui, log_message, logs)
162
+ if not text_input_content: return logs, None, None, gr.update(visible=False)
 
163
  else:
164
  text_input_content = text_to_speak_ui
165
 
166
+ if not text_input_content or not text_input_content.strip():
167
+ return log_message("❌ متن ورودی خالی است.", logs), None, None, gr.update(visible=False)
 
168
 
169
  text_chunks = smart_text_split(text_input_content, max_chunk_size_ui)
170
  logs = log_message(f"📊 متن به {len(text_chunks)} قطعه تقسیم شد.", logs)
171
+ for i, chunk_text in enumerate(text_chunks): # Renamed chunk to chunk_text
172
+ logs = log_message(f"📝 قطعه {i+1}: {len(chunk_text)} کاراکتر", logs)
173
 
174
  generated_files = []
175
+ for i, chunk_text_for_api in enumerate(text_chunks): # Use the chunk_text directly
176
+ logs = log_message(f"\n🔊 تولید صدا قطعه {i+1}/{len(text_chunks)}...", logs)
 
 
177
 
178
+ # IMPORTANT CHANGE: final_text_for_api is now just the chunk
179
+ # The speech_prompt_ui is NOT added to the text for these models/API calls
180
+ final_text_for_api = chunk_text_for_api
181
+
182
  api_contents = [
183
+ genai_types.Content(
184
  role="user",
185
+ parts=[genai_types.Part.from_text(text=final_text_for_api)],
 
 
186
  ),
187
  ]
188
 
189
  genai_speech_config = genai_types.SpeechConfig(
190
  voice_config=genai_types.VoiceConfig(
191
+ prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=speaker_voice_ui)
 
 
192
  )
193
  )
194
 
 
200
 
201
  try:
202
  if not hasattr(client, 'models') or not hasattr(client.models, 'generate_content_stream'): # type: ignore
203
+ logs = log_message(f"❌ کلاینت (`{type(client)}`) متد `models.generate_content_stream` ندارد.", logs)
204
+ continue
205
+
206
+ stream_iterator = client.models.generate_content_stream( # type: ignore
207
+ model=model_name_ui, contents=api_contents, config=stream_generation_config,
208
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  chunk_filename_base = f"{output_filename_base_ui}_part_{i+1:03d}"
211
+ audio_data_buffer, mime_type_from_api = b"", "audio/wav"
 
212
 
213
  for chunk_response in stream_iterator:
214
+ if (chunk_response.candidates and chunk_response.candidates[0].content and
215
+ chunk_response.candidates[0].content.parts and
216
+ chunk_response.candidates[0].content.parts[0].inline_data):
 
 
 
217
  inline_data = chunk_response.candidates[0].content.parts[0].inline_data
218
  audio_data_buffer += inline_data.data
219
  mime_type_from_api = inline_data.mime_type
220
  elif chunk_response.text:
221
+ log_text = f"💬 پیام API قطعه {i+1}: {chunk_response.text}"
222
  if "error" in chunk_response.text.lower() or "failed" in chunk_response.text.lower():
223
+ logs = log_message(f"❌ {log_text}", logs)
224
  else:
225
+ logs = log_message(f"ℹ️ {log_text}", logs)
226
+
227
 
228
  if audio_data_buffer:
229
  file_extension = mimetypes.guess_extension(mime_type_from_api)
230
  final_audio_data = audio_data_buffer
231
  if file_extension is None or file_extension.lower() not in ['.wav', '.mp3', '.ogg', '.aac']:
232
  if "audio/L" in mime_type_from_api or "audio/raw" in mime_type_from_api:
233
+ logs = log_message(f"ℹ️ Mime: {mime_type_from_api}. تبدیل به WAV...", logs)
234
  final_audio_data = convert_to_wav(audio_data_buffer, mime_type_from_api)
235
  file_extension = ".wav"
236
  else:
237
+ logs = log_message(f"ℹ️ Mime ناشناخته: {mime_type_from_api}. ذخیره .bin.", logs)
238
  file_extension = ".bin"
239
  if mime_type_from_api == "audio/wav" and (file_extension != ".wav" and file_extension != ".wave"): file_extension = ".wav"
240
  elif mime_type_from_api == "audio/mpeg" and file_extension != ".mp3": file_extension = ".mp3"
 
246
  generated_files.append(saved_file_path)
247
  logs = log_message(f"✅ قطعه {i+1} تولید شد: {saved_file_path}", logs)
248
  else:
249
+ if not f"❌ پیام API قطعه {i+1}" in logs: # Avoid duplicate error if API already sent one
250
+ logs = log_message(f"❌ قطعه {i+1} بدون داده صوتی.", logs)
251
 
252
  except Exception as e:
253
+ error_msg = f"❌ خطا تولید قطعه {i+1}: {type(e).__name__} - {e}"
254
+ if "API_KEY_INVALID" in str(e): error_msg += "\n🔑 کلید API نامعتبر."
255
+ elif "permission" in str(e).lower() or "403" in str(e): error_msg += f"\n🚫 عدم دسترسی به {model_name_ui}."
256
+ elif "429" in str(e) or "quota" in str(e).lower(): error_msg += f"\n🐢 محدودیت Quota."
257
+ elif "DeadlineExceeded" in str(e) or "504" in str(e): error_msg += f"\n⏱️ Timeout."
 
258
  logs = log_message(error_msg, logs)
259
+ # logs = log_message(traceback.format_exc(), logs) # DEBUG
260
  continue
261
 
262
  if i < len(text_chunks) - 1 and sleep_between_requests_ui > 0:
 
264
  time.sleep(sleep_between_requests_ui)
265
 
266
  if not generated_files:
267
+ return log_message("❌ هیچ فایل صوتی تولید نشد!", logs), None, None, gr.update(visible=False)
 
268
 
269
+ logs = log_message(f"\n🎉 {len(generated_files)} فایل صوتی تولید شد!", logs)
270
+ final_audio_path, zip_file_path, zip_visible = None, None, False
271
 
272
  if merge_audio_files_ui and len(generated_files) > 1:
273
  if not PYDUB_AVAILABLE:
274
+ logs = log_message("⚠️ pydub نیست. ارائه ZIP.", logs)
275
+ zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all", log_message, logs)
276
  if zip_file_path: zip_visible = True
277
  if generated_files: final_audio_path = generated_files[0]
278
  else:
279
+ final_audio_path, logs = merge_audio_files_func(generated_files, f"{output_filename_base_ui}_merged.wav", log_message, logs)
 
280
  if final_audio_path:
281
+ logs = log_message(f"🎵 ادغام شده: {final_audio_path}", logs)
282
  if delete_partial_files_ui:
283
  for fp_del in generated_files:
284
  if fp_del != final_audio_path:
285
+ try: os.remove(fp_del); logs = log_message(f"🗑️ حذف: {fp_del}", logs)
286
+ except Exception as e_del: logs = log_message(f"⚠️ خطا حذف {fp_del}: {e_del}", logs)
287
  else:
288
+ logs = log_message("⚠️ ادغام ناموفق. ارائه ZIP.", logs)
289
+ zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all", log_message, logs)
290
  if zip_file_path: zip_visible = True
291
  if generated_files: final_audio_path = generated_files[0]
292
  elif len(generated_files) == 1:
293
  final_audio_path = generated_files[0]
294
  logs = log_message(f"🎵 فایل نهایی: {final_audio_path}", logs)
295
+ elif len(generated_files) > 1: # Not merging
296
+ zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all", log_message, logs)
297
  if zip_file_path: zip_visible = True
298
+ if generated_files: final_audio_path = generated_files[0]
299
 
300
  if not final_audio_path and not zip_file_path:
301
+ return log_message("🛑 خروجی صوتی نیست.", logs), None, None, gr.update(visible=False)
302
  return logs, final_audio_path, zip_file_path, gr.update(visible=zip_visible)
303
 
304
+ # --- Gradio UI (Largely unchanged, ensure default values are correct) ---
305
  css = """
306
  body { direction: rtl; }
307
  .rtl_override { direction: rtl !important; text-align: right !important; }
 
311
  .gradio-container { max-width: 800px !important; margin: auto !important; }
312
  """
313
  API_KEY_FROM_ENV = os.environ.get("GEMINI_API_KEY")
314
+
315
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"), css=css) as demo:
316
  gr.Markdown(
317
  """
318
  <div style='text-align: center; font-family: "Arial", sans-serif;'>
319
  <h1 class='rtl_override'>تبدیل متن به گفتار با Gemini API</h1>
320
+ <p class='rtl_override'>توجه: قابلیت "پرامپت سبک گفتار" فعلا برای این مدل‌ها به طور کامل پشتیبانی نمی‌شود.</p>
 
 
321
  </div>
322
  """
323
+ ) # Added a note about speech prompt
324
  api_key_status_text = "⚠️ کلید API جمینای (GEMINI_API_KEY) در Secrets این اسپیس تنظیم نشده است."
325
  if API_KEY_FROM_ENV: api_key_status_text = "✅ کلید API جمینای از Secrets بارگذاری شد."
326
  gr.Markdown(f"<p style='text-align:center; color: {'green' if API_KEY_FROM_ENV else 'red'};' class='rtl_override'>{api_key_status_text}</p>")
 
331
  input_method_radio = gr.Radio(["ورودی متنی", "آپلود فایل"], label="روش ورودی", value="ورودی متنی", elem_classes="rtl_override")
332
  text_to_speak_area = gr.Textbox(label="متن مورد نظر", placeholder="متن خود را اینجا وارد کنید...", lines=5, visible=True, elem_classes="rtl_override")
333
  uploaded_file_input = gr.File(label="فایل متنی (.txt)", file_types=[".txt"], visible=False, elem_classes="rtl_override") # type: ignore
334
+ speech_prompt_area = gr.Textbox(label="پرامپت سبک گفتار (اختیاری - فعلا تاثیر محدود)", placeholder="مثال: شاد و پر انرژی", lines=2, elem_classes="rtl_override") # Clarified limited effect
335
+
336
  gr.Markdown("<h3 class='rtl_override'>تنظیمات مدل و خروجی</h3>", elem_classes="rtl_override")
337
  model_name_dropdown = gr.Dropdown(MODELS_LIST, label="مدل", value=MODELS_LIST[0], elem_classes="rtl_override")
338
+ speaker_voice_dropdown = gr.Dropdown(SPEAKER_VOICES_LIST, label="گوینده", value="Charon", elem_classes="rtl_override") # Default Charon
339
  temperature_slider = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label="دما", elem_classes="rtl_override")
340
  output_filename_base_input = gr.Textbox(value="gemini_tts_output", label="نام پایه فایل خروجی", elem_classes="rtl_override")
341
+
342
  with gr.Column(scale=1):
343
  gr.Markdown("<h3 class='rtl_override'>تنظیمات پیشرفته</h3>", elem_classes="rtl_override")
344
  max_chunk_size_slider = gr.Slider(minimum=2000, maximum=4000, step=100, value=3800, label="حداکثر کاراکتر در قطعه", elem_classes="rtl_override")
 
351
  submit_button = gr.Button("🎤 تولید صدا", variant="primary", elem_id="submit_button_custom")
352
  gr.Markdown("<h3 class='rtl_override'>خروجی</h3>", elem_classes="rtl_override")
353
  status_output_area = gr.Textbox(label="پیام‌های وضعیت", lines=10, interactive=False, elem_classes="rtl_override")
354
+
355
  with gr.Row():
356
  audio_player_output = gr.Audio(label="فایل صوتی نهایی/اولین قطعه", type="filepath", elem_classes="rtl_override") # type: ignore
357
  zip_file_output = gr.File(label="دانلود همه قطعات (ZIP)", type="filepath", visible=False, elem_classes="rtl_override") # type: ignore