Update app.py
Browse files
app.py
CHANGED
@@ -31,7 +31,7 @@ OUTPUT_DIR = "generated_audio"
|
|
31 |
# Make sure the directory for generated audio exists before anything writes
# to it. exist_ok=True replaces the separate os.path.exists() check, which
# could race with another process creating the directory in between.
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
33 |
|
34 |
-
# --- Helper functions (unchanged
|
35 |
def log_message(msg, current_logs):
    """Echo ``msg`` to stdout and return the running log with it appended.

    Args:
        msg: Message to record.
        current_logs: Log text accumulated so far (may be empty).

    Returns:
        ``current_logs`` and ``msg`` joined by a newline, with leading and
        trailing whitespace stripped from the combined text.
    """
    print(msg)
    # strip() drops the leading newline that appears when current_logs is empty.
    return f"{current_logs}\n{msg}".strip()
|
@@ -39,8 +39,7 @@ def log_message(msg, current_logs):
|
|
39 |
def save_binary_file(file_name, data, log_func, current_logs):
|
40 |
full_path = os.path.join(OUTPUT_DIR, file_name)
|
41 |
try:
|
42 |
-
with open(full_path, "wb") as f:
|
43 |
-
f.write(data)
|
44 |
current_logs = log_func(f"✅ فایل در مسیر زیر ذخیره شد: {full_path}", current_logs)
|
45 |
return full_path, current_logs
|
46 |
except Exception as e:
|
@@ -49,10 +48,8 @@ def save_binary_file(file_name, data, log_func, current_logs):
|
|
49 |
|
50 |
def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
|
51 |
parameters = parse_audio_mime_type(mime_type)
|
52 |
-
bits_per_sample = parameters["bits_per_sample"]
|
53 |
-
|
54 |
-
num_channels = 1
|
55 |
-
data_size = len(audio_data)
|
56 |
bytes_per_sample = bits_per_sample // 8
|
57 |
block_align = num_channels * bytes_per_sample
|
58 |
byte_rate = sample_rate * block_align
|
@@ -61,9 +58,8 @@ def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
|
|
61 |
return header + audio_data
|
62 |
|
63 |
def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
|
64 |
-
bits_per_sample = 16
|
65 |
-
|
66 |
-
for param in parts:
|
67 |
param = param.strip()
|
68 |
if param.lower().startswith("rate="):
|
69 |
try: rate = int(param.split("=", 1)[1])
|
@@ -74,29 +70,24 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
|
|
74 |
return {"bits_per_sample": bits_per_sample, "rate": rate}
|
75 |
|
76 |
def load_text_from_file(file_obj, log_func, current_logs):
|
77 |
-
if file_obj is None:
|
78 |
-
current_logs = log_func("❌ هیچ فایلی آپلود نشد.", current_logs)
|
79 |
-
return "", current_logs
|
80 |
file_path = file_obj.name
|
81 |
-
current_logs = log_func(f"✅ فایل '{os.path.basename(file_path)}'
|
82 |
try:
|
83 |
with open(file_path, 'r', encoding='utf-8') as f: content = f.read().strip()
|
84 |
-
current_logs = log_func(f"📖
|
85 |
-
current_logs = log_func(f"📝 نمونه متن: '{content[:100]}{'...' if len(content) > 100 else ''}'", current_logs)
|
86 |
return content, current_logs
|
87 |
-
except Exception as e:
|
88 |
-
current_logs = log_func(f"❌ خطا در خواندن فایل: {e}", current_logs)
|
89 |
-
return "", current_logs
|
90 |
|
91 |
def smart_text_split(text, max_size=3800):
|
92 |
if len(text) <= max_size: return [text]
|
93 |
-
chunks = []
|
94 |
sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
|
95 |
for sentence in sentences:
|
96 |
if len(current_chunk) + len(sentence) + 1 > max_size:
|
97 |
if current_chunk: chunks.append(current_chunk.strip())
|
98 |
if len(sentence) > max_size:
|
99 |
-
words = sentence.split()
|
100 |
for word in words:
|
101 |
if len(temp_word_chunk) + len(word) + 1 > max_size:
|
102 |
if temp_word_chunk: chunks.append(temp_word_chunk.strip())
|
@@ -113,33 +104,26 @@ def smart_text_split(text, max_size=3800):
|
|
113 |
return [c for c in chunks if c]
|
114 |
|
115 |
def merge_audio_files_func(file_paths, output_filename, log_func, current_logs):
|
116 |
-
if not PYDUB_AVAILABLE:
|
117 |
-
current_logs = log_func("❌ pydub در دسترس نیست. نمیتوان فایلها را ادغام کرد.", current_logs)
|
118 |
-
return None, current_logs
|
119 |
output_path = os.path.join(OUTPUT_DIR, output_filename)
|
120 |
try:
|
121 |
-
current_logs = log_func(f"🔗
|
122 |
combined = AudioSegment.empty()
|
123 |
for i, file_path in enumerate(file_paths):
|
124 |
if os.path.exists(file_path):
|
125 |
-
current_logs = log_func(f"📎
|
126 |
try:
|
127 |
audio = AudioSegment.from_file(file_path)
|
128 |
combined += audio
|
129 |
if i < len(file_paths) - 1: combined += AudioSegment.silent(duration=500)
|
130 |
except Exception as e_pydub:
|
131 |
-
current_logs = log_func(f"⚠️ خطا
|
132 |
continue
|
133 |
-
else: current_logs = log_func(f"⚠️ فایل
|
134 |
-
if not combined:
|
135 |
-
current_logs = log_func("❌ هیچ فایل صوتی معتبری برای ادغام یافت نشد.", current_logs)
|
136 |
-
return None, current_logs
|
137 |
combined.export(output_path, format="wav")
|
138 |
-
|
139 |
-
|
140 |
-
except Exception as e:
|
141 |
-
current_logs = log_func(f"❌ خطا در ادغام فایلها: {e}", current_logs)
|
142 |
-
return None, current_logs
|
143 |
|
144 |
def create_zip_file(file_paths, zip_name_base, log_func, current_logs):
|
145 |
zip_filename = os.path.join(OUTPUT_DIR, f"{zip_name_base}.zip")
|
@@ -147,89 +131,64 @@ def create_zip_file(file_paths, zip_name_base, log_func, current_logs):
|
|
147 |
with zipfile.ZipFile(zip_filename, 'w') as zipf:
|
148 |
for file_path in file_paths:
|
149 |
if os.path.exists(file_path): zipf.write(file_path, os.path.basename(file_path))
|
150 |
-
|
151 |
-
|
152 |
-
except Exception as e:
|
153 |
-
current_logs = log_func(f"❌ خطا در ایجاد فایل ZIP: {e}", current_logs)
|
154 |
-
return None, current_logs
|
155 |
|
156 |
-
# --- Main generation function
|
157 |
def generate_audio_from_text_gradio(
|
158 |
-
api_key_hf_secret,
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
speech_prompt_ui,
|
163 |
-
model_name_ui,
|
164 |
-
speaker_voice_ui,
|
165 |
-
temperature_ui,
|
166 |
-
max_chunk_size_ui,
|
167 |
-
sleep_between_requests_ui,
|
168 |
-
output_filename_base_ui,
|
169 |
-
merge_audio_files_ui,
|
170 |
-
delete_partial_files_ui
|
171 |
):
|
172 |
logs = "⏳ شروع فرآیند..."
|
173 |
-
|
174 |
if not api_key_hf_secret:
|
175 |
-
|
176 |
-
return logs, None, None, gr.update(visible=False)
|
177 |
|
178 |
-
# Set API key in environment for genai.Client() to pick up
|
179 |
os.environ["GEMINI_API_KEY"] = api_key_hf_secret
|
180 |
-
logs = log_message("🔑 کلید API از
|
181 |
|
182 |
client = None
|
183 |
try:
|
184 |
-
logs = log_message("🛠️
|
185 |
-
client = genai.Client(api_key=api_key_hf_secret)
|
186 |
-
logs = log_message("✅ کلاینت
|
187 |
except Exception as e:
|
188 |
-
|
189 |
-
logs = log_message(" ممکن است نیاز به تنظیم نسخه کتابخانه `google-genai` در `requirements.txt` باشد.", logs)
|
190 |
-
return logs, None, None, gr.update(visible=False)
|
191 |
|
192 |
text_input_content = ""
|
193 |
if input_method == "آپلود فایل":
|
194 |
-
if uploaded_file_ui is None:
|
195 |
-
logs = log_message("❌ حالت آپلود فایل انتخاب شده اما فایلی آپلود نشده است.", logs)
|
196 |
-
return logs, None, None, gr.update(visible=False)
|
197 |
text_input_content, logs = load_text_from_file(uploaded_file_ui, log_message, logs)
|
198 |
-
if not text_input_content:
|
199 |
-
return logs, None, None, gr.update(visible=False)
|
200 |
else:
|
201 |
text_input_content = text_to_speak_ui
|
202 |
|
203 |
-
if not text_input_content or text_input_content.strip()
|
204 |
-
|
205 |
-
return logs, None, None, gr.update(visible=False)
|
206 |
|
207 |
text_chunks = smart_text_split(text_input_content, max_chunk_size_ui)
|
208 |
logs = log_message(f"📊 متن به {len(text_chunks)} قطعه تقسیم شد.", logs)
|
209 |
-
for i,
|
210 |
-
logs = log_message(f"📝 قطعه {i+1}: {len(
|
211 |
|
212 |
generated_files = []
|
213 |
-
for i,
|
214 |
-
logs = log_message(f"\n🔊 تولید صدا
|
215 |
-
|
216 |
-
final_text_for_api = f'"{speech_prompt_ui}"\n{chunk}' if speech_prompt_ui and speech_prompt_ui.strip() else chunk
|
217 |
|
218 |
-
#
|
|
|
|
|
|
|
219 |
api_contents = [
|
220 |
-
genai_types.Content(
|
221 |
role="user",
|
222 |
-
parts=[
|
223 |
-
genai_types.Part.from_text(text=final_text_for_api),
|
224 |
-
],
|
225 |
),
|
226 |
]
|
227 |
|
228 |
genai_speech_config = genai_types.SpeechConfig(
|
229 |
voice_config=genai_types.VoiceConfig(
|
230 |
-
prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(
|
231 |
-
voice_name=speaker_voice_ui
|
232 |
-
)
|
233 |
)
|
234 |
)
|
235 |
|
@@ -241,59 +200,41 @@ def generate_audio_from_text_gradio(
|
|
241 |
|
242 |
try:
|
243 |
if not hasattr(client, 'models') or not hasattr(client.models, 'generate_content_stream'): # type: ignore
|
244 |
-
logs = log_message(f"❌ کلاینت (`{type(client)}`) متد `models.generate_content_stream`
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
# This part is highly speculative.
|
251 |
-
stream_iterator = client.generate_content_stream( # type: ignore
|
252 |
-
model=model_name_ui,
|
253 |
-
contents=api_contents,
|
254 |
-
generation_config=stream_generation_config # Older API might use 'config'
|
255 |
-
)
|
256 |
-
else:
|
257 |
-
logs = log_message(" هیچ روش شناخته شدهای برای تولید محتوای استریم با این کلاینت یافت نشد.", logs)
|
258 |
-
continue # Skip to next chunk
|
259 |
-
else:
|
260 |
-
# This is the path that matches the Colab notebook structure
|
261 |
-
stream_iterator = client.models.generate_content_stream( # type: ignore
|
262 |
-
model=model_name_ui,
|
263 |
-
contents=api_contents,
|
264 |
-
config=stream_generation_config, # `config` was used in Colab's `generate_content_stream`
|
265 |
-
)
|
266 |
|
267 |
chunk_filename_base = f"{output_filename_base_ui}_part_{i+1:03d}"
|
268 |
-
audio_data_buffer = b""
|
269 |
-
mime_type_from_api = "audio/wav"
|
270 |
|
271 |
for chunk_response in stream_iterator:
|
272 |
-
if (
|
273 |
-
chunk_response.candidates
|
274 |
-
|
275 |
-
and chunk_response.candidates[0].content.parts
|
276 |
-
and chunk_response.candidates[0].content.parts[0].inline_data
|
277 |
-
):
|
278 |
inline_data = chunk_response.candidates[0].content.parts[0].inline_data
|
279 |
audio_data_buffer += inline_data.data
|
280 |
mime_type_from_api = inline_data.mime_type
|
281 |
elif chunk_response.text:
|
|
|
282 |
if "error" in chunk_response.text.lower() or "failed" in chunk_response.text.lower():
|
283 |
-
logs = log_message(f"❌
|
284 |
else:
|
285 |
-
logs = log_message(f"ℹ️
|
|
|
286 |
|
287 |
if audio_data_buffer:
|
288 |
file_extension = mimetypes.guess_extension(mime_type_from_api)
|
289 |
final_audio_data = audio_data_buffer
|
290 |
if file_extension is None or file_extension.lower() not in ['.wav', '.mp3', '.ogg', '.aac']:
|
291 |
if "audio/L" in mime_type_from_api or "audio/raw" in mime_type_from_api:
|
292 |
-
logs = log_message(f"ℹ️
|
293 |
final_audio_data = convert_to_wav(audio_data_buffer, mime_type_from_api)
|
294 |
file_extension = ".wav"
|
295 |
else:
|
296 |
-
logs = log_message(f"ℹ️
|
297 |
file_extension = ".bin"
|
298 |
if mime_type_from_api == "audio/wav" and (file_extension != ".wav" and file_extension != ".wave"): file_extension = ".wav"
|
299 |
elif mime_type_from_api == "audio/mpeg" and file_extension != ".mp3": file_extension = ".mp3"
|
@@ -305,17 +246,17 @@ def generate_audio_from_text_gradio(
|
|
305 |
generated_files.append(saved_file_path)
|
306 |
logs = log_message(f"✅ قطعه {i+1} تولید شد: {saved_file_path}", logs)
|
307 |
else:
|
308 |
-
if not f"❌
|
309 |
-
logs = log_message(f"❌ قطعه {i+1} بدون داده
|
310 |
|
311 |
except Exception as e:
|
312 |
-
error_msg = f"❌ خطا
|
313 |
-
|
314 |
-
|
315 |
-
elif "
|
316 |
-
elif "
|
317 |
-
elif "DeadlineExceeded" in str(e) or "504" in str(e): error_msg += f"\n⏱️ درخواست Timeout."
|
318 |
logs = log_message(error_msg, logs)
|
|
|
319 |
continue
|
320 |
|
321 |
if i < len(text_chunks) - 1 and sleep_between_requests_ui > 0:
|
@@ -323,46 +264,44 @@ def generate_audio_from_text_gradio(
|
|
323 |
time.sleep(sleep_between_requests_ui)
|
324 |
|
325 |
if not generated_files:
|
326 |
-
|
327 |
-
return logs, None, None, gr.update(visible=False)
|
328 |
|
329 |
-
logs = log_message(f"\n🎉 {len(generated_files)} فایل صوتی
|
330 |
-
final_audio_path
|
331 |
|
332 |
if merge_audio_files_ui and len(generated_files) > 1:
|
333 |
if not PYDUB_AVAILABLE:
|
334 |
-
logs = log_message("⚠️ pydub
|
335 |
-
zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}
|
336 |
if zip_file_path: zip_visible = True
|
337 |
if generated_files: final_audio_path = generated_files[0]
|
338 |
else:
|
339 |
-
|
340 |
-
final_audio_path, logs = merge_audio_files_func(generated_files, merged_filename, log_message, logs)
|
341 |
if final_audio_path:
|
342 |
-
logs = log_message(f"🎵
|
343 |
if delete_partial_files_ui:
|
344 |
for fp_del in generated_files:
|
345 |
if fp_del != final_audio_path:
|
346 |
-
try: os.remove(fp_del); logs = log_message(f"🗑️
|
347 |
-
except Exception as e_del: logs = log_message(f"⚠️ خطا
|
348 |
else:
|
349 |
-
logs = log_message("⚠️ ادغام ناموفق. ارائه ZIP
|
350 |
-
zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}
|
351 |
if zip_file_path: zip_visible = True
|
352 |
if generated_files: final_audio_path = generated_files[0]
|
353 |
elif len(generated_files) == 1:
|
354 |
final_audio_path = generated_files[0]
|
355 |
logs = log_message(f"🎵 فایل نهایی: {final_audio_path}", logs)
|
356 |
-
elif len(generated_files) > 1:
|
357 |
-
zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}
|
358 |
if zip_file_path: zip_visible = True
|
359 |
-
final_audio_path = generated_files[0]
|
360 |
|
361 |
if not final_audio_path and not zip_file_path:
|
362 |
-
|
363 |
return logs, final_audio_path, zip_file_path, gr.update(visible=zip_visible)
|
364 |
|
365 |
-
# --- Gradio UI (unchanged
|
366 |
css = """
|
367 |
body { direction: rtl; }
|
368 |
.rtl_override { direction: rtl !important; text-align: right !important; }
|
@@ -372,17 +311,16 @@ footer { display: none !important; }
|
|
372 |
.gradio-container { max-width: 800px !important; margin: auto !important; }
|
373 |
"""
|
374 |
API_KEY_FROM_ENV = os.environ.get("GEMINI_API_KEY")
|
|
|
375 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"), css=css) as demo:
|
376 |
gr.Markdown(
|
377 |
"""
|
378 |
<div style='text-align: center; font-family: "Arial", sans-serif;'>
|
379 |
<h1 class='rtl_override'>تبدیل متن به گفتار با Gemini API</h1>
|
380 |
-
<p class='rtl_override'
|
381 |
-
<p class='rtl_override'>بر اساس نوتبوک کولب ارائه شده توسط شما، با استفاده از مدلهای دقیق و تنظیمات مشخص شده.</p>
|
382 |
-
<p class='rtl_override'>ساخته شده توسط: <a href="https://github.com/aigolden" target="_blank">aigolden</a> (با راهنمایی شما)</p>
|
383 |
</div>
|
384 |
"""
|
385 |
-
)
|
386 |
api_key_status_text = "⚠️ کلید API جمینای (GEMINI_API_KEY) در Secrets این اسپیس تنظیم نشده است."
|
387 |
if API_KEY_FROM_ENV: api_key_status_text = "✅ کلید API جمینای از Secrets بارگذاری شد."
|
388 |
gr.Markdown(f"<p style='text-align:center; color: {'green' if API_KEY_FROM_ENV else 'red'};' class='rtl_override'>{api_key_status_text}</p>")
|
@@ -393,12 +331,14 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
|
|
393 |
input_method_radio = gr.Radio(["ورودی متنی", "آپلود فایل"], label="روش ورودی", value="ورودی متنی", elem_classes="rtl_override")
|
394 |
text_to_speak_area = gr.Textbox(label="متن مورد نظر", placeholder="متن خود را اینجا وارد کنید...", lines=5, visible=True, elem_classes="rtl_override")
|
395 |
uploaded_file_input = gr.File(label="فایل متنی (.txt)", file_types=[".txt"], visible=False, elem_classes="rtl_override") # type: ignore
|
396 |
-
speech_prompt_area = gr.Textbox(label="پرامپت سبک گفتار (اختیاری)", placeholder="مثال:
|
|
|
397 |
gr.Markdown("<h3 class='rtl_override'>تنظیمات مدل و خروجی</h3>", elem_classes="rtl_override")
|
398 |
model_name_dropdown = gr.Dropdown(MODELS_LIST, label="مدل", value=MODELS_LIST[0], elem_classes="rtl_override")
|
399 |
-
speaker_voice_dropdown = gr.Dropdown(SPEAKER_VOICES_LIST, label="گوینده", value="Charon", elem_classes="rtl_override")
|
400 |
temperature_slider = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label="دما", elem_classes="rtl_override")
|
401 |
output_filename_base_input = gr.Textbox(value="gemini_tts_output", label="نام پایه فایل خروجی", elem_classes="rtl_override")
|
|
|
402 |
with gr.Column(scale=1):
|
403 |
gr.Markdown("<h3 class='rtl_override'>تنظیمات پیشرفته</h3>", elem_classes="rtl_override")
|
404 |
max_chunk_size_slider = gr.Slider(minimum=2000, maximum=4000, step=100, value=3800, label="حداکثر کاراکتر در قطعه", elem_classes="rtl_override")
|
@@ -411,6 +351,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
|
|
411 |
submit_button = gr.Button("🎤 تولید صدا", variant="primary", elem_id="submit_button_custom")
|
412 |
gr.Markdown("<h3 class='rtl_override'>خروجی</h3>", elem_classes="rtl_override")
|
413 |
status_output_area = gr.Textbox(label="پیامهای وضعیت", lines=10, interactive=False, elem_classes="rtl_override")
|
|
|
414 |
with gr.Row():
|
415 |
audio_player_output = gr.Audio(label="فایل صوتی نهایی/اولین قطعه", type="filepath", elem_classes="rtl_override") # type: ignore
|
416 |
zip_file_output = gr.File(label="دانلود همه قطعات (ZIP)", type="filepath", visible=False, elem_classes="rtl_override") # type: ignore
|
|
|
31 |
# Make sure the directory for generated audio exists before anything writes
# to it. exist_ok=True replaces the separate os.path.exists() check, which
# could race with another process creating the directory in between.
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
33 |
|
34 |
+
# --- Helper functions (unchanged) ---
|
35 |
def log_message(msg, current_logs):
    """Echo ``msg`` to stdout and return the running log with it appended.

    Args:
        msg: Message to record.
        current_logs: Log text accumulated so far (may be empty).

    Returns:
        ``current_logs`` and ``msg`` joined by a newline, with leading and
        trailing whitespace stripped from the combined text.
    """
    print(msg)
    # strip() drops the leading newline that appears when current_logs is empty.
    return f"{current_logs}\n{msg}".strip()
|
|
|
39 |
def save_binary_file(file_name, data, log_func, current_logs):
|
40 |
full_path = os.path.join(OUTPUT_DIR, file_name)
|
41 |
try:
|
42 |
+
with open(full_path, "wb") as f: f.write(data)
|
|
|
43 |
current_logs = log_func(f"✅ فایل در مسیر زیر ذخیره شد: {full_path}", current_logs)
|
44 |
return full_path, current_logs
|
45 |
except Exception as e:
|
|
|
48 |
|
49 |
def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
|
50 |
parameters = parse_audio_mime_type(mime_type)
|
51 |
+
bits_per_sample, sample_rate = parameters["bits_per_sample"], parameters["rate"]
|
52 |
+
num_channels, data_size = 1, len(audio_data)
|
|
|
|
|
53 |
bytes_per_sample = bits_per_sample // 8
|
54 |
block_align = num_channels * bytes_per_sample
|
55 |
byte_rate = sample_rate * block_align
|
|
|
58 |
return header + audio_data
|
59 |
|
60 |
def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
|
61 |
+
bits_per_sample, rate = 16, 24000
|
62 |
+
for param in mime_type.split(";"):
|
|
|
63 |
param = param.strip()
|
64 |
if param.lower().startswith("rate="):
|
65 |
try: rate = int(param.split("=", 1)[1])
|
|
|
70 |
return {"bits_per_sample": bits_per_sample, "rate": rate}
|
71 |
|
72 |
def load_text_from_file(file_obj, log_func, current_logs):
    """Read an uploaded UTF-8 text file and return its stripped contents.

    Args:
        file_obj: The uploaded file. Either an object whose ``name`` attribute
            is the path on disk, a plain path string, or ``None`` when nothing
            was uploaded.
        log_func: Callable ``(message, current_logs) -> updated_logs``.
        current_logs: Log text accumulated so far.

    Returns:
        A ``(content, logs)`` tuple. ``content`` is the file text with
        surrounding whitespace removed, or ``""`` when no file was given or
        the file could not be read; ``logs`` is the updated log text.
    """
    if file_obj is None:
        return "", log_func("❌ هیچ فایلی آپلود نشد.", current_logs)

    # Accept both file-like upload objects (path in .name) and bare path strings.
    file_path = getattr(file_obj, "name", file_obj)
    current_logs = log_func(f"✅ فایل '{os.path.basename(file_path)}' دریافت شد.", current_logs)

    # Keep the try body to the read itself so unrelated errors are not masked.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read().strip()
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file; ValueError covers UnicodeDecodeError.
        return "", log_func(f"❌ خطا در خواندن فایل: {e}", current_logs)

    suffix = "..." if len(content) > 100 else ""
    current_logs = log_func(
        f"📖 متن: {len(content)} کاراکتر. نمونه: '{content[:100]}{suffix}'",
        current_logs,
    )
    return content, current_logs
|
|
|
|
|
81 |
|
82 |
def smart_text_split(text, max_size=3800):
|
83 |
if len(text) <= max_size: return [text]
|
84 |
+
chunks, current_chunk = [], ""
|
85 |
sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
|
86 |
for sentence in sentences:
|
87 |
if len(current_chunk) + len(sentence) + 1 > max_size:
|
88 |
if current_chunk: chunks.append(current_chunk.strip())
|
89 |
if len(sentence) > max_size:
|
90 |
+
words, temp_word_chunk = sentence.split(), ""
|
91 |
for word in words:
|
92 |
if len(temp_word_chunk) + len(word) + 1 > max_size:
|
93 |
if temp_word_chunk: chunks.append(temp_word_chunk.strip())
|
|
|
104 |
return [c for c in chunks if c]
|
105 |
|
106 |
def merge_audio_files_func(file_paths, output_filename, log_func, current_logs):
    """Concatenate audio files into a single WAV file inside ``OUTPUT_DIR``.

    Files that do not exist or that pydub cannot load are logged and skipped.
    A 500 ms silence is inserted between consecutive clips that were actually
    merged, so skipped files never leave a dangling gap at the end.

    Args:
        file_paths: Paths of the audio files to merge, in playback order.
        output_filename: File name (not path) of the merged WAV.
        log_func: Callable ``(message, current_logs) -> updated_logs``.
        current_logs: Log text accumulated so far.

    Returns:
        A ``(output_path, logs)`` tuple; ``output_path`` is ``None`` when
        pydub is unavailable, nothing could be merged, or the export failed.
    """
    if not PYDUB_AVAILABLE:
        return None, log_func("❌ pydub نیست.", current_logs)

    output_path = os.path.join(OUTPUT_DIR, output_filename)
    try:
        current_logs = log_func(f"🔗 ادغام {len(file_paths)} فایل...", current_logs)
        combined = AudioSegment.empty()
        merged_count = 0
        for i, file_path in enumerate(file_paths):
            if not os.path.exists(file_path):
                current_logs = log_func(f"⚠️ فایل نیست: {file_path}", current_logs)
                continue
            current_logs = log_func(f"📎 فایل {i+1}: {file_path}", current_logs)
            try:
                audio = AudioSegment.from_file(file_path)
                # Only separate clips that are really present: add the gap
                # before this clip if something has already been merged.
                base = combined + AudioSegment.silent(duration=500) if merged_count else combined
                # Rebind only after both steps succeed, so a failing clip
                # leaves the accumulated audio untouched.
                combined = base + audio
            except Exception as e_pydub:
                # Best effort: one bad clip must not abort the whole merge.
                current_logs = log_func(f"⚠️ خطا pydub {file_path}: {e_pydub}. رد شد.", current_logs)
                continue
            merged_count += 1
        if not combined:
            return None, log_func("❌ فایل معتبری برای ادغام نبود.", current_logs)
        combined.export(output_path, format="wav")
        return output_path, log_func(f"✅ ادغام شد: {output_path}", current_logs)
    except Exception as e:
        # UI boundary: report the failure through the log instead of raising.
        return None, log_func(f"❌ خطا ادغام: {e}", current_logs)
|
|
|
|
|
|
|
127 |
|
128 |
def create_zip_file(file_paths, zip_name_base, log_func, current_logs):
    """Bundle the given files into ``<zip_name_base>.zip`` inside ``OUTPUT_DIR``.

    Each existing file is stored under its base name (no directory part).
    Paths that do not exist are logged and left out of the archive.

    Args:
        file_paths: Paths of the files to add to the archive.
        zip_name_base: Archive file name without the ``.zip`` extension.
        log_func: Callable ``(message, current_logs) -> updated_logs``.
        current_logs: Log text accumulated so far.

    Returns:
        A ``(zip_path, logs)`` tuple; ``zip_path`` is ``None`` if the archive
        could not be created.
    """
    zip_filename = os.path.join(OUTPUT_DIR, f"{zip_name_base}.zip")
    try:
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            for file_path in file_paths:
                if os.path.exists(file_path):
                    zipf.write(file_path, os.path.basename(file_path))
                else:
                    # Surface the omission rather than silently dropping the file.
                    current_logs = log_func(f"⚠️ فایل نیست: {file_path}", current_logs)
        return zip_filename, log_func(f"📦 ZIP شد: {zip_filename}", current_logs)
    except Exception as e:
        # UI boundary: report the failure through the log instead of raising.
        return None, log_func(f"❌ خطا ZIP: {e}", current_logs)
|
|
|
|
|
|
|
136 |
|
137 |
+
# --- Main generation function ---
|
138 |
def generate_audio_from_text_gradio(
|
139 |
+
api_key_hf_secret, input_method, text_to_speak_ui, uploaded_file_ui,
|
140 |
+
speech_prompt_ui, model_name_ui, speaker_voice_ui, temperature_ui,
|
141 |
+
max_chunk_size_ui, sleep_between_requests_ui, output_filename_base_ui,
|
142 |
+
merge_audio_files_ui, delete_partial_files_ui
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
):
|
144 |
logs = "⏳ شروع فرآیند..."
|
|
|
145 |
if not api_key_hf_secret:
|
146 |
+
return log_message("❌ کلید API جمینای در Secrets نیست.", logs), None, None, gr.update(visible=False)
|
|
|
147 |
|
|
|
148 |
os.environ["GEMINI_API_KEY"] = api_key_hf_secret
|
149 |
+
logs = log_message("🔑 کلید API از Secrets بارگذاری شد.", logs)
|
150 |
|
151 |
client = None
|
152 |
try:
|
153 |
+
logs = log_message("🛠️ ایجاد کلاینت `genai.Client()`...", logs)
|
154 |
+
client = genai.Client(api_key=api_key_hf_secret)
|
155 |
+
logs = log_message("✅ کلاینت ایجاد شد.", logs)
|
156 |
except Exception as e:
|
157 |
+
return log_message(f"❌ خطا ایجاد کلاینت: {type(e).__name__} - {e}", logs), None, None, gr.update(visible=False)
|
|
|
|
|
158 |
|
159 |
text_input_content = ""
|
160 |
if input_method == "آپلود فایل":
|
|
|
|
|
|
|
161 |
text_input_content, logs = load_text_from_file(uploaded_file_ui, log_message, logs)
|
162 |
+
if not text_input_content: return logs, None, None, gr.update(visible=False)
|
|
|
163 |
else:
|
164 |
text_input_content = text_to_speak_ui
|
165 |
|
166 |
+
if not text_input_content or not text_input_content.strip():
|
167 |
+
return log_message("❌ متن ورودی خالی است.", logs), None, None, gr.update(visible=False)
|
|
|
168 |
|
169 |
text_chunks = smart_text_split(text_input_content, max_chunk_size_ui)
|
170 |
logs = log_message(f"📊 متن به {len(text_chunks)} قطعه تقسیم شد.", logs)
|
171 |
+
for i, chunk_text in enumerate(text_chunks): # Renamed chunk to chunk_text
|
172 |
+
logs = log_message(f"📝 قطعه {i+1}: {len(chunk_text)} کاراکتر", logs)
|
173 |
|
174 |
generated_files = []
|
175 |
+
for i, chunk_text_for_api in enumerate(text_chunks): # Use the chunk_text directly
|
176 |
+
logs = log_message(f"\n🔊 تولید صدا قطعه {i+1}/{len(text_chunks)}...", logs)
|
|
|
|
|
177 |
|
178 |
+
# IMPORTANT CHANGE: final_text_for_api is now just the chunk
|
179 |
+
# The speech_prompt_ui is NOT added to the text for these models/API calls
|
180 |
+
final_text_for_api = chunk_text_for_api
|
181 |
+
|
182 |
api_contents = [
|
183 |
+
genai_types.Content(
|
184 |
role="user",
|
185 |
+
parts=[genai_types.Part.from_text(text=final_text_for_api)],
|
|
|
|
|
186 |
),
|
187 |
]
|
188 |
|
189 |
genai_speech_config = genai_types.SpeechConfig(
|
190 |
voice_config=genai_types.VoiceConfig(
|
191 |
+
prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=speaker_voice_ui)
|
|
|
|
|
192 |
)
|
193 |
)
|
194 |
|
|
|
200 |
|
201 |
try:
|
202 |
if not hasattr(client, 'models') or not hasattr(client.models, 'generate_content_stream'): # type: ignore
|
203 |
+
logs = log_message(f"❌ کلاینت (`{type(client)}`) متد `models.generate_content_stream` ندارد.", logs)
|
204 |
+
continue
|
205 |
+
|
206 |
+
stream_iterator = client.models.generate_content_stream( # type: ignore
|
207 |
+
model=model_name_ui, contents=api_contents, config=stream_generation_config,
|
208 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
chunk_filename_base = f"{output_filename_base_ui}_part_{i+1:03d}"
|
211 |
+
audio_data_buffer, mime_type_from_api = b"", "audio/wav"
|
|
|
212 |
|
213 |
for chunk_response in stream_iterator:
|
214 |
+
if (chunk_response.candidates and chunk_response.candidates[0].content and
|
215 |
+
chunk_response.candidates[0].content.parts and
|
216 |
+
chunk_response.candidates[0].content.parts[0].inline_data):
|
|
|
|
|
|
|
217 |
inline_data = chunk_response.candidates[0].content.parts[0].inline_data
|
218 |
audio_data_buffer += inline_data.data
|
219 |
mime_type_from_api = inline_data.mime_type
|
220 |
elif chunk_response.text:
|
221 |
+
log_text = f"💬 پیام API قطعه {i+1}: {chunk_response.text}"
|
222 |
if "error" in chunk_response.text.lower() or "failed" in chunk_response.text.lower():
|
223 |
+
logs = log_message(f"❌ {log_text}", logs)
|
224 |
else:
|
225 |
+
logs = log_message(f"ℹ️ {log_text}", logs)
|
226 |
+
|
227 |
|
228 |
if audio_data_buffer:
|
229 |
file_extension = mimetypes.guess_extension(mime_type_from_api)
|
230 |
final_audio_data = audio_data_buffer
|
231 |
if file_extension is None or file_extension.lower() not in ['.wav', '.mp3', '.ogg', '.aac']:
|
232 |
if "audio/L" in mime_type_from_api or "audio/raw" in mime_type_from_api:
|
233 |
+
logs = log_message(f"ℹ️ Mime: {mime_type_from_api}. تبدیل به WAV...", logs)
|
234 |
final_audio_data = convert_to_wav(audio_data_buffer, mime_type_from_api)
|
235 |
file_extension = ".wav"
|
236 |
else:
|
237 |
+
logs = log_message(f"ℹ️ Mime ناشناخته: {mime_type_from_api}. ذخیره .bin.", logs)
|
238 |
file_extension = ".bin"
|
239 |
if mime_type_from_api == "audio/wav" and (file_extension != ".wav" and file_extension != ".wave"): file_extension = ".wav"
|
240 |
elif mime_type_from_api == "audio/mpeg" and file_extension != ".mp3": file_extension = ".mp3"
|
|
|
246 |
generated_files.append(saved_file_path)
|
247 |
logs = log_message(f"✅ قطعه {i+1} تولید شد: {saved_file_path}", logs)
|
248 |
else:
|
249 |
+
if not f"❌ پیام API قطعه {i+1}" in logs: # Avoid duplicate error if API already sent one
|
250 |
+
logs = log_message(f"❌ قطعه {i+1} بدون داده صوتی.", logs)
|
251 |
|
252 |
except Exception as e:
|
253 |
+
error_msg = f"❌ خطا تولید قطعه {i+1}: {type(e).__name__} - {e}"
|
254 |
+
if "API_KEY_INVALID" in str(e): error_msg += "\n🔑 کلید API نامعتبر."
|
255 |
+
elif "permission" in str(e).lower() or "403" in str(e): error_msg += f"\n🚫 عدم دسترسی به {model_name_ui}."
|
256 |
+
elif "429" in str(e) or "quota" in str(e).lower(): error_msg += f"\n🐢 محدودیت Quota."
|
257 |
+
elif "DeadlineExceeded" in str(e) or "504" in str(e): error_msg += f"\n⏱️ Timeout."
|
|
|
258 |
logs = log_message(error_msg, logs)
|
259 |
+
# logs = log_message(traceback.format_exc(), logs) # DEBUG
|
260 |
continue
|
261 |
|
262 |
if i < len(text_chunks) - 1 and sleep_between_requests_ui > 0:
|
|
|
264 |
time.sleep(sleep_between_requests_ui)
|
265 |
|
266 |
if not generated_files:
|
267 |
+
return log_message("❌ هیچ فایل صوتی تولید نشد!", logs), None, None, gr.update(visible=False)
|
|
|
268 |
|
269 |
+
logs = log_message(f"\n🎉 {len(generated_files)} فایل صوتی تولید شد!", logs)
|
270 |
+
final_audio_path, zip_file_path, zip_visible = None, None, False
|
271 |
|
272 |
if merge_audio_files_ui and len(generated_files) > 1:
|
273 |
if not PYDUB_AVAILABLE:
|
274 |
+
logs = log_message("⚠️ pydub نیست. ارائه ZIP.", logs)
|
275 |
+
zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all", log_message, logs)
|
276 |
if zip_file_path: zip_visible = True
|
277 |
if generated_files: final_audio_path = generated_files[0]
|
278 |
else:
|
279 |
+
final_audio_path, logs = merge_audio_files_func(generated_files, f"{output_filename_base_ui}_merged.wav", log_message, logs)
|
|
|
280 |
if final_audio_path:
|
281 |
+
logs = log_message(f"🎵 ادغام شده: {final_audio_path}", logs)
|
282 |
if delete_partial_files_ui:
|
283 |
for fp_del in generated_files:
|
284 |
if fp_del != final_audio_path:
|
285 |
+
try: os.remove(fp_del); logs = log_message(f"🗑️ حذف: {fp_del}", logs)
|
286 |
+
except Exception as e_del: logs = log_message(f"⚠️ خطا حذف {fp_del}: {e_del}", logs)
|
287 |
else:
|
288 |
+
logs = log_message("⚠️ ادغام ناموفق. ارائه ZIP.", logs)
|
289 |
+
zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all", log_message, logs)
|
290 |
if zip_file_path: zip_visible = True
|
291 |
if generated_files: final_audio_path = generated_files[0]
|
292 |
elif len(generated_files) == 1:
|
293 |
final_audio_path = generated_files[0]
|
294 |
logs = log_message(f"🎵 فایل نهایی: {final_audio_path}", logs)
|
295 |
+
elif len(generated_files) > 1: # Not merging
|
296 |
+
zip_file_path, logs = create_zip_file(generated_files, f"{output_filename_base_ui}_all", log_message, logs)
|
297 |
if zip_file_path: zip_visible = True
|
298 |
+
if generated_files: final_audio_path = generated_files[0]
|
299 |
|
300 |
if not final_audio_path and not zip_file_path:
|
301 |
+
return log_message("🛑 خروجی صوتی نیست.", logs), None, None, gr.update(visible=False)
|
302 |
return logs, final_audio_path, zip_file_path, gr.update(visible=zip_visible)
|
303 |
|
304 |
+
# --- Gradio UI (Largely unchanged, ensure default values are correct) ---
|
305 |
css = """
|
306 |
body { direction: rtl; }
|
307 |
.rtl_override { direction: rtl !important; text-align: right !important; }
|
|
|
311 |
.gradio-container { max-width: 800px !important; margin: auto !important; }
|
312 |
"""
|
313 |
API_KEY_FROM_ENV = os.environ.get("GEMINI_API_KEY")
|
314 |
+
|
315 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"), css=css) as demo:
|
316 |
gr.Markdown(
|
317 |
"""
|
318 |
<div style='text-align: center; font-family: "Arial", sans-serif;'>
|
319 |
<h1 class='rtl_override'>تبدیل متن به گفتار با Gemini API</h1>
|
320 |
+
<p class='rtl_override'>توجه: قابلیت "پرامپت سبک گفتار" فعلا برای این مدلها به طور کامل پشتیبانی نمیشود.</p>
|
|
|
|
|
321 |
</div>
|
322 |
"""
|
323 |
+
) # Added a note about speech prompt
|
324 |
api_key_status_text = "⚠️ کلید API جمینای (GEMINI_API_KEY) در Secrets این اسپیس تنظیم نشده است."
|
325 |
if API_KEY_FROM_ENV: api_key_status_text = "✅ کلید API جمینای از Secrets بارگذاری شد."
|
326 |
gr.Markdown(f"<p style='text-align:center; color: {'green' if API_KEY_FROM_ENV else 'red'};' class='rtl_override'>{api_key_status_text}</p>")
|
|
|
331 |
input_method_radio = gr.Radio(["ورودی متنی", "آپلود فایل"], label="روش ورودی", value="ورودی متنی", elem_classes="rtl_override")
|
332 |
text_to_speak_area = gr.Textbox(label="متن مورد نظر", placeholder="متن خود را اینجا وارد کنید...", lines=5, visible=True, elem_classes="rtl_override")
|
333 |
uploaded_file_input = gr.File(label="فایل متنی (.txt)", file_types=[".txt"], visible=False, elem_classes="rtl_override") # type: ignore
|
334 |
+
speech_prompt_area = gr.Textbox(label="پرامپت سبک گفتار (اختیاری - فعلا تاثیر محدود)", placeholder="مثال: شاد و پر انرژی", lines=2, elem_classes="rtl_override") # Clarified limited effect
|
335 |
+
|
336 |
gr.Markdown("<h3 class='rtl_override'>تنظیمات مدل و خروجی</h3>", elem_classes="rtl_override")
|
337 |
model_name_dropdown = gr.Dropdown(MODELS_LIST, label="مدل", value=MODELS_LIST[0], elem_classes="rtl_override")
|
338 |
+
speaker_voice_dropdown = gr.Dropdown(SPEAKER_VOICES_LIST, label="گوینده", value="Charon", elem_classes="rtl_override") # Default Charon
|
339 |
temperature_slider = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label="دما", elem_classes="rtl_override")
|
340 |
output_filename_base_input = gr.Textbox(value="gemini_tts_output", label="نام پایه فایل خروجی", elem_classes="rtl_override")
|
341 |
+
|
342 |
with gr.Column(scale=1):
|
343 |
gr.Markdown("<h3 class='rtl_override'>تنظیمات پیشرفته</h3>", elem_classes="rtl_override")
|
344 |
max_chunk_size_slider = gr.Slider(minimum=2000, maximum=4000, step=100, value=3800, label="حداکثر کاراکتر در قطعه", elem_classes="rtl_override")
|
|
|
351 |
submit_button = gr.Button("🎤 تولید صدا", variant="primary", elem_id="submit_button_custom")
|
352 |
gr.Markdown("<h3 class='rtl_override'>خروجی</h3>", elem_classes="rtl_override")
|
353 |
status_output_area = gr.Textbox(label="پیامهای وضعیت", lines=10, interactive=False, elem_classes="rtl_override")
|
354 |
+
|
355 |
with gr.Row():
|
356 |
audio_player_output = gr.Audio(label="فایل صوتی نهایی/اولین قطعه", type="filepath", elem_classes="rtl_override") # type: ignore
|
357 |
zip_file_output = gr.File(label="دانلود همه قطعات (ZIP)", type="filepath", visible=False, elem_classes="rtl_override") # type: ignore
|