Update app.py
Browse files
app.py
CHANGED
@@ -1,38 +1,28 @@
|
|
1 |
import gradio as gr
|
2 |
-
# import base64 # Not used in your core logic
|
3 |
import mimetypes
|
4 |
import os
|
5 |
import re
|
6 |
import struct
|
7 |
import time
|
8 |
-
# import zipfile # Not used in your core logic
|
9 |
from google import genai
|
10 |
-
from google.genai import types #
|
11 |
|
12 |
-
|
13 |
-
# If you strictly want NO changes, this can be removed, but it doesn't affect core logic.
|
14 |
-
import logging
|
15 |
-
# threading and sys were for auto-restart, which is an added feature.
|
16 |
-
# If you strictly want NO changes from your original Python logic, these can be removed.
|
17 |
-
# import threading
|
18 |
-
# import sys
|
19 |
-
import traceback # Useful for debugging, kept for now.
|
20 |
|
21 |
try:
|
22 |
from pydub import AudioSegment
|
23 |
PYDUB_AVAILABLE = True
|
24 |
except ImportError:
|
25 |
PYDUB_AVAILABLE = False
|
|
|
26 |
|
27 |
-
# ---
|
28 |
-
#
|
29 |
-
|
30 |
-
if not logging.getLogger().hasHandlers(): # Avoid reconfiguring if already set
|
31 |
-
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
32 |
-
# --- END: Logging Configuration ---
|
33 |
|
34 |
-
|
35 |
-
# --- START: YOUR ORIGINAL TTS Core Logic (UNCHANGED) ---
|
36 |
SPEAKER_VOICES = [
|
37 |
"Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
|
38 |
"Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
|
@@ -40,15 +30,14 @@ SPEAKER_VOICES = [
|
|
40 |
"Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
|
41 |
"Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
|
42 |
]
|
43 |
-
FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts" # YOUR
|
44 |
DEFAULT_MAX_CHUNK_SIZE = 3800
|
45 |
DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
|
46 |
DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
|
47 |
|
48 |
def _log(message, log_list): # YOUR _log function
|
49 |
log_list.append(message)
|
50 |
-
|
51 |
-
# logging.info(f"[AlphaTTS_User_Log] {message}") # You can uncomment this if you want
|
52 |
|
53 |
def save_binary_file(file_name, data, log_list):
|
54 |
try:
|
@@ -74,27 +63,35 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
|
|
74 |
param = param.strip()
|
75 |
if param.lower().startswith("rate="):
|
76 |
try: rate = int(param.split("=", 1)[1])
|
77 |
-
except ValueError: pass
|
78 |
elif param.startswith("audio/L"):
|
79 |
try: bits = int(param.split("L", 1)[1])
|
80 |
-
except ValueError: pass
|
81 |
return {"bits_per_sample": bits, "rate": rate}
|
82 |
|
83 |
def smart_text_split(text, max_size=3800, log_list=None):
|
84 |
if len(text) <= max_size: return [text]
|
85 |
chunks, current_chunk = [], ""
|
86 |
-
#
|
87 |
-
sentences = re.split(r'(?<=[.!?؟])\s+', text) # Original regex
|
88 |
for sentence in sentences:
|
89 |
if len(current_chunk) + len(sentence) + 1 > max_size:
|
90 |
if current_chunk: chunks.append(current_chunk.strip())
|
91 |
current_chunk = sentence
|
92 |
while len(current_chunk) > max_size:
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
chunks.append(part.strip())
|
97 |
-
else: current_chunk += (" " if current_chunk and sentence else "") + sentence
|
98 |
if current_chunk: chunks.append(current_chunk.strip())
|
99 |
final_chunks = [c for c in chunks if c]
|
100 |
if log_list: _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
|
@@ -102,55 +99,53 @@ def smart_text_split(text, max_size=3800, log_list=None):
|
|
102 |
|
103 |
def merge_audio_files_func(file_paths, output_path, log_list):
|
104 |
if not PYDUB_AVAILABLE:
|
105 |
-
_log("❌ pydub در دسترس نیست.", log_list)
|
106 |
return False
|
107 |
try:
|
108 |
_log(f"🔗 ادغام {len(file_paths)} فایل صوتی...", log_list)
|
109 |
combined = AudioSegment.empty()
|
110 |
for i, fp in enumerate(file_paths):
|
111 |
if os.path.exists(fp):
|
112 |
-
try:
|
113 |
segment = AudioSegment.from_file(fp)
|
114 |
combined += segment
|
115 |
if i < len(file_paths) - 1:
|
116 |
combined += AudioSegment.silent(duration=150)
|
117 |
-
except Exception as
|
118 |
-
_log(f"⚠️ خطای Pydub در پردازش فایل '{fp}'
|
119 |
continue
|
120 |
-
else:
|
121 |
-
|
122 |
-
if len(combined) == 0:
|
123 |
-
_log("❌ هیچ قطعه صوتی برای ادغام
|
124 |
return False
|
125 |
-
|
126 |
combined.export(output_path, format="wav")
|
127 |
_log(f"✅ فایل ادغام شده: {output_path}", log_list)
|
128 |
return True
|
129 |
except Exception as e:
|
130 |
-
_log(f"❌ خطا در ادغام: {e}", log_list)
|
131 |
return False
|
132 |
|
133 |
-
def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list):
|
134 |
output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
|
135 |
max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS
|
136 |
_log(f"🚀 شروع فرآیند با مدل: {FIXED_MODEL_NAME}...", log_list)
|
137 |
|
138 |
-
api_key = os.environ.get("GEMINI_API_KEY") # YOUR
|
139 |
if not api_key:
|
140 |
-
_log("❌ کلید API تنظیم نشده.", log_list)
|
141 |
-
return None # Return only
|
142 |
|
143 |
try:
|
144 |
-
client = genai.Client(api_key=api_key) # YOUR
|
145 |
_log(f"کلاینت Gemini با کلید API برای مدل {FIXED_MODEL_NAME} مقداردهی اولیه شد.", log_list)
|
146 |
except Exception as e:
|
147 |
-
_log(f"❌ خطا در
|
148 |
return None
|
149 |
|
150 |
if not text_input or not text_input.strip():
|
151 |
_log("❌ متن ورودی خالی.", log_list)
|
152 |
return None
|
153 |
-
|
154 |
text_chunks = smart_text_split(text_input, max_chunk, log_list)
|
155 |
if not text_chunks:
|
156 |
_log("❌ متن قابل پردازش نیست.", log_list)
|
@@ -158,72 +153,56 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
|
|
158 |
|
159 |
generated_files = []
|
160 |
for i, chunk in enumerate(text_chunks):
|
161 |
-
_log(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)}...", log_list)
|
162 |
final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
|
163 |
|
164 |
-
#
|
165 |
-
contents = [
|
166 |
-
|
|
|
167 |
temperature=temperature_val,
|
168 |
response_modalities=["audio"],
|
169 |
-
speech_config=
|
170 |
-
voice_config=
|
171 |
-
prebuilt_voice_config=
|
172 |
)
|
173 |
)
|
174 |
)
|
175 |
_log(f"کانفیگ API برای قطعه {i+1}: دما={temperature_val}, صدا={selected_voice}, مدالیته=['audio']", log_list)
|
176 |
-
|
177 |
fname_base = f"{output_base_name}_part{i+1:03d}"
|
178 |
try:
|
|
|
179 |
response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
|
180 |
|
181 |
-
audio_bytes = None
|
182 |
-
mime_type = None
|
183 |
-
|
184 |
if response.candidates and response.candidates[0].content and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data:
|
185 |
inline_data = response.candidates[0].content.parts[0].inline_data
|
186 |
-
|
187 |
mime_type = inline_data.mime_type
|
188 |
_log(f"داده صوتی در candidate.part[0].inline_data برای قطعه {i+1} یافت شد.", log_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
else:
|
190 |
_log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی در مسیر مورد انتظار.", log_list)
|
191 |
-
|
192 |
-
continue
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
mime_type = "audio/wav"
|
197 |
-
|
198 |
-
ext = mimetypes.guess_extension(mime_type) or ".wav"
|
199 |
-
if "audio/L" in mime_type and ext == ".wav":
|
200 |
-
_log(f"تبدیل صدای خام PCM (MIME: {mime_type}) به WAV برای قطعه {i+1}.", log_list)
|
201 |
-
audio_bytes = convert_to_wav(audio_bytes, mime_type)
|
202 |
-
if not ext.startswith("."): ext = "." + ext
|
203 |
-
|
204 |
-
fpath = save_binary_file(f"{fname_base}{ext}", audio_bytes, log_list)
|
205 |
-
if fpath:
|
206 |
-
generated_files.append(fpath)
|
207 |
-
|
208 |
-
except types.StopCandidateException as e_stop: # YOUR ORIGINAL EXCEPTION HANDLING
|
209 |
-
_log(f"❌ تولید برای قطعه {i+1} توسط API متوقف شد: {e_stop}", log_list)
|
210 |
-
_log(f"دلیل توقف: {e_stop.finish_reason if hasattr(e_stop, 'finish_reason') else 'N/A'}", log_list)
|
211 |
continue
|
212 |
-
except Exception as e: # YOUR ORIGINAL EXCEPTION HANDLING
|
213 |
-
_log(f"❌ خطا در تولید قطعه {i+1}: {type(e).__name__} - {e}", log_list)
|
214 |
-
# logging.error(f"Full traceback for error in chunk {i+1}: {traceback.format_exc()}") # More detailed log
|
215 |
-
continue
|
216 |
-
|
217 |
if i < len(text_chunks) - 1 and len(text_chunks) > 1:
|
218 |
_log(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از قطعه بعدی...", log_list)
|
219 |
time.sleep(sleep_time)
|
220 |
|
221 |
if not generated_files:
|
222 |
_log("❌ هیچ فایلی تولید نشد.", log_list)
|
223 |
-
return None
|
224 |
-
|
225 |
_log(f"🎉 {len(generated_files)} فایل(های) صوتی تولید شد.", log_list)
|
226 |
-
|
227 |
final_audio_file = None
|
228 |
final_output_path_base = f"{output_base_name}_final"
|
229 |
|
@@ -232,56 +211,54 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
|
|
232 |
merged_fn = f"{final_output_path_base}.wav"
|
233 |
if os.path.exists(merged_fn):
|
234 |
try: os.remove(merged_fn)
|
235 |
-
except OSError: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}'
|
|
|
236 |
|
237 |
if merge_audio_files_func(generated_files, merged_fn, log_list):
|
238 |
final_audio_file = merged_fn
|
239 |
-
for fp_path in generated_files:
|
240 |
if os.path.abspath(fp_path) != os.path.abspath(merged_fn):
|
241 |
try: os.remove(fp_path)
|
242 |
-
except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}'
|
243 |
-
|
|
|
244 |
_log("⚠️ ادغام فایلهای صوتی ناموفق بود. اولین قطعه ارائه میشود.", log_list)
|
245 |
if generated_files:
|
246 |
try:
|
247 |
-
# Renaming logic from your original code
|
248 |
first_chunk_path = generated_files[0]
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
|
|
|
|
|
|
|
|
257 |
final_audio_file = generated_files[0]
|
258 |
-
|
259 |
-
if final_audio_file: # Additional cleanup from your original code
|
260 |
-
for fp_cleanup in generated_files:
|
261 |
-
if os.path.abspath(fp_cleanup) != os.path.abspath(final_audio_file):
|
262 |
-
try: os.remove(fp_cleanup)
|
263 |
-
except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت دیگر '{fp_cleanup}'.", log_list)
|
264 |
else:
|
265 |
-
_log("⚠️ pydub
|
266 |
if generated_files:
|
267 |
try:
|
268 |
-
# Renaming logic from your original code
|
269 |
first_chunk_path = generated_files[0]
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
|
|
276 |
for i_gf in range(1, len(generated_files)):
|
277 |
try: os.remove(generated_files[i_gf])
|
278 |
-
except
|
279 |
-
except Exception as e_rename_single
|
280 |
-
_log(f"خطا در تغییر نام فایل اولین قطعه (بدون pydub): {
|
281 |
final_audio_file = generated_files[0]
|
282 |
elif len(generated_files) == 1:
|
283 |
try:
|
284 |
-
# Renaming logic from your original code
|
285 |
single_file_path = generated_files[0]
|
286 |
target_ext = os.path.splitext(single_file_path)[1]
|
287 |
final_single_fn = f"{final_output_path_base}{target_ext}"
|
@@ -290,44 +267,41 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
|
|
290 |
if os.path.abspath(single_file_path) != os.path.abspath(final_single_fn):
|
291 |
os.rename(single_file_path, final_single_fn)
|
292 |
final_audio_file = final_single_fn
|
293 |
-
except Exception as e_rename_single_final
|
294 |
-
_log(f"خطا در تغییر نام فایل تکی نهایی: {
|
295 |
final_audio_file = generated_files[0]
|
296 |
|
297 |
if final_audio_file and not os.path.exists(final_audio_file):
|
298 |
_log(f"⚠️ فایل نهایی '{final_audio_file}' وجود ندارد!", log_list)
|
299 |
return None
|
300 |
|
301 |
-
return final_audio_file #
|
302 |
|
303 |
-
#
|
304 |
-
def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature
|
305 |
logs = []
|
306 |
actual_text = ""
|
307 |
if use_file_input:
|
308 |
-
if uploaded_file:
|
309 |
try:
|
310 |
-
# uploaded_file.name is the path to the temporary file
|
311 |
with open(uploaded_file.name, 'r', encoding='utf-8') as f: actual_text = f.read().strip()
|
312 |
-
if not actual_text: return None # Return None
|
313 |
except Exception as e: _log(f"❌ خطا خواندن فایل: {e}", logs); return None
|
314 |
-
else: return None
|
315 |
else:
|
316 |
actual_text = text_to_speak
|
317 |
-
if not actual_text or not actual_text.strip(): return None
|
318 |
|
319 |
final_path = core_generate_audio(actual_text, speech_prompt, speaker_voice, temperature, logs)
|
320 |
-
|
321 |
-
#
|
322 |
-
#
|
323 |
-
|
324 |
-
|
325 |
-
return final_path # Returns only the audio path
|
326 |
-
# --- END: YOUR ORIGINAL TTS Core Logic ---
|
327 |
|
328 |
|
329 |
# --- START: Gradio UI with AlphaTranslator_Styled Appearance ---
|
330 |
-
# (CSS
|
331 |
FLY_PRIMARY_COLOR_HEX = "#4F46E5"
|
332 |
FLY_SECONDARY_COLOR_HEX = "#10B981"
|
333 |
FLY_ACCENT_COLOR_HEX = "#D97706"
|
@@ -339,14 +313,18 @@ FLY_BORDER_COLOR_HEX = "#D1D5DB"
|
|
339 |
FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
|
340 |
FLY_PANEL_BG_SIMPLE = "#E0F2FE"
|
341 |
|
342 |
-
|
343 |
font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
|
344 |
).set(
|
345 |
body_background_fill=FLY_LIGHT_BACKGROUND_HEX,
|
346 |
)
|
347 |
|
348 |
-
|
|
|
|
|
|
|
349 |
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
|
|
|
350 |
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
|
351 |
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
|
352 |
:root {{
|
@@ -355,226 +333,289 @@ custom_css_combined = f"""
|
|
355 |
--fly-text-secondary: {FLY_SUBTLE_TEXT_HEX}; --fly-bg-light: {FLY_LIGHT_BACKGROUND_HEX};
|
356 |
--fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
|
357 |
--fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
|
358 |
-
--font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif;
|
359 |
--font-english: 'Poppins', 'Inter', system-ui, sans-serif;
|
360 |
--radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
|
361 |
-
--shadow-sm: 0 1px 2px 0 rgba(0,0,0,0.05); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -2px rgba(0,0,0,0.1);
|
362 |
-
--shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
|
363 |
--shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1),0 8px 10px -6px rgba(0,0,0,0.1);
|
364 |
--fly-primary-rgb: 79,70,229; --fly-accent-rgb: 217,119,6;
|
365 |
-
|
366 |
-
/* Variables from your original AlphaTTS CSS
|
367 |
-
--app-
|
368 |
-
--app-
|
369 |
-
--
|
370 |
-
--
|
371 |
-
--app-
|
372 |
-
--app-
|
373 |
-
|
374 |
-
|
375 |
-
--
|
376 |
-
--
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
}}
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
.app-header-alpha {{
|
387 |
-
padding: 3rem 1.5rem 4rem 1.5rem; text-align: center;
|
388 |
-
background-image: linear-gradient(135deg, var(--app-header-grad-start) 0%, var(--app-header-grad-end) 100%);
|
389 |
-
color: white; border-bottom-left-radius: var(--radius-card); border-bottom-right-radius: var(--radius-card);
|
390 |
-
box-shadow: 0 6px 20px -5px rgba(0,0,0,0.2);
|
391 |
}}
|
392 |
-
.app-header-
|
393 |
-
|
394 |
-
|
395 |
-
/* Main content panel from your original AlphaTTS CSS (main-content-panel-alpha) */
|
396 |
-
.main-content-panel-alpha {{
|
397 |
-
padding: 1.8rem 1.5rem; max-width: 680px; margin: -2.5rem auto 2rem auto;
|
398 |
-
width: 90%; background-color: var(--app-panel-bg) !important; /* Use var */
|
399 |
-
border-radius: var(--radius-card) !important; /* Use var */
|
400 |
-
box-shadow: var(--shadow-card) !important; /* Use var */
|
401 |
-
position:relative; z-index:10;
|
402 |
}}
|
403 |
-
|
404 |
-
|
405 |
-
.
|
406 |
-
.app-header-alpha p {{font-size:1em !important;}}
|
407 |
}}
|
408 |
|
409 |
-
/*
|
410 |
-
.content-
|
411 |
-
|
412 |
-
background: var(--app-button-bg) !important; color: white !important; border:none !important;
|
413 |
-
border-radius: var(--radius-input) !important; padding: 0.8rem 1.5rem !important;
|
414 |
-
font-weight: 700 !important; font-size:1.05em !important;
|
415 |
-
transition: all 0.3s ease; box-shadow: var(--shadow-button) !important; /* Use var */
|
416 |
-
width:100% !important; margin-top:1.5rem !important;
|
417 |
}}
|
418 |
-
.content-panel-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
}}
|
423 |
|
424 |
-
/*
|
425 |
-
|
426 |
-
.content-panel-
|
427 |
-
.content-panel-
|
428 |
-
.content-panel-
|
429 |
-
.content-panel-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
|
|
|
|
434 |
}}
|
435 |
-
.content-panel-
|
436 |
-
.content-panel-
|
437 |
-
.content-panel-
|
438 |
-
.content-panel-
|
439 |
-
|
440 |
-
|
441 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
442 |
}}
|
443 |
-
|
444 |
-
.content-panel-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
{{
|
446 |
-
|
447 |
-
font-size: 0.95em !important; margin-bottom: 0.5rem !important;
|
448 |
}}
|
449 |
-
|
450 |
-
|
451 |
-
label
|
452 |
-
label
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
#output_audio_player_alpha_v3 audio, .output-audio-player-tts audio {{ width: 100%; border-radius: var(--radius-input); margin-top:0.8rem; }}
|
457 |
-
.temp_description_class_alpha_v3 {{ font-size: 0.85em; color: #777; margin-top: -0.4rem; margin-bottom: 1rem; }}
|
458 |
-
|
459 |
-
/* Footer from your original AlphaTTS CSS */
|
460 |
-
.app-footer-final {{
|
461 |
-
text-align:center;font-size:0.9em;color: var(--app-text-secondary);opacity:0.8;
|
462 |
-
margin-top:3rem;padding:1.5rem 0; border-top:1px solid var(--app-border-color);
|
463 |
}}
|
464 |
-
|
465 |
-
|
466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
"""
|
475 |
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
480 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
|
482 |
-
|
483 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
484 |
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False, elem_id="use_file_cb_alpha_v3")
|
489 |
-
|
490 |
-
uploaded_file_input = gr.File(
|
491 |
-
label=" ",
|
492 |
-
file_types=['.txt'],
|
493 |
-
visible=False,
|
494 |
-
elem_id="file_uploader_alpha_main_v3" # YOUR ELEM_ID
|
495 |
-
)
|
496 |
-
|
497 |
-
text_to_speak_tb = gr.Textbox(
|
498 |
-
label="متن فارسی برای تبدیل",
|
499 |
-
placeholder="مثال: سلام، فردا هوا چطور است؟",
|
500 |
-
lines=5,
|
501 |
-
value="",
|
502 |
-
visible=True,
|
503 |
-
elem_id="text_input_main_alpha_v3" # YOUR ELEM_ID
|
504 |
-
)
|
505 |
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
512 |
|
513 |
-
|
514 |
-
|
515 |
-
placeholder="مثال: با لحنی شاد و پرانرژی",
|
516 |
-
value="با لحنی دوستانه و رسا صحبت کن.",
|
517 |
-
lines=2, elem_id="speech_prompt_alpha_v3" # YOUR ELEM_ID
|
518 |
-
)
|
519 |
-
|
520 |
-
speaker_voice_dd = gr.Dropdown(
|
521 |
-
SPEAKER_VOICES, label="انتخاب گوینده و لهجه", value="Charon", elem_id="speaker_voice_alpha_v3" # YOUR ELEM_ID
|
522 |
-
)
|
523 |
-
|
524 |
-
temperature_slider = gr.Slider(
|
525 |
-
minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="میزان خلاقیت صدا",
|
526 |
-
elem_id="temperature_slider_alpha_v3" # YOUR ELEM_ID
|
527 |
-
)
|
528 |
-
gr.Markdown("<p class='temp_description_class_alpha_v3'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایینتر = یکنواختی بیشتر.</p>")
|
529 |
|
530 |
-
generate_button = gr.Button(
|
531 |
-
"🚀 تولید و پخش صدا",
|
532 |
-
elem_classes=["generate-button-tts"], # Applied class for styling from combined CSS
|
533 |
-
elem_id="generate_button_alpha_v3" # YOUR ELEM_ID
|
534 |
-
)
|
535 |
-
|
536 |
-
output_audio = gr.Audio(
|
537 |
-
label=" ", type="filepath",
|
538 |
-
elem_id="output_audio_player_alpha_v3", # YOUR ELEM_ID
|
539 |
-
elem_classes=["output-audio-player-tts"] # Added class for CSS consistency
|
540 |
-
)
|
541 |
-
|
542 |
-
generate_button.click(
|
543 |
-
fn=gradio_tts_interface, # YOUR INTERFACE FUNCTION
|
544 |
-
# Your original inputs, progress removed for now as it was not fully integrated
|
545 |
-
inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
|
546 |
-
outputs=[output_audio] # Your interface function returns only audio path
|
547 |
-
)
|
548 |
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
examples=[
|
553 |
-
[False, None, "سلام بر شما، امیدوارم روز خوبی داشته باشید.", "با لحنی گرم و صمیمی.", "Zephyr", 0.85],
|
554 |
-
[False, None, "این یک آزمایش برای بررسی کیفیت صدای تولید شده توسط هوش مصنوعی آلفا است.", "با صدایی طبیعی و روان.", "Charon", 0.9],
|
555 |
-
],
|
556 |
-
inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
|
557 |
-
outputs=[output_audio], # Outputting only to audio as per your original
|
558 |
fn=gradio_tts_interface,
|
559 |
-
|
|
|
560 |
)
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
|
566 |
if __name__ == "__main__":
|
567 |
-
#
|
568 |
-
#
|
569 |
-
|
570 |
-
#
|
571 |
-
|
572 |
-
|
573 |
-
# threading.Thread(target=auto_restart_service, daemon=True).start()
|
574 |
|
575 |
demo.launch(
|
576 |
-
server_name="0.0.0.0",
|
577 |
-
server_port=int(os.getenv("PORT", 7860)),
|
578 |
-
debug=os.environ.get("GRADIO_DEBUG", "False").lower() == "true",
|
579 |
show_error=True
|
580 |
)
|
|
|
1 |
import gradio as gr
|
2 |
+
# import base64 # Not used in your original core logic
|
3 |
import mimetypes
|
4 |
import os
|
5 |
import re
|
6 |
import struct
|
7 |
import time
|
8 |
+
# import zipfile # Not used in your original core logic
|
9 |
from google import genai
|
10 |
+
from google.genai import types as genai_types # Aliased to avoid conflict with built-in 'types'
|
11 |
|
12 |
+
import logging # Standard Python logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
try:
|
15 |
from pydub import AudioSegment
|
16 |
PYDUB_AVAILABLE = True
|
17 |
except ImportError:
|
18 |
PYDUB_AVAILABLE = False
|
19 |
+
# logging.warning("Pydub is not available. Audio merging will be disabled.") # Initialized later
|
20 |
|
21 |
+
# --- Basic Logging Setup ---
|
22 |
+
# Using a simpler logging setup if the AlphaTranslator_Styled one is too complex for "no other changes"
|
23 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
|
|
|
24 |
|
25 |
+
# --- START: Core TTS Logic from YOUR AlphaTTS_Original (UNCHANGED) ---
|
|
|
26 |
SPEAKER_VOICES = [
|
27 |
"Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
|
28 |
"Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
|
|
|
30 |
"Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
|
31 |
"Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
|
32 |
]
|
33 |
+
# Gemini model used for every TTS request; it is fixed rather than user-selectable.
FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts"
# Maximum number of characters sent to the API in a single text chunk.
DEFAULT_MAX_CHUNK_SIZE = 3800
# Seconds to sleep between consecutive chunk requests.
DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
# Base file name (without extension) for the per-chunk and final audio files.
DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
|
37 |
|
38 |
def _log(message, log_list): # YOUR _log function
|
39 |
log_list.append(message)
|
40 |
+
logging.info(f"[AlphaTTS_LOG] {message}") # Standard logging also
|
|
|
41 |
|
42 |
def save_binary_file(file_name, data, log_list):
|
43 |
try:
|
|
|
63 |
param = param.strip()
|
64 |
if param.lower().startswith("rate="):
|
65 |
try: rate = int(param.split("=", 1)[1])
|
66 |
+
except ValueError: pass
|
67 |
elif param.startswith("audio/L"):
|
68 |
try: bits = int(param.split("L", 1)[1])
|
69 |
+
except ValueError: pass
|
70 |
return {"bits_per_sample": bits, "rate": rate}
|
71 |
|
72 |
def smart_text_split(text, max_size=3800, log_list=None):
|
73 |
if len(text) <= max_size: return [text]
|
74 |
chunks, current_chunk = [], ""
|
75 |
+
sentences = re.split(r'(?<=[.!?؟۔])\s+', text) # Added Persian full stop for robustness
|
|
|
76 |
for sentence in sentences:
|
77 |
if len(current_chunk) + len(sentence) + 1 > max_size:
|
78 |
if current_chunk: chunks.append(current_chunk.strip())
|
79 |
current_chunk = sentence
|
80 |
while len(current_chunk) > max_size:
|
81 |
+
split_idx = -1
|
82 |
+
for punc in ['،', ',', ';', ':', ' ']:
|
83 |
+
try:
|
84 |
+
idx = current_chunk.rindex(punc, max_size // 2, max_size)
|
85 |
+
if idx > split_idx:
|
86 |
+
split_idx = idx
|
87 |
+
except ValueError:
|
88 |
+
pass
|
89 |
+
if split_idx != -1 :
|
90 |
+
part, current_chunk = current_chunk[:split_idx+1], current_chunk[split_idx+1:]
|
91 |
+
else:
|
92 |
+
part, current_chunk = current_chunk[:max_size], current_chunk[max_size:]
|
93 |
chunks.append(part.strip())
|
94 |
+
else: current_chunk += (" " if current_chunk and sentence else "") + sentence
|
95 |
if current_chunk: chunks.append(current_chunk.strip())
|
96 |
final_chunks = [c for c in chunks if c]
|
97 |
if log_list: _log(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list)
|
|
|
99 |
|
100 |
def merge_audio_files_func(file_paths, output_path, log_list, gap_ms=150):
    """Concatenate audio files into a single WAV file using pydub.

    Files that do not exist or that pydub cannot decode are logged and
    skipped. A short silence is inserted *between* successfully loaded
    segments only, so a skipped file never leaves leading or trailing
    silence in the result.

    Args:
        file_paths: Paths of the audio files to merge, in playback order.
        output_path: Destination path; the result is exported as WAV.
        log_list: List that receives progress and error messages via ``_log``.
        gap_ms: Length of the silence placed between segments, in
            milliseconds. Defaults to 150.

    Returns:
        True if a merged file was written, False if pydub is unavailable,
        nothing could be merged, or any unexpected error occurred.
    """
    if not PYDUB_AVAILABLE:
        _log("❌ pydub در دسترس نیست. ادغام انجام نشد.", log_list)
        return False

    try:
        _log(f"🔗 ادغام {len(file_paths)} فایل صوتی...", log_list)
        combined = AudioSegment.empty()
        merged_any = False

        for fp in file_paths:
            if not os.path.exists(fp):
                _log(f"⚠️ فایل پیدا نشد: {fp}", log_list)
                continue
            try:
                segment = AudioSegment.from_file(fp)
                # Put the gap in front of every segment except the first one
                # that loaded, and append gap + segment in a single step so a
                # failure cannot leave a dangling gap behind.
                if merged_any:
                    segment = AudioSegment.silent(duration=gap_ms) + segment
                combined += segment
                merged_any = True
            except Exception as e_pydub:
                # Best effort: one bad chunk must not abort the whole merge.
                _log(f"⚠️ خطای Pydub در پردازش فایل '{fp}': {e_pydub}. از این فایل صرف نظر می شود.", log_list)
                continue

        if len(combined) == 0:
            _log("❌ هیچ قطعه صوتی برای ادغام وجود ندارد.", log_list)
            return False

        combined.export(output_path, format="wav")
        _log(f"✅ فایل ادغام شده: {output_path}", log_list)
        return True
    except Exception as e:
        # Callers rely on a boolean result, so report the failure instead of raising.
        _log(f"❌ خطا در ادغام: {e}", log_list)
        return False
|
128 |
|
129 |
+
def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list): # YOUR core_generate_audio
|
130 |
output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
|
131 |
max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS
|
132 |
_log(f"🚀 شروع فرآیند با مدل: {FIXED_MODEL_NAME}...", log_list)
|
133 |
|
134 |
+
api_key = os.environ.get("GEMINI_API_KEY") # YOUR WAY OF GETTING API KEY
|
135 |
if not api_key:
|
136 |
+
_log("❌ کلید API با نام GEMINI_API_KEY در متغیرهای محیطی تنظیم نشده.", log_list)
|
137 |
+
return None # Return None only, as per your original AlphaTTS
|
138 |
|
139 |
try:
|
140 |
+
client = genai.Client(api_key=api_key) # YOUR WAY OF CLIENT INSTANTIATION
|
141 |
_log(f"کلاینت Gemini با کلید API برای مدل {FIXED_MODEL_NAME} مقداردهی اولیه شد.", log_list)
|
142 |
except Exception as e:
|
143 |
+
_log(f"❌ خطا در مقداردهی اولیه کلاینت Gemini: {e}", log_list)
|
144 |
return None
|
145 |
|
146 |
if not text_input or not text_input.strip():
|
147 |
_log("❌ متن ورودی خالی.", log_list)
|
148 |
return None
|
|
|
149 |
text_chunks = smart_text_split(text_input, max_chunk, log_list)
|
150 |
if not text_chunks:
|
151 |
_log("❌ متن قابل پردازش نیست.", log_list)
|
|
|
153 |
|
154 |
generated_files = []
|
155 |
for i, chunk in enumerate(text_chunks):
|
156 |
+
_log(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)} (صدا: {selected_voice}, دما: {temperature_val})...", log_list)
|
157 |
final_text = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk
|
158 |
|
159 |
+
# Using genai_types (aliased) for Content, Part etc. as in your original imports
|
160 |
+
contents = [genai_types.Content(role="user", parts=[genai_types.Part.from_text(text=final_text)])]
|
161 |
+
|
162 |
+
config = genai_types.GenerateContentConfig( # YOUR CONFIG OBJECT
|
163 |
temperature=temperature_val,
|
164 |
response_modalities=["audio"],
|
165 |
+
speech_config=genai_types.SpeechConfig(
|
166 |
+
voice_config=genai_types.VoiceConfig(
|
167 |
+
prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)
|
168 |
)
|
169 |
)
|
170 |
)
|
171 |
_log(f"کانفیگ API برای قطعه {i+1}: دما={temperature_val}, صدا={selected_voice}, مدالیته=['audio']", log_list)
|
|
|
172 |
fname_base = f"{output_base_name}_part{i+1:03d}"
|
173 |
try:
|
174 |
+
# YOUR API CALL
|
175 |
response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)
|
176 |
|
|
|
|
|
|
|
177 |
if response.candidates and response.candidates[0].content and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data:
|
178 |
inline_data = response.candidates[0].content.parts[0].inline_data
|
179 |
+
data_buffer = inline_data.data
|
180 |
mime_type = inline_data.mime_type
|
181 |
_log(f"داده صوتی در candidate.part[0].inline_data برای قطعه {i+1} یافت شد.", log_list)
|
182 |
+
ext = mimetypes.guess_extension(mime_type) or ".wav"
|
183 |
+
if "audio/L" in mime_type and ext == ".wav":
|
184 |
+
_log(f"تبدیل صدای خام PCM (MIME: {mime_type}) به WAV برای قطعه {i+1}.", log_list)
|
185 |
+
data_buffer = convert_to_wav(data_buffer, mime_type)
|
186 |
+
if not ext.startswith("."): ext = "." + ext
|
187 |
+
fpath = save_binary_file(f"{fname_base}{ext}", data_buffer, log_list)
|
188 |
+
if fpath: generated_files.append(fpath)
|
189 |
else:
|
190 |
_log(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی در مسیر مورد انتظار.", log_list)
|
191 |
+
_log(f"ساختار کامل پاسخ (اولین 500 کاراکتر): {str(response)[:500]}", log_list)
|
192 |
+
# continue # As per your original code, it continues
|
193 |
+
except Exception as e: # Catching generic Exception as in your original
|
194 |
+
_log(f"❌ خطا در تولید قطعه {i+1}: {e}", log_list)
|
195 |
+
# traceback.format_exc() was not in your original core_generate_audio, so removed here
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
continue
|
|
|
|
|
|
|
|
|
|
|
197 |
if i < len(text_chunks) - 1 and len(text_chunks) > 1:
|
198 |
_log(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از قطعه بعدی...", log_list)
|
199 |
time.sleep(sleep_time)
|
200 |
|
201 |
if not generated_files:
|
202 |
_log("❌ هیچ فایلی تولید نشد.", log_list)
|
203 |
+
return None # Return None only as per your original AlphaTTS
|
204 |
+
|
205 |
_log(f"🎉 {len(generated_files)} فایل(های) صوتی تولید شد.", log_list)
|
|
|
206 |
final_audio_file = None
|
207 |
final_output_path_base = f"{output_base_name}_final"
|
208 |
|
|
|
211 |
merged_fn = f"{final_output_path_base}.wav"
|
212 |
if os.path.exists(merged_fn):
|
213 |
try: os.remove(merged_fn)
|
214 |
+
except OSError: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}' (خطای سیستم عامل)", log_list)
|
215 |
+
except Exception as e_rm: _log(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e_rm}", log_list)
|
216 |
|
217 |
if merge_audio_files_func(generated_files, merged_fn, log_list):
|
218 |
final_audio_file = merged_fn
|
219 |
+
for fp_path in generated_files:
|
220 |
if os.path.abspath(fp_path) != os.path.abspath(merged_fn):
|
221 |
try: os.remove(fp_path)
|
222 |
+
except OSError: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}' (خطای سیستم عامل)", log_list)
|
223 |
+
except Exception as e_del: _log(f"⚠️ عدم امکان حذف فایل موقت '{fp_path}': {e_del}", log_list)
|
224 |
+
else:
|
225 |
_log("⚠️ ادغام فایلهای صوتی ناموفق بود. اولین قطعه ارائه میشود.", log_list)
|
226 |
if generated_files:
|
227 |
try:
|
|
|
228 |
first_chunk_path = generated_files[0]
|
229 |
+
target_ext = os.path.splitext(first_chunk_path)[1]
|
230 |
+
fallback_fn = f"{final_output_path_base}{target_ext}" # Simplified name for fallback
|
231 |
+
if os.path.exists(fallback_fn) and os.path.abspath(first_chunk_path) != os.path.abspath(fallback_fn):
|
232 |
+
os.remove(fallback_fn)
|
233 |
+
if os.path.abspath(first_chunk_path) != os.path.abspath(fallback_fn):
|
234 |
+
os.rename(first_chunk_path, fallback_fn)
|
235 |
+
final_audio_file = fallback_fn
|
236 |
+
for i_gf in range(1, len(generated_files)):
|
237 |
+
try: os.remove(generated_files[i_gf])
|
238 |
+
except: pass # Keep silent as per your original
|
239 |
+
except Exception as e_rename_fb:
|
240 |
+
_log(f"خطا در تغییر نام فایل اولین قطعه: {e_rename_fb}", log_list) # Was `e_rename` in your original
|
241 |
final_audio_file = generated_files[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
else:
|
243 |
+
_log("⚠️ pydub نیست. اولین قطعه ارائه میشود.", log_list)
|
244 |
if generated_files:
|
245 |
try:
|
|
|
246 |
first_chunk_path = generated_files[0]
|
247 |
+
target_ext = os.path.splitext(first_chunk_path)[1]
|
248 |
+
single_fallback_fn = f"{final_output_path_base}{target_ext}" # Simplified name
|
249 |
+
if os.path.exists(single_fallback_fn) and os.path.abspath(first_chunk_path) != os.path.abspath(single_fallback_fn):
|
250 |
+
os.remove(single_fallback_fn)
|
251 |
+
if os.path.abspath(first_chunk_path) != os.path.abspath(single_fallback_fn):
|
252 |
+
os.rename(first_chunk_path, single_fallback_fn)
|
253 |
+
final_audio_file = single_fallback_fn
|
254 |
for i_gf in range(1, len(generated_files)):
|
255 |
try: os.remove(generated_files[i_gf])
|
256 |
+
except: pass # Keep silent
|
257 |
+
except Exception as e_rename_single_npd: # Was `e_rename_single` in your original
|
258 |
+
_log(f"خطا در تغییر نام فایل اولین قطعه (بدون pydub): {e_rename_single_npd}", log_list)
|
259 |
final_audio_file = generated_files[0]
|
260 |
elif len(generated_files) == 1:
|
261 |
try:
|
|
|
262 |
single_file_path = generated_files[0]
|
263 |
target_ext = os.path.splitext(single_file_path)[1]
|
264 |
final_single_fn = f"{final_output_path_base}{target_ext}"
|
|
|
267 |
if os.path.abspath(single_file_path) != os.path.abspath(final_single_fn):
|
268 |
os.rename(single_file_path, final_single_fn)
|
269 |
final_audio_file = final_single_fn
|
270 |
+
except Exception as e_rename_sgl_final: # Was `e_rename_single_final` in your original
|
271 |
+
_log(f"خطا در تغییر نام فایل تکی نهایی: {e_rename_sgl_final}", log_list)
|
272 |
final_audio_file = generated_files[0]
|
273 |
|
274 |
if final_audio_file and not os.path.exists(final_audio_file):
|
275 |
_log(f"⚠️ فایل نهایی '{final_audio_file}' وجود ندارد!", log_list)
|
276 |
return None
|
277 |
|
278 |
+
return final_audio_file # Returns only path, as per your original AlphaTTS
|
279 |
|
280 |
+
# Your original gradio_tts_interface
|
281 |
+
def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature, progress=gr.Progress(track_tqdm=True)):
    """Gradio callback: pick the input text and hand it to ``core_generate_audio``.

    Args:
        use_file_input: When truthy, the text is read from ``uploaded_file``;
            otherwise ``text_to_speak`` is used.
        uploaded_file: Value delivered by the file component. Either an object
            whose ``name`` attribute is the path of the uploaded file, or the
            path itself.
        text_to_speak: Text typed by the user (used when ``use_file_input`` is falsy).
        speech_prompt: Forwarded unchanged to ``core_generate_audio``.
        speaker_voice: Forwarded unchanged to ``core_generate_audio``.
        temperature: Forwarded unchanged to ``core_generate_audio``.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        Whatever ``core_generate_audio`` returns (the audio file path), or
        ``None`` when there is no usable input text or the file cannot be read.
    """
    logs = []

    if use_file_input:
        if not uploaded_file:
            return None
        # Depending on the Gradio version/configuration the callback receives
        # either a wrapper exposing the path as `.name` or the path string itself.
        file_path = getattr(uploaded_file, "name", uploaded_file)
        try:
            # utf-8-sig decodes plain UTF-8 too, but drops a leading BOM so it
            # is neither counted as content nor sent to the synthesiser.
            with open(file_path, "r", encoding="utf-8-sig") as text_file:
                actual_text = text_file.read().strip()
        except Exception as e:
            # Deliberate best-effort at the UI boundary: an unreadable upload
            # yields "no audio" instead of an exception in the callback.
            _log(f"❌ خطا خواندن فایل: {e}", logs)
            return None
    else:
        actual_text = text_to_speak

    # Covers None, the empty string and whitespace-only input for both sources.
    if not actual_text or not actual_text.strip():
        return None

    # `logs` is filled by the generator but intentionally not surfaced here.
    return core_generate_audio(actual_text, speech_prompt, speaker_voice, temperature, logs)
|
300 |
+
# --- END: Core TTS Logic from YOUR AlphaTTS_Original (UNCHANGED) ---
|
|
|
|
|
|
|
301 |
|
302 |
|
303 |
# --- START: Gradio UI with AlphaTranslator_Styled Appearance ---
|
304 |
+
# (Using CSS variables from AlphaTranslator_Styled for colors and fonts)
|
305 |
# Brand palette as hex strings. The stylesheet's --fly-primary-rgb (79,70,229)
# and --fly-accent-rgb (217,119,6) are the decimal RGB forms of the primary and
# accent colours below, so update both places together.
FLY_PRIMARY_COLOR_HEX = "#4F46E5"
FLY_SECONDARY_COLOR_HEX = "#10B981"
FLY_ACCENT_COLOR_HEX = "#D97706"
|
|
|
313 |
# Background colours interpolated into the stylesheet as the CSS custom
# properties --fly-input-bg-simple and --fly-panel-bg-simple.
FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
FLY_PANEL_BG_SIMPLE = "#E0F2FE"
|
315 |
|
316 |
+
# Theme object passed to gr.Blocks(theme=...): Gradio's Base theme with an
# "Inter"-first font stack and the page body background overridden with
# FLY_LIGHT_BACKGROUND_HEX.
app_theme_outer_styled = gr.themes.Base(
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
).set(
    body_background_fill=FLY_LIGHT_BACKGROUND_HEX,
)
|
321 |
|
322 |
+
# CSS from AlphaTranslator_Styled, adapted slightly for your component names/IDs if needed
|
323 |
+
# Your original component IDs are like "use_file_cb_alpha_v3", "file_uploader_alpha_main_v3", etc.
|
324 |
+
# The CSS below uses general selectors but can be made more specific if those IDs are kept.
|
325 |
+
applied_css_for_alphatts = f"""
|
326 |
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
|
327 |
+
/* Poppins and Inter are from AlphaTranslator_Styled, Vazirmatn from your AlphaTTS_Original theme */
|
328 |
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
|
329 |
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
|
330 |
:root {{
|
|
|
333 |
--fly-text-secondary: {FLY_SUBTLE_TEXT_HEX}; --fly-bg-light: {FLY_LIGHT_BACKGROUND_HEX};
|
334 |
--fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
|
335 |
--fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
|
336 |
+
--font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif; /* Vazirmatn prioritized */
|
337 |
--font-english: 'Poppins', 'Inter', system-ui, sans-serif;
|
338 |
--radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
|
|
|
|
|
339 |
--shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1),0 8px 10px -6px rgba(0,0,0,0.1);
|
340 |
--fly-primary-rgb: 79,70,229; --fly-accent-rgb: 217,119,6;
|
341 |
+
|
342 |
+
/* Variables from your original AlphaTTS CSS if they were different and needed */
|
343 |
+
/* For example, if your original had --app-button-bg for the blue button */
|
344 |
+
--app-button-bg-original: #2979FF; /* Blue from your original AlphaTTS button */
|
345 |
+
--shadow-button-original: 0 4px 10px -2px rgba(41,121,255,0.5);
|
346 |
+
--radius-input-original: 8px;
|
347 |
+
--app-border-color-original: #E0E0E0;
|
348 |
+
--app-input-bg-original: #F7F7F7;
|
349 |
+
}}
|
350 |
+
body {{
|
351 |
+
font-family:var(--font-global); direction:rtl; background-color:var(--fly-bg-light);
|
352 |
+
color:var(--fly-text-primary); line-height:1.7; font-size:16px;
|
353 |
+
}}
|
354 |
+
.gradio-container {{ /* Overall container styling from AlphaTranslator_Styled */
|
355 |
+
max-width:100% !important; width:100% !important; min-height:100vh;
|
356 |
+
margin:0 auto !important; padding:0 !important; border-radius:0 !important;
|
357 |
+
box-shadow:none !important; background:linear-gradient(170deg, #E0F2FE 0%, #F3E8FF 100%);
|
358 |
+
display:flex; flex-direction:column;
|
359 |
+
}}
|
360 |
+
/* Header styling from AlphaTranslator_Styled */
|
361 |
+
.app-header-alphatts {{ /* Changed class name slightly to avoid conflict if both apps run */
|
362 |
+
text-align:center; padding:2.5rem 1rem; margin:0;
|
363 |
+
background:linear-gradient(135deg,var(--fly-primary) 0%,var(--fly-secondary) 100%);
|
364 |
+
color:var(--fly-bg-white); border-bottom-left-radius:var(--radius-xl);
|
365 |
+
border-bottom-right-radius:var(--radius-xl); box-shadow:var(--shadow-lg);
|
366 |
+
position:relative; overflow:hidden;
|
367 |
}}
|
368 |
+
.app-header-alphatts::before {{ /* Decorative element from AlphaTranslator_Styled */
|
369 |
+
content:''; position:absolute; top:-50px; right:-50px; width:150px; height:150px;
|
370 |
+
background:rgba(255,255,255,0.1); border-radius:var(--radius-full);
|
371 |
+
opacity:0.5; transform:rotate(45deg);
|
|
|
|
|
|
|
|
|
|
|
372 |
}}
|
373 |
+
.app-header-alphatts h1 {{ /* h1 from AlphaTranslator_Styled */
|
374 |
+
font-size:2.25em !important; font-weight:800 !important; margin:0 0 0.5rem 0;
|
375 |
+
font-family:var(--font-english); letter-spacing:-0.5px; text-shadow:0 2px 4px rgba(0,0,0,0.1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
}}
|
377 |
+
.app-header-alphatts p {{ /* p from AlphaTranslator_Styled */
|
378 |
+
font-size:1em !important; margin-top:0.25rem; font-weight:400;
|
379 |
+
color:rgba(255,255,255,0.85) !important;
|
|
|
380 |
}}
|
381 |
|
382 |
+
/* Main content panel styling from AlphaTranslator_Styled */
|
383 |
+
.main-content-area-alphatts {{ /* Changed class name slightly */
|
384 |
+
flex-grow:1; padding:0.75rem; width:100%; margin:0 auto; box-sizing:border-box;
|
|
|
|
|
|
|
|
|
|
|
385 |
}}
|
386 |
+
.content-panel-alphatts {{ /* Changed class name slightly */
|
387 |
+
background-color:var(--fly-bg-white); padding:1rem; border-radius:var(--radius-xl);
|
388 |
+
box-shadow:var(--shadow-xl); margin-top:-2rem; position:relative; z-index:10;
|
389 |
+
margin-bottom:2rem; width:100%; box-sizing:border-box;
|
390 |
}}
|
391 |
|
392 |
+
/* Styling for YOUR UI elements, applying AlphaTranslator_Styled aesthetics */
|
393 |
+
/* Inputs (Textbox, Dropdown, File) */
|
394 |
+
.content-panel-alphatts .gr-input > label + div > textarea,
|
395 |
+
.content-panel-alphatts .gr-dropdown > label + div > div > input,
|
396 |
+
.content-panel-alphatts .gr-dropdown > label + div > div > select,
|
397 |
+
.content-panel-alphatts .gr-textbox > label + div > textarea,
|
398 |
+
.content-panel-alphatts .gr-file > label + div /* For file input styling */
|
399 |
+
{{
|
400 |
+
border-radius:var(--radius-input-original) !important; /* Your original radius */
|
401 |
+
border:1.5px solid var(--fly-border-color) !important; /* Border from AlphaTranslator */
|
402 |
+
font-size:0.95em !important; background-color:var(--fly-input-bg-simple) !important; /* BG from AlphaTranslator */
|
403 |
+
padding:10px 12px !important; color:var(--fly-text-primary) !important;
|
404 |
}}
|
405 |
+
.content-panel-alphatts .gr-input > label + div > textarea:focus,
|
406 |
+
.content-panel-alphatts .gr-dropdown > label + div > div > input:focus,
|
407 |
+
.content-panel-alphatts .gr-dropdown > label + div > div > select:focus,
|
408 |
+
.content-panel-alphatts .gr-textbox > label + div > textarea:focus,
|
409 |
+
.content-panel-alphatts .gr-file > label + div:focus-within
|
410 |
+
{{
|
411 |
+
border-color:var(--fly-primary) !important; /* Focus border from AlphaTranslator */
|
412 |
+
box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;
|
413 |
+
background-color:var(--fly-bg-white) !important;
|
414 |
+
}}
|
415 |
+
.content-panel-alphatts .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
|
416 |
+
|
417 |
+
/* Button: Using --fly-accent for consistency with AlphaTranslator's primary action color */
|
418 |
+
.content-panel-alphatts #generate_button_alpha_v3, /* Gradio renders elem_id as the element's id attribute */
.content-panel-alphatts button[variant="primary"] /* General primary button */
{{
    background:var(--fly-accent) !important; /* Orange accent from AlphaTranslator */
    margin-top:1.5rem !important; padding:12px 20px !important; /* Adjusted padding */
    transition:all 0.25s ease-in-out !important; color:white !important; font-weight:600 !important;
    border-radius:var(--radius-input-original) !important; /* Your original radius */ border:none !important;
    box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;
    width:100% !important; font-size:1.05em !important; /* Your original font size */
    display:flex; align-items:center; justify-content:center;
}}
|
429 |
+
.content-panel-alphatts #generate_button_alpha_v3:hover, /* Gradio renders elem_id as the element's id attribute */
.content-panel-alphatts button[variant="primary"]:hover
{{
    background:#B45309 !important; /* Darker orange */ transform:translateY(-1px) !important;
    box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;
}}
|
435 |
+
|
436 |
+
/* Labels (using AlphaTranslator_Styled general label style) */
|
437 |
+
.content-panel-alphatts label > span.label-text
|
438 |
+
{{
|
439 |
+
font-weight:500 !important; color:#4B5563 !important;
|
440 |
+
font-size:0.88em !important; margin-bottom:6px !important; display:inline-block;
|
441 |
+
}}
|
442 |
+
/* Your original specific label styling with icons (if you want to keep them) */
|
443 |
+
/* You would need to ensure your Gradio labels have the correct `for` attribute linking to input `elem_id`
|
444 |
+
or use JavaScript to add these pseudo-elements if Gradio doesn't directly support `for` on labels.
|
445 |
+
For simplicity, I'm omitting the ::before icon styles unless you confirm they are essential
|
446 |
+
and your Gradio setup can support them easily. The general label style above will apply.
|
447 |
+
*/
|
448 |
+
|
449 |
+
/* Temperature description (from your original AlphaTTS CSS) */
|
450 |
+
.content-panel-alphatts .temp_description_class_alpha_v3 {{
|
451 |
+
font-size: 0.85em; color: #777; margin-top: -0.4rem; margin-bottom: 1rem;
|
452 |
+
}}
|
453 |
+
|
454 |
+
/* Audio Player (general styling, can be targeted by ID if set) */
|
455 |
+
.content-panel-alphatts .gr-audio audio, /* General audio player */
|
456 |
+
.content-panel-alphatts #output_audio_player_alpha_v3 audio /* Your specific ID */
|
457 |
{{
|
458 |
+
width: 100%; border-radius: var(--radius-input-original); margin-top:0.8rem;
|
|
|
459 |
}}
|
460 |
+
|
461 |
+
/* Examples (using AlphaTranslator_Styled examples button style) */
|
462 |
+
.content-panel-alphatts div[label*="نمونههای کاربردی"] .gr-button.gr-button-tool, /* Targetting by label */
|
463 |
+
.content-panel-alphatts div[label*="نمونههای کاربردی"] .gr-sample-button
|
464 |
+
{{
|
465 |
+
background-color:#E0E7FF !important; color:var(--fly-primary) !important;
|
466 |
+
border-radius:6px !important; font-size:0.78em !important; padding:4px 8px !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
}}
|
468 |
+
.content-panel-alphatts .custom-hr {{height:1px;background-color:var(--fly-border-color);margin:1.5rem 0;border:none;}}
|
469 |
+
|
470 |
+
/* Footer styling from AlphaTranslator_Styled */
|
471 |
+
.app-footer-alphatts {{ /* Changed class name slightly */
|
472 |
+
text-align:center;font-size:0.85em;color:var(--fly-text-secondary);margin-top:2.5rem;
|
473 |
+
padding:1rem 0;background-color:rgba(255,255,255,0.3);backdrop-filter:blur(5px);
|
474 |
+
border-top:1px solid var(--fly-border-color);
|
475 |
+
}}
|
476 |
+
footer {{display:none !important;}} /* Hides default Gradio footer */
|
477 |
+
|
478 |
|
479 |
+
/* Responsive adjustments from AlphaTranslator_Styled */
|
480 |
+
@media (min-width:640px) {{
|
481 |
+
.main-content-area-alphatts {{padding:1.5rem;max-width:700px;}}
|
482 |
+
.content-panel-alphatts {{padding:1.5rem;}}
|
483 |
+
.app-header-alphatts h1 {{font-size:2.5em !important;}}
|
484 |
+
.app-header-alphatts p {{font-size:1.05em !important;}}
|
485 |
+
}}
|
486 |
+
@media (min-width:768px) {{
|
487 |
+
.main-content-area-alphatts {{max-width:780px;}}
|
488 |
+
.content-panel-alphatts {{padding:2rem;}}
|
489 |
+
.content-panel-alphatts #generate_button_alpha_v3, /* Gradio renders elem_id as the element's id attribute */
.content-panel-alphatts button[variant="primary"]
{{
    width:auto !important; align-self:flex-start;
}}
|
494 |
+
.app-header-alphatts h1 {{font-size:2.75em !important;}}
|
495 |
+
.app-header-alphatts p {{font-size:1.1em !important;}}
|
496 |
+
}}
|
497 |
"""
|
498 |
|
499 |
+
# Using your original Gradio Blocks structure
|
500 |
+
# The theme `gr.themes.Base(font=[gr.themes.GoogleFont("Vazirmatn")])` is from your original.
|
501 |
+
# We are applying `app_theme_outer_styled` for the body background and `applied_css_for_alphatts` for specifics.
|
502 |
+
# Gradio UI definition. Components and their event listeners are all declared
# inside the Blocks context so that Gradio registers them on `demo`.
with gr.Blocks(theme=app_theme_outer_styled, css=applied_css_for_alphatts, title=f"آلفا TTS ({FIXED_MODEL_NAME.split('-')[1]})") as demo:
    # Header banner; the version shown is the second dash-separated field of
    # FIXED_MODEL_NAME.
    gr.HTML(f"""
    <div class='app-header-alphatts'>
    <h1>🚀 Alpha TTS</h1>
    <p>جادوی تبدیل متن به صدا در دستان شما (Gemini {FIXED_MODEL_NAME.split('-')[1]})</p>
    </div>
    """)

    with gr.Column(elem_classes=["main-content-area-alphatts"]):
        with gr.Column(elem_classes=["content-panel-alphatts"]):
            # Shown once at build time when no API key is configured, so the
            # user learns why synthesis is likely to fail.
            if not os.environ.get("GEMINI_API_KEY"):
                missing_key_msg = (
                    "⚠️ هشدار: متغیر محیطی GEMINI_API_KEY تنظیم نشده است. "
                    "قابلیت تبدیل متن به گفتار احتمالاً کار نخواهد کرد. "
                    "لطفاً این متغیر را در بخش Secrets این Space تنظیم کنید."
                )
                gr.Markdown(f"<div class='api-warning-message'>{missing_key_msg}</div>")

            use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False, elem_id="use_file_cb_alpha_v3")

            uploaded_file_input = gr.File(
                label=" ",
                file_types=['.txt'],
                visible=False,
                elem_id="file_uploader_alpha_main_v3",
            )

            text_to_speak_tb = gr.Textbox(
                label="متن فارسی برای تبدیل",
                placeholder="مثال: سلام، فردا هوا چطور است؟",
                lines=5,
                value="",
                visible=True,
                elem_id="text_input_main_alpha_v3",
            )

            def _toggle_input_mode(use_file):
                """Show the file uploader when *use_file* is set, else the textbox."""
                # Only visibility changes: the uploader keeps its blank label
                # instead of receiving the textbox's label while hidden.
                return gr.update(visible=use_file), gr.update(visible=not use_file)

            use_file_input_cb.change(
                fn=_toggle_input_mode,
                inputs=use_file_input_cb,
                outputs=[uploaded_file_input, text_to_speak_tb],
            )

            speech_prompt_tb = gr.Textbox(
                label="سبک گفتار (اختیاری)",
                placeholder="مثال: با لحنی شاد و پرانرژی",
                value="با لحنی دوستانه و رسا صحبت کن.",
                lines=2,
                elem_id="speech_prompt_alpha_v3",
            )

            speaker_voice_dd = gr.Dropdown(
                SPEAKER_VOICES, label="انتخاب گوینده و لهجه", value="Charon", elem_id="speaker_voice_alpha_v3"
            )

            temperature_slider = gr.Slider(
                minimum=0.1, maximum=1.5, step=0.05, value=0.9, label="میزان خلاقیت صدا",
                elem_id="temperature_slider_alpha_v3",
            )
            gr.Markdown("<p class='temp_description_class_alpha_v3'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایینتر = یکنواختی بیشتر.</p>")

            # No `variant` is set: the button is meant to be styled by the
            # custom stylesheet keyed on its elem_id.
            generate_button = gr.Button("🚀 تولید و پخش صدا", elem_id="generate_button_alpha_v3")

            output_audio = gr.Audio(label=" ", type="filepath", elem_id="output_audio_player_alpha_v3")

            # Examples section, separated from the form by a custom rule.
            gr.HTML("<hr class='custom-hr'>")
            gr.Markdown(
                "<h3 style='text-align:center; font-weight:500; color:var(--fly-text-secondary); margin-top:1.5rem; margin-bottom:1rem;'>نمونههای کاربردی</h3>",
            )
            gr.Examples(
                examples=[
                    [False, None, "سلام بر شما، امیدوارم روز خوبی داشته باشید.", "با لحنی گرم و صمیمی.", "Zephyr", 0.85],
                    [False, None, "این یک آزمایش برای بررسی کیفیت صدای تولید شده توسط هوش مصنوعی آلفا است.", "با صدایی طبیعی و روان.", "Charon", 0.9],
                ],
                inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
                outputs=[output_audio],
                fn=gradio_tts_interface,
                # Example caching is opt-in through the GRADIO_CACHE_EXAMPLES env var.
                cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true",
            )

    gr.Markdown(f"<p class='app-footer-alphatts'>Alpha TTS © 2024 - Model: {FIXED_MODEL_NAME}</p>")

    # The button object always exists at this point (a constructor call cannot
    # yield None), so the listener is attached unconditionally.
    generate_button.click(
        fn=gradio_tts_interface,
        inputs=[use_file_input_cb, uploaded_file_input, text_to_speak_tb, speech_prompt_tb, speaker_voice_dd, temperature_slider],
        outputs=[output_audio],
    )
|
606 |
+
# --- END: Gradio UI ---
|
|
|
607 |
|
608 |
if __name__ == "__main__":
    # Without pydub, multi-chunk output cannot be merged; say so at startup.
    if not PYDUB_AVAILABLE:
        logging.warning("Pydub (for audio merging) not found. Please install with `pip install pydub`. Merging will be disabled if multiple audio chunks are generated.")

    # Port and debug flag come from the environment, with the same fallbacks
    # as before (7860 and disabled).
    listen_port = int(os.getenv("PORT", 7860))
    debug_enabled = os.environ.get("GRADIO_DEBUG", "False").lower() == "true"

    demo.launch(
        server_name="0.0.0.0",
        server_port=listen_port,
        debug=debug_enabled,
        show_error=True,
    )
|