Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,75 @@
|
|
1 |
import gradio as gr
|
2 |
-
import base64
|
3 |
-
import mimetypes
|
4 |
import os
|
5 |
-
import re
|
6 |
-
import struct
|
7 |
import time
|
8 |
-
import
|
9 |
-
|
10 |
-
from google.genai import types
|
11 |
import logging
|
|
|
|
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
try:
|
16 |
from pydub import AudioSegment
|
17 |
PYDUB_AVAILABLE = True
|
18 |
-
logging.info("pydub با موفقیت ایمپورت شد.")
|
19 |
except ImportError:
|
20 |
PYDUB_AVAILABLE = False
|
21 |
-
logging.warning("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
SPEAKER_VOICES = [
|
24 |
"Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
|
25 |
"Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
|
@@ -27,22 +77,22 @@ SPEAKER_VOICES = [
|
|
27 |
"Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
|
28 |
"Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
|
29 |
]
|
30 |
-
FIXED_MODEL_NAME = "gemini-
|
31 |
DEFAULT_MAX_CHUNK_SIZE = 3800
|
32 |
-
DEFAULT_SLEEP_BETWEEN_REQUESTS =
|
33 |
-
DEFAULT_OUTPUT_FILENAME_BASE = "
|
34 |
|
35 |
-
def
|
36 |
log_list_ref.append(message)
|
37 |
-
logging.info(f"[
|
38 |
|
39 |
def save_binary_file(file_name, data, log_list_ref):
|
40 |
try:
|
41 |
with open(file_name, "wb") as f: f.write(data)
|
42 |
-
|
43 |
return file_name
|
44 |
except Exception as e:
|
45 |
-
|
46 |
return None
|
47 |
|
48 |
def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
|
@@ -69,276 +119,485 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
|
|
69 |
def smart_text_split(text, max_size=3800, log_list_ref=None):
|
70 |
if len(text) <= max_size: return [text]
|
71 |
chunks, current_chunk = [], ""
|
72 |
-
sentences = re.split(r'(?<=[
|
73 |
for sentence in sentences:
|
74 |
if len(current_chunk) + len(sentence) + 1 > max_size:
|
75 |
if current_chunk: chunks.append(current_chunk.strip())
|
76 |
current_chunk = sentence
|
77 |
while len(current_chunk) > max_size:
|
78 |
-
split_idx =
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
chunks.append(part.strip())
|
81 |
-
else:
|
|
|
82 |
if current_chunk: chunks.append(current_chunk.strip())
|
83 |
final_chunks = [c for c in chunks if c]
|
84 |
-
if log_list_ref:
|
85 |
return final_chunks
|
86 |
|
87 |
def merge_audio_files_func(file_paths, output_path, log_list_ref):
|
88 |
-
if not PYDUB_AVAILABLE:
|
|
|
|
|
89 |
try:
|
90 |
-
|
91 |
combined = AudioSegment.empty()
|
92 |
for i, fp in enumerate(file_paths):
|
93 |
-
if os.path.exists(fp):
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
95 |
combined.export(output_path, format="wav")
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
98 |
|
99 |
def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list_ref):
|
100 |
output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
|
101 |
max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS
|
102 |
-
_log_internal("شروع فرآیند تولید صدا...", log_list_ref)
|
103 |
-
api_key = os.environ.get("GEMINI_API_KEY_1")
|
104 |
-
if not api_key: api_key = os.environ.get("GEMINI_API_KEY")
|
105 |
-
if not api_key:
|
106 |
-
_log_internal("خطای حیاتی: هیچ Secret با نام GEMINI_API_KEY_1 یا GEMINI_API_KEY یافت نشد!", log_list_ref)
|
107 |
-
return None
|
108 |
-
_log_internal(f"استفاده از کلید API جمینای (...{api_key[-4:] if api_key else 'N/A'})", log_list_ref)
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
text_chunks = smart_text_split(text_input, max_chunk, log_list_ref)
|
114 |
-
if not text_chunks:
|
115 |
-
|
116 |
-
|
|
|
|
|
117 |
for i, chunk in enumerate(text_chunks):
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
config = types.GenerateContentConfig(temperature=temperature_val, response_modalities=["audio"],
|
122 |
-
speech_config=types.SpeechConfig(voice_config=types.VoiceConfig(
|
123 |
-
prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=selected_voice))))
|
124 |
-
timestamp = int(time.time() * 1000)
|
125 |
-
temp_fname_base = f"temp_audio_{timestamp}_part{i+1:03d}"
|
126 |
try:
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
if PYDUB_AVAILABLE:
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
try:
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
if os.path.
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
_log_internal(f"فایل موقت {os.path.basename(temp_f)} پاک شد.", log_list_ref)
|
183 |
-
except Exception as e_clean:
|
184 |
-
_log_internal(f"خطا در پاک کردن فایل موقت {os.path.basename(temp_f)}: {e_clean}", log_list_ref)
|
185 |
-
|
186 |
-
if final_audio_file_to_return and not os.path.exists(final_audio_file_to_return):
|
187 |
-
_log_internal(f"فایل نهایی '{final_audio_file_to_return}' پس از پردازش وجود ندارد!", log_list_ref)
|
188 |
-
return None
|
189 |
-
|
190 |
-
return final_audio_file_to_return
|
191 |
|
192 |
-
|
193 |
-
|
|
|
|
|
194 |
actual_text = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
if use_file_input:
|
196 |
-
if uploaded_file:
|
197 |
try:
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
else:
|
203 |
actual_text = text_to_speak
|
204 |
-
if not actual_text or not actual_text.strip():
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
@import url('https://fonts.googleapis.com/css2?family=
|
|
|
236 |
:root {{
|
237 |
-
--
|
238 |
-
--
|
239 |
-
--
|
240 |
-
--
|
241 |
-
--
|
242 |
-
--
|
243 |
-
--
|
244 |
-
--
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
.app-header-container h1 {{ font-size: 2.3em; font-weight: 800; margin:0 0 0.4rem 0; text-shadow: 0 1px 3px rgba(0,0,0,0.2); }}
|
250 |
-
.app-header-container p {{ font-size: 1.05em; color: rgba(255,255,255,0.9); margin-top:0; opacity: 0.95; }}
|
251 |
-
.main-content-wrapper-alpha {{ padding: 1.8rem 1.5rem; max-width: 650px; margin: -2.5rem auto 2rem auto; width: 90%; background-color: var(--app-panel-bg); border-radius: var(--radius-card); box-shadow: var(--shadow-card); position:relative; z-index:10; }}
|
252 |
-
@media (max-width: 768px) {{
|
253 |
-
.main-content-wrapper-alpha {{ width: 92%; padding: 1.5rem 1.2rem; margin-top: -2rem; }}
|
254 |
-
.app-header-container h1 {{font-size:2em;}}
|
255 |
-
.app-header-container p {{font-size:1em;}}
|
256 |
-
}}
|
257 |
-
footer {{display:none !important;}}
|
258 |
-
.gradio-button.generate-button-final-alpha {{ background: var(--app-button-bg) !important; color: white !important; border:none !important; border-radius: var(--radius-input) !important; padding: 0.85rem 1.5rem !important; font-weight: 700 !important; font-size:1.05em !important; transition: all 0.25s ease; box-shadow: var(--shadow-button); width:100%; margin-top:1.8rem !important; }}
|
259 |
-
.gradio-button.generate-button-final-alpha:hover {{ filter: brightness(1.15); transform: translateY(-2px); box-shadow: 0 6px 12px -3px rgba({int(BUTTON_BG[1:3],16)},{int(BUTTON_BG[3:5],16)},{int(BUTTON_BG[5:7],16)},0.65);}}
|
260 |
-
.gradio-textbox > label + div > textarea,
|
261 |
-
.gradio-dropdown > label + div > div > input,
|
262 |
-
.gradio-dropdown select,
|
263 |
-
.gradio-file > label + div {{
|
264 |
-
border-radius: var(--radius-input) !important;
|
265 |
-
border: 1px solid var(--app-border-color) !important;
|
266 |
-
background-color: var(--app-input-bg) !important;
|
267 |
-
box-shadow: inset 0 1px 3px rgba(0,0,0,0.06);
|
268 |
-
padding: 0.8rem !important;
|
269 |
-
font-size: 0.95em !important;
|
270 |
}}
|
271 |
-
|
272 |
-
.gradio-
|
273 |
-
.
|
274 |
-
.
|
275 |
-
.
|
276 |
-
.
|
277 |
-
|
278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
}}
|
280 |
-
label.gradio-label > .label-text {{ font-weight: 500 !important; color: var(--app-text-primary) !important; font-size: 0.98em !important; margin-bottom: 0.6rem !important; display: block; }}
|
281 |
-
.gradio-textbox[elem_id="text_input_alpha_final"] > label > .label-text::before,
|
282 |
-
.gradio-checkbox[elem_id="use_file_cb_alpha_final"] > label > .label-text > span::before,
|
283 |
-
.gradio-textbox[elem_id="speech_prompt_alpha_final"] > label > .label-text::before,
|
284 |
-
.gradio-dropdown[elem_id="speaker_voice_alpha_final"] > label > .label-text::before,
|
285 |
-
.gradio-slider[elem_id="temperature_slider_alpha_final"] > label > .label-text > span::before {{
|
286 |
-
margin-left: 10px; vertical-align: -2px; font-size: 1.1em; opacity: 0.8;
|
287 |
-
}}
|
288 |
-
.gradio-textbox[elem_id="text_input_alpha_final"] > label > .label-text::before {{ content: '📝'; }}
|
289 |
-
.gradio-checkbox[elem_id="use_file_cb_alpha_final"] > label > .label-text > span::before {{ content: '📄'; }}
|
290 |
-
.gradio-textbox[elem_id="speech_prompt_alpha_final"] > label > .label-text::before {{ content: '🗣️'; }}
|
291 |
-
.gradio-dropdown[elem_id="speaker_voice_alpha_final"] > label > .label-text::before {{ content: '🎤'; }}
|
292 |
-
.gradio-slider[elem_id="temperature_slider_alpha_final"] > label > .label-text > span::before {{ content: '🌡️'; }}
|
293 |
-
#output_audio_player_alpha_final audio {{ width: 100%; border-radius: var(--radius-input); margin-top:1rem; box-shadow: 0 2px 5px rgba(0,0,0,0.08); }}
|
294 |
-
.temp_description_class_alpha_final {{ font-size: 0.88em; color: var(--app-text-secondary); margin-top: -0.3rem; margin-bottom: 1.2rem; }}
|
295 |
-
.app-footer-container-final {{text-align:center;font-size:0.9em;color: var(--app-text-secondary);opacity:0.9; margin-top:3.5rem;padding:1.5rem 0; border-top:1px solid var(--app-border-color);}}
|
296 |
-
.gradio-examples {{ margin-top: 2.5rem !important; }}
|
297 |
-
.gradio-examples > .gradio-label > .label-text {{ font-size: 1.1em !important; font-weight: 700 !important; color: var(--app-text-primary) !important; text-align:center; margin-bottom: 1rem !important; }}
|
298 |
-
.gradio-examples table th {{ background-color: var(--app-input-bg) !important; font-weight:700 !important; font-size:0.9em !important; padding: 0.6rem 0.5rem !important; text-align:right !important; }}
|
299 |
-
.gradio-examples table td {{ padding: 0.6rem 0.5rem !important; font-size:0.9em !important; }}
|
300 |
-
.gradio-examples .gr-sample-button {{ background-color: rgba({int(BUTTON_BG[1:3],16)},{int(BUTTON_BG[3:5],16)},{int(BUTTON_BG[5:7],16)}, 0.1) !important; color: var(--app-button-bg) !important; border: 1px solid rgba({int(BUTTON_BG[1:3],16)},{int(BUTTON_BG[3:5],16)},{int(BUTTON_BG[5:7],16)}, 0.3) !important; font-weight:500 !important; }}
|
301 |
-
#output_audio_player_alpha_final > .gradio-label {{ display: none !important; }}
|
302 |
-
#file_uploader_alpha_final > .gradio-label {{ display: none !important; }}
|
303 |
"""
|
|
|
|
|
|
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
</
|
310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
|
312 |
-
with gr.
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
fn=gradio_tts_interface,
|
336 |
-
|
|
|
337 |
)
|
338 |
-
|
|
|
339 |
|
340 |
if __name__ == "__main__":
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
import os
|
|
|
|
|
3 |
import time
|
4 |
+
import threading
|
5 |
+
import sys
|
|
|
6 |
import logging
|
7 |
+
import traceback
|
8 |
+
import asyncio
|
9 |
|
10 |
+
# TTS specific imports from second script
|
11 |
+
import mimetypes
|
12 |
+
import re
|
13 |
+
import struct
|
14 |
+
# import zipfile # Not directly used in final combined code
|
15 |
+
from google import genai # For TTS
|
16 |
+
from google.genai import types as genai_types # For TTS
|
17 |
|
18 |
# --- START: logging configuration ---
# Logging must be configured BEFORE the first logging call. A module-level
# logging.warning()/info() on an unconfigured root logger installs a default
# handler, after which basicConfig() silently does nothing and both the INFO
# level and the custom format would be lost.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(threadName)s - %(message)s')
# --- END: logging configuration ---

# pydub is an optional dependency; PYDUB_AVAILABLE records whether the import
# succeeded so the rest of the module can skip audio merging when it did not.
try:
    from pydub import AudioSegment
    PYDUB_AVAILABLE = True
except ImportError:
    PYDUB_AVAILABLE = False
    logging.warning("Pydub is not available. Audio merging will be disabled. Falling back to single file or ZIP.")
|
28 |
+
|
29 |
+
# --- START: منطق چرخش API Key ---
|
30 |
+
# Collect GEMINI_API_KEY_1, GEMINI_API_KEY_2, ... from the environment and
# stop at the first variable that is missing or empty.
API_KEYS_GEMINI = []
i = 1
key = os.environ.get(f'GEMINI_API_KEY_{i}')
while key:
    API_KEYS_GEMINI.append(key)
    i += 1
    key = os.environ.get(f'GEMINI_API_KEY_{i}')

NUM_GEMINI_KEYS = len(API_KEYS_GEMINI)
# Index of the next key to hand out; read and updated under gemini_key_lock.
current_gemini_key_index = 0
gemini_key_lock = threading.Lock()

if NUM_GEMINI_KEYS:
    logging.info(f"تعداد {NUM_GEMINI_KEYS} کلید API جیمینای بارگذاری شد.")
else:
    logging.error(
        'خطای حیاتی: هیچ Secret با نام GEMINI_API_KEY_n (مثلاً GEMINI_API_KEY_1) یافت نشد! ' +
        'قابلیت تبدیل متن به گفتار غیرفعال خواهد بود. لطفاً Secret ها را در تنظیمات Space خود اضافه کنید.'
    )
|
51 |
+
|
52 |
+
def get_gemini_api_key_sync():
    """Return the next Gemini API key in round-robin order.

    Returns:
        The selected key, or ``None`` when no key was loaded at import time.
        The shared rotation index is advanced (wrapping around) while
        ``gemini_key_lock`` is held.
    """
    global current_gemini_key_index
    if NUM_GEMINI_KEYS == 0:
        return None
    with gemini_key_lock:
        slot = current_gemini_key_index
        current_gemini_key_index = (slot + 1) % NUM_GEMINI_KEYS
        selected_api_key = API_KEYS_GEMINI[slot]
        # Only the last four characters of the key are written to the log.
        logging.info(f"TTS Gemini: استفاده از کلید API با اندیس چرخشی: ...{selected_api_key[-4:]}")
        return selected_api_key
|
61 |
+
# --- END: منطق چرخش API Key ---
|
62 |
+
|
63 |
+
# --- START: تابع ریاستارت خودکار ---
|
64 |
+
def auto_restart_service():
    """Block for 24 hours, then terminate the process with exit status 1.

    The log messages state that the hosting platform is expected to restart
    the app after the exit. The call blocks for the whole interval and ends
    with ``os._exit``, which exits immediately without normal interpreter
    cleanup.
    """
    RESTART_INTERVAL_SECONDS = 24 * 60 * 60

    logging.info(
        f"سرویس برای ریاستارت خودکار پس از {RESTART_INTERVAL_SECONDS / 3600:.0f} ساعت زمانبندی شده است."
    )
    time.sleep(RESTART_INTERVAL_SECONDS)

    logging.info(
        f"زمان ریاستارت خودکار ({RESTART_INTERVAL_SECONDS / 3600:.0f} ساعت) فرا رسیده است. برنامه خارج میشود تا توسط پلتفرم ریاستارت شود..."
    )
    os._exit(1)
|
70 |
+
# --- END: تابع ریاستارت خودکار ---
|
71 |
+
|
72 |
+
# --- START: TTS Core Logic ---
|
73 |
SPEAKER_VOICES = [
|
74 |
"Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager",
|
75 |
"Sulafat", "Laomedeia", "Achernar", "Alnilam", "Schedar", "Gacrux",
|
|
|
77 |
"Rasalthgeti", "Orus", "Aoede", "Callirrhoe", "Autonoe", "Enceladus",
|
78 |
"Iapetus", "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda"
|
79 |
]
|
80 |
+
# Gemini text-to-speech model ID. The prebuilt voices listed in
# SPEAKER_VOICES are served by the 2.5 TTS preview models; there is no
# "gemini-1.5-flash-preview-tts" model, so requests with that ID are rejected.
FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts"
# Maximum number of characters sent to the API in a single request.
DEFAULT_MAX_CHUNK_SIZE = 3800
# Pause, in seconds, between consecutive chunk requests.
DEFAULT_SLEEP_BETWEEN_REQUESTS = 8
# Filename stem used for every generated audio file.
DEFAULT_OUTPUT_FILENAME_BASE = "alpha_tts_audio"
|
84 |
|
85 |
+
def _log_tts(message, log_list_ref):
|
86 |
log_list_ref.append(message)
|
87 |
+
logging.info(f"[TTS_CORE] {message}")
|
88 |
|
89 |
def save_binary_file(file_name, data, log_list_ref):
    """Write *data* to *file_name* in binary mode.

    Args:
        file_name: Destination path.
        data: Bytes to write.
        log_list_ref: Per-request log list that receives the outcome message.

    Returns:
        *file_name* on success, ``None`` if writing failed (the error is
        logged, not raised).
    """
    try:
        with open(file_name, "wb") as out_file:
            out_file.write(data)
        _log_tts(f"✅ فایل ذخیره شد: {file_name}", log_list_ref)
    except Exception as e:
        _log_tts(f"❌ خطا در ذخیره فایل {file_name}: {e}", log_list_ref)
        return None
    else:
        return file_name
|
97 |
|
98 |
def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
|
|
|
119 |
def smart_text_split(text, max_size=3800, log_list_ref=None):
    """Split *text* into chunks of at most *max_size* characters.

    The text is first split into sentences after ``.``, ``!``, ``?`` and the
    Arabic-script terminators, and sentences are packed greedily into chunks.
    A single sentence longer than *max_size* is cut at the last comma,
    semicolon, colon or space found in the second half of the window, or
    hard-cut at *max_size* when no such character exists.

    Args:
        text: Text to split.
        max_size: Maximum chunk length in characters.
        log_list_ref: Optional per-request log list; when given (even if
            empty) the resulting chunk count is logged to it.

    Returns:
        List of non-empty, stripped chunks. Text that already fits is
        returned unchanged as a single-element list.

    Raises:
        ValueError: If *text* does not fit and *max_size* is smaller than 1
            (the splitting loop could never make progress).
    """
    if len(text) <= max_size:
        return [text]
    if max_size < 1:
        # A zero-width window would make the inner loop spin forever.
        raise ValueError("max_size must be at least 1")

    chunks, current_chunk = [], ""
    sentences = re.split(r'(?<=[.!?؟۔])\s+', text)
    for sentence in sentences:
        if len(current_chunk) + len(sentence) + 1 > max_size:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence
            # The sentence alone may still be too long: carve it up.
            while len(current_chunk) > max_size:
                # Right-most soft break inside the second half of the window.
                split_idx = max(
                    current_chunk.rfind(punc, max_size // 2, max_size)
                    for punc in ('،', ',', ';', ':', ' ')
                )
                if split_idx != -1:
                    part, current_chunk = current_chunk[:split_idx + 1], current_chunk[split_idx + 1:]
                else:
                    part, current_chunk = current_chunk[:max_size], current_chunk[max_size:]
                chunks.append(part.strip())
        else:
            current_chunk += (" " if current_chunk and sentence else "") + sentence
    if current_chunk:
        chunks.append(current_chunk.strip())

    final_chunks = [c for c in chunks if c]
    # `is not None` rather than truthiness: an empty log list must still be logged to.
    if log_list_ref is not None:
        _log_tts(f"📊 متن به {len(final_chunks)} قطعه تقسیم شد.", log_list_ref)
    return final_chunks
|
143 |
|
144 |
def merge_audio_files_func(file_paths, output_path, log_list_ref):
    """Concatenate audio files into one WAV file using pydub.

    Files are appended in order; 150 ms of silence follows every appended
    file that is not at the last position of *file_paths*. Paths that do not
    exist are logged and skipped.

    Args:
        file_paths: Ordered paths of the audio segments.
        output_path: Destination path of the merged WAV file.
        log_list_ref: Per-request log list.

    Returns:
        ``True`` if the export succeeded, ``False`` if pydub is unavailable
        or any error occurred (the error and traceback are logged).
    """
    if not PYDUB_AVAILABLE:
        _log_tts("❌ Pydub در دسترس نیست. ادغام فایل انجام نشد.", log_list_ref)
        return False

    try:
        _log_tts(f"🔗 شروع ادغام {len(file_paths)} فایل صوتی...", log_list_ref)
        combined = AudioSegment.empty()
        last_position = len(file_paths) - 1
        for i, fp in enumerate(file_paths):
            if not os.path.exists(fp):
                _log_tts(f"⚠️ فایل صوتی برای ادغام یافت نشد: {fp}", log_list_ref)
                continue
            combined += AudioSegment.from_file(fp)
            if i < last_position:
                combined += AudioSegment.silent(duration=150)
        combined.export(output_path, format="wav")
        _log_tts(f"✅ فایل صوتی با موفقیت در '{output_path}' ادغام و ذخیره شد.", log_list_ref)
        return True
    except Exception as e:
        _log_tts(f"❌ خطا در هنگام ادغام فایلهای صوتی: {e}\n{traceback.format_exc()}", log_list_ref)
        return False
|
165 |
|
166 |
def _extract_tts_audio(response):
    """Return ``(audio_bytes, mime_type)`` of the first audio part in *response*.

    Only the first candidate is inspected. Returns ``(None, None)`` when the
    response carries no inline audio data.
    """
    candidates = getattr(response, "candidates", None) or []
    if not candidates:
        return None, None
    content = getattr(candidates[0], "content", None)
    for part in getattr(content, "parts", None) or []:
        inline = getattr(part, "inline_data", None)
        data = getattr(inline, "data", None)
        mime = getattr(inline, "mime_type", None) or ""
        if data and mime.startswith("audio/"):
            return data, mime
    return None, None


def _promote_audio_file(src_path, dest_base, rename_error_label, log_list_ref):
    """Move *src_path* to ``dest_base`` plus its own extension, replacing any stale file.

    Returns the new path, or *src_path* unchanged when the move fails (the
    failure is logged with *rename_error_label* as prefix).
    """
    dest_path = f"{dest_base}{os.path.splitext(src_path)[1]}"
    try:
        if os.path.abspath(src_path) != os.path.abspath(dest_path):
            os.replace(src_path, dest_path)
        return dest_path
    except OSError as e:
        _log_tts(f"{rename_error_label}: {e}", log_list_ref)
        return src_path


def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, log_list_ref):
    """Convert text to speech with Gemini and return one audio file.

    The text is split into chunks, each chunk is synthesised with the
    requested prebuilt voice, and the resulting files are merged (when pydub
    is available) into a single WAV file.

    Args:
        text_input: Text to speak.
        prompt_input: Optional style instruction; when non-blank it is quoted
            and placed on its own line before every chunk.
        selected_voice: Prebuilt voice name sent to the API.
        temperature_val: Sampling temperature sent to the API.
        log_list_ref: Per-request log list that receives progress messages.

    Returns:
        ``(path, "موفق")`` on success, or ``(None, error_message)`` on
        failure. Failures are reported through the return value; a failing
        chunk is logged and skipped rather than raised.
    """
    output_base_name = DEFAULT_OUTPUT_FILENAME_BASE
    max_chunk, sleep_time = DEFAULT_MAX_CHUNK_SIZE, DEFAULT_SLEEP_BETWEEN_REQUESTS

    _log_tts("🚀 شروع فرآیند تولید صدا...", log_list_ref)

    api_key = get_gemini_api_key_sync()
    if not api_key:
        _log_tts("❌ کلید API جیمینای معتبری یافت نشد یا دریافت نشد. عملیات متوقف شد.", log_list_ref)
        return None, "خطا: کلید API جیمینای برای سرویس TTS در دسترس نیست."

    if not text_input or not text_input.strip():
        _log_tts("❌ متن ورودی برای تبدیل به گفتار خالی است.", log_list_ref)
        return None, "خطا: متن ورودی خالی است."

    text_chunks = smart_text_split(text_input, max_chunk, log_list_ref)
    if not text_chunks:
        _log_tts("❌ متن قابل پردازش برای تبدیل به گفتار نیست.", log_list_ref)
        return None, "خطا: متن قابل پردازش نیست."

    # The google-genai SDK (`from google import genai`) is client-based: the
    # API key goes to genai.Client and the voice is selected through
    # speech_config. Both objects are identical for every chunk, so they are
    # built once, outside the loop.
    try:
        client = genai.Client(api_key=api_key)
        tts_config = genai_types.GenerateContentConfig(
            temperature=temperature_val,
            response_modalities=["audio"],
            speech_config=genai_types.SpeechConfig(
                voice_config=genai_types.VoiceConfig(
                    prebuilt_voice_config=genai_types.PrebuiltVoiceConfig(voice_name=selected_voice)
                )
            ),
        )
    except Exception as e:
        _log_tts(f"❌ Failed to initialise the Gemini client: {e}\n{traceback.format_exc()}", log_list_ref)
        return None, "تولید صدا ناموفق بود. هیچ فایلی ایجاد نشد."

    # NOTE(review): chunk and final file names are fixed, so two requests
    # running at the same time in the same working directory would overwrite
    # each other's files. Confirm whether the app can serve concurrent requests.
    generated_files = []
    last_index = len(text_chunks) - 1
    for i, chunk in enumerate(text_chunks):
        _log_tts(f"🔊 پردازش قطعه {i+1}/{len(text_chunks)}...", log_list_ref)
        final_text_for_tts = f'"{prompt_input}"\n{chunk}' if prompt_input and prompt_input.strip() else chunk

        try:
            response = client.models.generate_content(
                model=FIXED_MODEL_NAME,
                contents=final_text_for_tts,
                config=tts_config,
            )
            audio_bytes, mime_type = _extract_tts_audio(response)
            if not audio_bytes:
                _log_tts(f"⚠️ پاسخ API برای قطعه {i+1} بدون داده صوتی معتبر دریافت شد.", log_list_ref)
                _log_tts(f"ساختار پاسخ (Response structure): {response}", log_list_ref)
            else:
                ext = mimetypes.guess_extension(mime_type) or ".wav"
                # "audio/L..." payloads have no guessable extension and are
                # passed through convert_to_wav before being saved as .wav.
                if "audio/L" in mime_type and ext == ".wav":
                    audio_bytes = convert_to_wav(audio_bytes, mime_type)
                fpath = save_binary_file(f"{output_base_name}_part{i+1:03d}{ext}", audio_bytes, log_list_ref)
                if fpath:
                    generated_files.append(fpath)
        except Exception as e:
            # Best effort: one failing chunk must not abort the remaining ones.
            _log_tts(f"❌ خطا در تولید قطعه صوتی {i+1} با Gemini: {e}", log_list_ref)
            if hasattr(e, 'message'):
                _log_tts(f"پیام خطا: {e.message}", log_list_ref)
            if hasattr(e, 'response'):
                _log_tts(f"جزئیات پاسخ خطای Gemini API: {e.response}", log_list_ref)
            _log_tts(f"Traceback: {traceback.format_exc()}", log_list_ref)

        # Pace the requests after every chunk, including failed ones: a chunk
        # that failed (e.g. on a rate limit) is the worst moment to skip the pause.
        if i < last_index:
            _log_tts(f"💤 توقف کوتاه ({sleep_time} ثانیه) قبل از پردازش قطعه بعدی...", log_list_ref)
            time.sleep(sleep_time)

    if not generated_files:
        _log_tts("❌ هیچ فایل صوتی تولید نشد.", log_list_ref)
        return None, "تولید صدا ناموفق بود. هیچ فایلی ایجاد نشد."

    _log_tts(f"🎉 {len(generated_files)} فایل(های) صوتی با موفقیت تولید شد.", log_list_ref)
    final_output_path_base = f"{output_base_name}_final"

    if len(generated_files) == 1:
        final_audio_file = _promote_audio_file(
            generated_files[0], final_output_path_base,
            "خطا در تغییر نام فایل تکی نهایی", log_list_ref,
        )
    elif PYDUB_AVAILABLE:
        merged_fn = f"{final_output_path_base}.wav"
        if os.path.exists(merged_fn):
            try:
                os.remove(merged_fn)
            except OSError as e:
                _log_tts(f"⚠️ عدم امکان حذف فایل ادغام شده قبلی '{merged_fn}': {e}", log_list_ref)

        if merge_audio_files_func(generated_files, merged_fn, log_list_ref):
            final_audio_file = merged_fn
            leftovers = [
                fp for fp in generated_files
                if os.path.abspath(fp) != os.path.abspath(merged_fn)
            ]
        else:
            _log_tts("⚠️ ادغام فایلهای صوتی ناموفق بود. اولین قطعه ارائه میشود.", log_list_ref)
            final_audio_file = _promote_audio_file(
                generated_files[0], f"{final_output_path_base}_fallback",
                "خطا در تغییر نام فایل اولین قطعه (fallback)", log_list_ref,
            )
            leftovers = generated_files[1:]

        # The per-chunk files are no longer needed once a final file exists.
        for fp in leftovers:
            try:
                os.remove(fp)
            except OSError as e_del:
                _log_tts(f"⚠️ عدم امکان حذف فایل موقت '{fp}': {e_del}", log_list_ref)
    else:
        _log_tts("⚠️ Pydub برای ادغام در دسترس نیست. اولین قطعه صوتی ارائه میشود.", log_list_ref)
        final_audio_file = _promote_audio_file(
            generated_files[0], f"{final_output_path_base}_single",
            "خطا در تغییر نام اولین قطعه (بدون pydub)", log_list_ref,
        )
        # Without pydub the remaining chunks are kept on disk and only reported.
        for extra_path in generated_files[1:]:
            _log_tts(f"قطعه اضافی موجود: {extra_path} (ادغام نشده)", log_list_ref)

    if not os.path.exists(final_audio_file):
        _log_tts(f"⚠️ فایل صوتی نهایی '{final_audio_file}' پس از پردازش وجود ندارد!", log_list_ref)
        return None, "خطا: فایل صوتی نهایی یافت نشد."

    return final_audio_file, "موفق"
|
366 |
+
|
367 |
+
def gradio_tts_interface(use_file_input, uploaded_file, text_to_speak, speech_prompt, speaker_voice, temperature):
    """Gradio callback: validate the inputs, run TTS and report the outcome.

    Args:
        use_file_input: When truthy, the text is read from *uploaded_file*
            instead of *text_to_speak*.
        uploaded_file: Uploaded text file, either an object exposing the
            file path as ``.name`` or a plain path.
        text_to_speak: Text typed by the user.
        speech_prompt: Optional style instruction; may be ``None``.
        speaker_voice: Voice name forwarded to ``core_generate_audio``.
        temperature: Sampling temperature forwarded to ``core_generate_audio``.

    Returns:
        ``(audio_path_or_None, status_message)``. Every failure is reported
        through the status message rather than raised.
    """
    logs_for_this_run = []

    if NUM_GEMINI_KEYS == 0:
        return None, "خطای پیکربندی: هیچ کلید API جیمینای برای سرویس TTS تنظیم نشده است."

    if use_file_input:
        # Accept both upload objects carrying the path in `.name` and plain paths.
        file_path = getattr(uploaded_file, 'name', uploaded_file)
        if not file_path or not isinstance(file_path, (str, os.PathLike)):
            return None, "خطا: فایل متنی انتخاب نشده است در حالی که گزینه استفاده از فایل فعال است."
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                actual_text = f.read().strip()
        except (OSError, ValueError) as e:
            # ValueError also covers UnicodeDecodeError for non-UTF-8 uploads.
            _log_tts(f"❌ خطا در خواندن فایل متنی: {e}", logs_for_this_run)
            return None, f"خطا در خواندن فایل: {e}"
        if not actual_text:
            return None, "خطا: فایل متنی انتخاب شده خالی است."
        _log_tts(f"خوانش متن از فایل: {file_path}", logs_for_this_run)
    else:
        actual_text = text_to_speak
        if not actual_text or not actual_text.strip():
            return None, "خطا: لطفاً متنی را برای تبدیل به گفتار وارد کنید."

    # The prompt is optional; slicing None would raise before the guarded call below.
    prompt_preview = (speech_prompt or "")[:30]
    _log_tts(f"متن ورودی برای TTS (اولین 50 کاراکتر): '{actual_text[:50]}...'", logs_for_this_run)
    _log_tts(f"تنظیمات: Speaker={speaker_voice}, Temp={temperature}, Prompt='{prompt_preview}...'", logs_for_this_run)

    try:
        final_audio_path, generation_status_msg = core_generate_audio(
            actual_text, speech_prompt, speaker_voice, temperature, logs_for_this_run
        )
    except Exception as e:
        # UI boundary: never let an unexpected error escape into Gradio.
        _log_tts(f"❌ خطای پیشبینی نشده در gradio_tts_interface: {e}\n{traceback.format_exc()}", logs_for_this_run)
        return None, f"خطای داخلی سرویس: {type(e).__name__}"

    if not final_audio_path:
        status_message_ui = f"❌ {generation_status_msg}"
        _log_tts(status_message_ui, logs_for_this_run)
        return None, status_message_ui

    if generation_status_msg == "موفق":
        status_message_ui = "✅ تبدیل متن به گفتار با موفقیت انجام شد."
        _log_tts(status_message_ui, logs_for_this_run)
    else:
        # A file was produced but the core reported a problem.
        status_message_ui = f"⚠️ {generation_status_msg} فایل صوتی ممکن است ناقص باشد."
        _log_tts(status_message_ui + f" Path: {final_audio_path}", logs_for_this_run)
    return final_audio_path, status_message_ui
|
420 |
+
# --- END: TTS Core Logic ---
|
421 |
|
422 |
+
|
423 |
+
# --- START: بخش UI و Gradio ---
|
424 |
+
# UI colour palette, as hex strings. The names suggest each colour's role;
# the values are interpolated into the custom CSS and theme settings.

# Brand colours.
FLY_PRIMARY_COLOR_HEX = "#4F46E5"
FLY_SECONDARY_COLOR_HEX = "#10B981"
FLY_ACCENT_COLOR_HEX = "#D97706"

# Text colours.
FLY_TEXT_COLOR_HEX = "#1F2937"
FLY_SUBTLE_TEXT_HEX = "#6B7280"

# Backgrounds and borders.
FLY_LIGHT_BACKGROUND_HEX = "#F9FAFB"
FLY_WHITE_HEX = "#FFFFFF"
FLY_BORDER_COLOR_HEX = "#D1D5DB"
FLY_INPUT_BG_HEX_SIMPLE = "#F3F4F6"
FLY_PANEL_BG_SIMPLE = "#E0F2FE"
|
434 |
+
|
435 |
+
# Outer Gradio theme: the Base theme with the Inter web font (falling back to
# system fonts) and the light page background from the palette.
_outer_theme_fonts = [gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
app_theme_outer = gr.themes.Base(font=_outer_theme_fonts).set(
    body_background_fill=FLY_LIGHT_BACKGROUND_HEX,
)
|
440 |
+
|
441 |
+
# Stylesheet passed to gr.Blocks(css=...). This is an f-string: the FLY_*
# palette constants are interpolated into the :root custom properties, and
# every literal CSS brace is doubled ({{ / }}) so it survives formatting.
# The --fly-primary-rgb / --fly-accent-rgb triplets are the decimal forms of
# FLY_PRIMARY_COLOR_HEX / FLY_ACCENT_COLOR_HEX and are written by hand, so
# they must be updated together with those constants.
custom_css = f"""
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@300;400;500;600;700;800&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700;800&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
:root {{
    --fly-primary: {FLY_PRIMARY_COLOR_HEX}; --fly-secondary: {FLY_SECONDARY_COLOR_HEX};
    --fly-accent: {FLY_ACCENT_COLOR_HEX}; --fly-text-primary: {FLY_TEXT_COLOR_HEX};
    --fly-text-secondary: {FLY_SUBTLE_TEXT_HEX}; --fly-bg-light: {FLY_LIGHT_BACKGROUND_HEX};
    --fly-bg-white: {FLY_WHITE_HEX}; --fly-border-color: {FLY_BORDER_COLOR_HEX};
    --fly-input-bg-simple: {FLY_INPUT_BG_HEX_SIMPLE}; --fly-panel-bg-simple: {FLY_PANEL_BG_SIMPLE};
    --font-global: 'Vazirmatn', 'Inter', 'Poppins', system-ui, sans-serif;
    --font-english: 'Poppins', 'Inter', system-ui, sans-serif;
    --radius-sm: 0.375rem; --radius-md: 0.5rem; --radius-lg: 0.75rem; --radius-xl: 1rem; --radius-full: 9999px;
    --shadow-sm: 0 1px 2px 0 rgba(0,0,0,0.05); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1),0 2px 4px -2px rgba(0,0,0,0.1);
    --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1),0 4px 6px -4px rgba(0,0,0,0.1);
    --shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1),0 8px 10px -6px rgba(0,0,0,0.1);
    --fly-primary-rgb: 79,70,229; --fly-accent-rgb: 217,119,6;
}}
body {{font-family:var(--font-global);direction:rtl;background-color:var(--fly-bg-light);color:var(--fly-text-primary);line-height:1.7;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;font-size:16px;}}
.gradio-container {{max-width:100% !important;width:100% !important;min-height:100vh;margin:0 auto !important;padding:0 !important;border-radius:0 !important;box-shadow:none !important;background:linear-gradient(170deg, #E0F2FE 0%, #F3E8FF 100%);display:flex;flex-direction:column;}}
.app-title-card {{text-align:center;padding:2.5rem 1rem;margin:0;background:linear-gradient(135deg,var(--fly-primary) 0%,var(--fly-secondary) 100%);color:var(--fly-bg-white);border-bottom-left-radius:var(--radius-xl);border-bottom-right-radius:var(--radius-xl);box-shadow:var(--shadow-lg);position:relative;overflow:hidden;}}
.app-title-card::before {{content:'';position:absolute;top:-50px;right:-50px;width:150px;height:150px;background:rgba(255,255,255,0.1);border-radius:var(--radius-full);opacity:0.5;transform:rotate(45deg);}}
.app-title-card h1 {{font-size:2.25em !important;font-weight:800 !important;margin:0 0 0.5rem 0;font-family:var(--font-english);letter-spacing:-0.5px;text-shadow:0 2px 4px rgba(0,0,0,0.1);}}
.app-title-card p {{font-size:1em !important;margin-top:0.25rem;font-weight:400;color:rgba(255,255,255,0.85) !important;}}
.app-footer-fly {{text-align:center;font-size:0.85em;color:var(--fly-text-secondary);margin-top:2.5rem;padding:1rem 0;background-color:rgba(255,255,255,0.3);backdrop-filter:blur(5px);border-top:1px solid var(--fly-border-color);}}
footer,.gradio-footer,.flagging-container,.flex.row.gap-2.absolute.bottom-2.right-2.gr-compact.gr-box.gr-text-gray-500,div[data-testid="flag"],button[title="Flag"],button[aria-label="Flag"],.footer-utils {{display:none !important;visibility:hidden !important;}}
.main-content-area {{flex-grow:1;padding:0.75rem;width:100%;margin:0 auto;box-sizing:border-box;}}
.content-panel-simple {{background-color:var(--fly-bg-white);padding:1rem;border-radius:var(--radius-xl);box-shadow:var(--shadow-xl);margin-top:-2rem;position:relative;z-index:10;margin-bottom:2rem;width:100%;box-sizing:border-box;}}
.content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{background:var(--fly-accent) !important;margin-top:1rem !important;padding:12px 20px !important;transition:all 0.25s ease-in-out !important;color:white !important;font-weight:600 !important;border-radius:10px !important;border:none !important;box-shadow:0 3px 8px -1px rgba(var(--fly-accent-rgb),0.3) !important;width:100% !important;font-size:1em !important;display:flex;align-items:center;justify-content:center;}}
.content-panel-simple .gr-button.lg.primary:hover,.content-panel-simple button[variant="primary"]:hover {{background:#B45309 !important;transform:translateY(-1px) !important;box-shadow:0 5px 10px -1px rgba(var(--fly-accent-rgb),0.4) !important;}}
.content-panel-simple .gr-input > label + div > textarea,.content-panel-simple .gr-dropdown > label + div > div > input,.content-panel-simple .gr-dropdown > label + div > div > select,.content-panel-simple .gr-textbox > label + div > textarea, .content-panel-simple .gr-file > label + div {{border-radius:8px !important;border:1.5px solid var(--fly-border-color) !important;font-size:0.95em !important;background-color:var(--fly-input-bg-simple) !important;padding:10px 12px !important;color:var(--fly-text-primary) !important;}}
.content-panel-simple .gr-input > label + div > textarea:focus,.content-panel-simple .gr-dropdown > label + div > div > input:focus,.content-panel-simple .gr-dropdown > label + div > div > select:focus,.content-panel-simple .gr-textbox > label + div > textarea:focus, .content-panel-simple .gr-file > label + div:focus-within {{border-color:var(--fly-primary) !important;box-shadow:0 0 0 3px rgba(var(--fly-primary-rgb),0.12) !important;background-color:var(--fly-bg-white) !important;}}
.content-panel-simple .gr-file > label + div {{ text-align:center; border-style: dashed !important; }}
.content-panel-simple .gr-dropdown select {{font-family:var(--font-global) !important;width:100%;cursor:pointer;}}
.content-panel-simple .gr-textbox[label*="وضعیت"] > label + div > textarea {{background-color:var(--fly-panel-bg-simple) !important;border-color:#A5D5FE !important;min-height:auto !important; /* Adjusted for single line */ font-family:var(--font-global);font-size:0.9em !important;line-height:1.5;padding:8px 10px !important;}}
.content-panel-simple .gr-panel,.content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > .gr-panel {{border-radius:8px !important;border:1px solid var(--fly-border-color) !important;background-color:var(--fly-input-bg-simple) !important;padding:0.8rem 1rem !important;margin-top:0.6rem;box-shadow:none;}}
.content-panel-simple div[label*="تنظیمات پیشرفته"] > .gr-accordion > button.gr-button {{font-weight:500 !important;padding:8px 10px !important;border-radius:6px !important;background-color:#E5E7EB !important;color:var(--fly-text-primary) !important;border:1px solid #D1D5DB !important;}}
.content-panel-simple label > span.label-text {{font-weight:500 !important;color:#4B5563 !important;font-size:0.88em !important;margin-bottom:6px !important;display:inline-block;}}
.content-panel-simple .gr-slider label span {{font-size:0.82em !important;color:var(--fly-text-secondary);}}
.temp-description-tts {{ font-size: 0.82em !important; color: var(--fly-text-secondary) !important; margin-top: -0.5rem; margin-bottom: 1rem; padding-right: 5px; }}
.content-panel-simple div[label*="نمونه"] {{margin-top:1.5rem;}}
.content-panel-simple div[label*="نمونه"] .gr-button.gr-button-tool,.content-panel-simple div[label*="نمونه"] .gr-sample-button {{background-color:#E0E7FF !important;color:var(--fly-primary) !important;border-radius:6px !important;font-size:0.78em !important;padding:4px 8px !important;}}
.content-panel-simple .custom-hr {{height:1px;background-color:var(--fly-border-color);margin:1.5rem 0;border:none;}}
.api-warning-message {{background-color:#FFFBEB !important;color:#92400E !important;padding:10px 12px !important;border-radius:8px !important;border:1px solid #FDE68A !important;text-align:center !important;margin:0 0.2rem 1rem 0.2rem !important;font-size:0.85em !important;}}
.content-panel-simple #output_audio_tts audio {{ width: 100%; border-radius: var(--radius-md); margin-top:0.5rem; }}
@media (min-width:640px) {{.main-content-area {{padding:1.5rem;max-width:700px;}} .content-panel-simple {{padding:1.5rem;}} .app-title-card h1 {{font-size:2.5em !important;}} .app-title-card p {{font-size:1.05em !important;}} }}
@media (min-width:768px) {{
    .main-content-area {{max-width:780px;}} .content-panel-simple {{padding:2rem;}}
    .content-panel-simple .main-content-row {{display:flex !important;flex-direction:row !important;gap:1.5rem !important;}}
    .content-panel-simple .main-content-row > .gr-column:nth-child(1) {{flex-basis:60%; min-width:0;}}
    .content-panel-simple .main-content-row > .gr-column:nth-child(2) {{flex-basis:40%; min-width:0;}}
    .content-panel-simple .gr-button.lg.primary,.content-panel-simple button[variant="primary"] {{width:auto !important;align-self:flex-start;}}
    .app-title-card h1 {{font-size:2.75em !important;}} .app-title-card p {{font-size:1.1em !important;}}
}}
"""
|
496 |
+
# Log the installed Gradio version once at start-up (lazy %-formatting so the
# message is only built if the record is actually emitted).
logging.info("Gradio version: %s", gr.__version__)
if not PYDUB_AVAILABLE:
    # The (Persian) message tells the operator that pydub, used for merging
    # audio files, was not found, how to install it, and that otherwise only
    # the first audio chunk will be provided.
    logging.warning("Pydub (برای ادغام فایلهای صوتی) یافت نشد. لطفاً با `pip install pydub` نصب کنید. در غیر این صورت، فقط اولین قطعه صوتی ارائه خواهد شد.")
|
499 |
|
500 |
+
with gr.Blocks(theme=app_theme_outer, css=custom_css, title="آلفا TTS") as demo:
    # NOTE(review): this block reached review with its indentation stripped.
    # The nesting below is reconstructed from the selectors in custom_css
    # (components styled under ".content-panel-simple" are placed inside that
    # group; ".main-content-row" holds the two columns) — confirm against the
    # intended layout.

    # Header card (static HTML, no interpolation needed).
    gr.HTML("""
    <div class="app-title-card">
        <h1>🚀 Alpha TTS</h1>
        <p>جادوی تبدیل متن به صدا با هوش مصنوعی Gemini</p>
    </div>
    """)

    with gr.Column(elem_classes=["main-content-area"]):
        with gr.Group(elem_classes=["content-panel-simple"]):
            # Show a visible warning banner when no Gemini API key is configured.
            if NUM_GEMINI_KEYS == 0:
                missing_key_msg = (
                    "⚠️ هشدار: قابلیت تبدیل متن به گفتار غیرفعال است. "
                    "هیچ کلید API جیمینای (با فرمت GEMINI_API_KEY_1, ...) "
                    "در بخش Secrets این Space یافت نشد. "
                    "لطفاً حداقل یک کلید با نام GEMINI_API_KEY_1 تنظیم کنید."
                )
                gr.Markdown(f"<div class='api-warning-message'>{missing_key_msg}</div>")

            # Read-only status line; second output of gradio_tts_interface.
            status_message_output = gr.Textbox(
                label="وضعیت پردازش",
                interactive=False,
                lines=1,
                placeholder="پیامهای وضعیت اینجا نمایش داده میشوند...",
            )

            with gr.Row(elem_classes=["main-content-row"]):
                with gr.Column(scale=3):
                    use_file_input_cb = gr.Checkbox(label="📄 استفاده از فایل متنی (.txt)", value=False)
                    # Hidden until the checkbox above is ticked (see toggle_file_input).
                    uploaded_file_input = gr.File(
                        label="آپلود فایل متنی",
                        file_types=['.txt'],
                        visible=False,
                    )
                    text_to_speak_tb = gr.Textbox(
                        label="📝 متن فارسی برای تبدیل به گفتار",
                        placeholder="مثال: سلام، به پروژه آلفا خوش آمدید.",
                        lines=5,
                        value="",
                    )
                    speech_prompt_tb = gr.Textbox(
                        label="🗣️ سبک و زمینه گفتار (اختیاری)",
                        placeholder="مثال: با لحنی شاد و پرانرژی",
                        value="با لحنی دوستانه و رسا صحبت کن.",
                        lines=2,
                    )
                with gr.Column(scale=2):
                    # The label itself warns users that the selected voice may
                    # not be applied yet.
                    speaker_voice_dd = gr.Dropdown(
                        SPEAKER_VOICES,
                        label="🎤 انتخاب گوینده (توجه: ممکن است فعلا اعمال نشود)",
                        value="Charon",
                    )
                    temperature_slider = gr.Slider(
                        minimum=0.1, maximum=1.5, step=0.05, value=0.9,
                        label="🌡️ میزان خلاقیت صدا (دما)",
                    )
                    gr.Markdown(
                        "<p class='temp-description-tts'>مقادیر بالاتر = تنوع بیشتر، مقادیر پایینتر = یکنواختی بیشتر.</p>",
                        elem_classes=["temp-description-tts-container"],
                    )
                    output_audio = gr.Audio(label="🎧 فایل صوتی خروجی", type="filepath", elem_id="output_audio_tts")

            generate_button = gr.Button("🚀 تولید و پخش صدا", variant="primary", elem_classes=["lg"])
            gr.HTML("<hr class='custom-hr'>")

            # Single definition of the handler's inputs/outputs, shared by the
            # examples panel and the generate button so the two cannot drift.
            tts_inputs = [
                use_file_input_cb,
                uploaded_file_input,
                text_to_speak_tb,
                speech_prompt_tb,
                speaker_voice_dd,
                temperature_slider,
            ]
            tts_outputs = [output_audio, status_message_output]

            gr.Examples(
                examples=[
                    [False, None, "سلام بر شما، امیدوارم روز خوبی داشته باشید. این یک نمونه صدای تولید شده توسط آلفا است.", "با لحنی گرم و صمیمی.", "Zephyr", 0.85],
                    [False, None, "این یک آزمایش برای بررسی کیفیت صدای تولید شده توسط هوش مصنوعی پیشرفته جیمینای است.", "با صدایی طبیعی، روان و کمی رسمی.", "Charon", 0.9],
                    [False, None, "آیا میتوانم یک پیتزای پپرونی سفارش دهم؟", "پرسشی و مودبانه.", "Achird", 0.75],
                ],
                inputs=tts_inputs,
                outputs=tts_outputs,
                fn=gradio_tts_interface,
                # Example caching is opt-in via the GRADIO_CACHE_EXAMPLES env var.
                cache_examples=os.getenv("GRADIO_CACHE_EXAMPLES", "False").lower() == "true",
                label="💡 نمونههای کاربردی",
            )

    gr.Markdown("<p class='app-footer-fly'>Alpha TTS © 2024</p>")

    def toggle_file_input(use_file):
        """Show exactly one text source: the file upload or the text box.

        Args:
            use_file: Current value of the "use a text file" checkbox.

        Returns:
            A pair of Gradio updates, in order, for ``uploaded_file_input``
            and ``text_to_speak_tb``.
        """
        if use_file:
            return gr.update(visible=True, label=" "), gr.update(visible=False)
        return gr.update(visible=False), gr.update(visible=True, label="📝 متن فارسی برای تبدیل به گفتار")

    use_file_input_cb.change(
        fn=toggle_file_input,
        inputs=use_file_input_cb,
        outputs=[uploaded_file_input, text_to_speak_tb],
    )

    # gr.Button(...) above always yields a component, so the click handler is
    # wired unconditionally (the former "is None" fallback was unreachable).
    generate_button.click(
        fn=gradio_tts_interface,
        inputs=tts_inputs,
        outputs=tts_outputs,
    )
|
592 |
|
593 |
if __name__ == "__main__":
    # Run auto_restart_service on a daemon thread unless AUTO_RESTART_ENABLED
    # is set to something other than "true" (case-insensitive). Being a daemon
    # thread, it does not keep the process alive on its own.
    if os.getenv("AUTO_RESTART_ENABLED", "true").lower() == "true":
        restart_scheduler_thread = threading.Thread(target=auto_restart_service, daemon=True)
        restart_scheduler_thread.start()

    demo.launch(
        # Listen on all interfaces; the port comes from PORT (default 7860).
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", "7860")),
        # Debug mode is opt-in via GRADIO_DEBUG=true.
        debug=os.getenv("GRADIO_DEBUG", "False").lower() == "true",
        show_error=True,
    )
|