Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -82,6 +82,9 @@ class ConversationConfig:
|
|
82 |
# 새로운 로컬 모델 설정
|
83 |
local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
|
84 |
local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
|
|
|
|
|
|
|
85 |
|
86 |
|
87 |
class UnifiedAudioConverter:
|
@@ -227,8 +230,6 @@ class UnifiedAudioConverter:
|
|
227 |
return text
|
228 |
except Exception as e:
|
229 |
raise RuntimeError(f"Failed to extract text from PDF: {e}")
|
230 |
-
|
231 |
-
|
232 |
|
233 |
def _get_messages_formatter_type(self, model_name):
|
234 |
"""Get appropriate message formatter for the model"""
|
@@ -240,41 +241,77 @@ class UnifiedAudioConverter:
|
|
240 |
def _build_prompt(self, text: str, language: str = "English") -> str:
|
241 |
"""Build prompt for conversation generation"""
|
242 |
if language == "Korean":
|
|
|
243 |
template = """
|
244 |
{
|
245 |
"conversation": [
|
246 |
-
{"speaker": "", "text": ""},
|
247 |
-
{"speaker": "", "text": ""}
|
|
|
|
|
248 |
]
|
249 |
}
|
250 |
"""
|
251 |
return (
|
252 |
-
f"{text}\n\n
|
253 |
-
f"
|
254 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
)
|
256 |
else:
|
257 |
template = """
|
258 |
{
|
259 |
"conversation": [
|
260 |
-
{"speaker": "", "text": ""},
|
261 |
-
{"speaker": "", "text": ""}
|
|
|
|
|
262 |
]
|
263 |
}
|
264 |
"""
|
265 |
return (
|
266 |
-
f"{text}\n\
|
267 |
-
f"podcast conversation between two experts
|
268 |
-
f"
|
269 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
)
|
271 |
|
272 |
def _build_messages_for_local(self, text: str, language: str = "English") -> List[Dict]:
|
273 |
"""Build messages for local LLM"""
|
274 |
if language == "Korean":
|
275 |
-
system_message =
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
else:
|
277 |
-
system_message =
|
|
|
|
|
|
|
|
|
|
|
278 |
|
279 |
return [
|
280 |
{"role": "system", "content": system_message},
|
@@ -291,11 +328,24 @@ class UnifiedAudioConverter:
|
|
291 |
chat_template = self._get_messages_formatter_type(self.config.local_model_name)
|
292 |
provider = LlamaCppPythonProvider(self.local_llm)
|
293 |
|
294 |
-
# 언어별 시스템 메시지
|
295 |
if language == "Korean":
|
296 |
-
system_message =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
else:
|
298 |
-
system_message =
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
|
300 |
agent = LlamaCppAgent(
|
301 |
provider,
|
@@ -305,10 +355,10 @@ class UnifiedAudioConverter:
|
|
305 |
)
|
306 |
|
307 |
settings = provider.get_provider_default_settings()
|
308 |
-
settings.temperature = 0.
|
309 |
settings.top_k = 40
|
310 |
settings.top_p = 0.95
|
311 |
-
settings.max_tokens =
|
312 |
settings.repeat_penalty = 1.1
|
313 |
settings.stream = False
|
314 |
|
@@ -342,11 +392,18 @@ class UnifiedAudioConverter:
|
|
342 |
try:
|
343 |
self.initialize_legacy_local_mode()
|
344 |
|
345 |
-
# 언어별 시스템 메시지
|
346 |
if language == "Korean":
|
347 |
-
system_message =
|
|
|
|
|
|
|
|
|
348 |
else:
|
349 |
-
system_message =
|
|
|
|
|
|
|
350 |
|
351 |
chat = [
|
352 |
{"role": "system", "content": system_message},
|
@@ -370,7 +427,7 @@ class UnifiedAudioConverter:
|
|
370 |
generate_kwargs = dict(
|
371 |
model_inputs,
|
372 |
streamer=streamer,
|
373 |
-
max_new_tokens=
|
374 |
do_sample=True,
|
375 |
temperature=0.9,
|
376 |
eos_token_id=terminators,
|
@@ -393,19 +450,23 @@ class UnifiedAudioConverter:
|
|
393 |
|
394 |
except Exception as e:
|
395 |
print(f"Legacy local model also failed: {e}")
|
396 |
-
# Return default template
|
397 |
if language == "Korean":
|
398 |
return {
|
399 |
"conversation": [
|
400 |
-
{"speaker": "
|
401 |
-
{"speaker": "
|
|
|
|
|
402 |
]
|
403 |
}
|
404 |
else:
|
405 |
return {
|
406 |
"conversation": [
|
407 |
-
{"speaker": "
|
408 |
-
{"speaker": "
|
|
|
|
|
409 |
]
|
410 |
}
|
411 |
|
@@ -415,11 +476,20 @@ class UnifiedAudioConverter:
|
|
415 |
raise RuntimeError("API mode not initialized")
|
416 |
|
417 |
try:
|
418 |
-
# 언어별 프롬프트 구성
|
419 |
if language == "Korean":
|
420 |
-
system_message =
|
|
|
|
|
|
|
|
|
|
|
421 |
else:
|
422 |
-
system_message =
|
|
|
|
|
|
|
|
|
423 |
|
424 |
chat_completion = self.llm_client.chat.completions.create(
|
425 |
messages=[
|
@@ -460,17 +530,16 @@ class UnifiedAudioConverter:
|
|
460 |
filenames = []
|
461 |
|
462 |
try:
|
463 |
-
# 언어별 음성 설정
|
464 |
if language == "Korean":
|
465 |
voices = [
|
466 |
-
"ko-KR-HyunsuNeural", # ๋จ์ฑ ์์ฑ (
|
467 |
-
"ko-KR-InJoonNeural"
|
468 |
-
|
469 |
]
|
470 |
else:
|
471 |
voices = [
|
472 |
-
"en-US-
|
473 |
-
"en-US-
|
474 |
]
|
475 |
|
476 |
for i, turn in enumerate(conversation_json["conversation"]):
|
@@ -522,13 +591,13 @@ class UnifiedAudioConverter:
|
|
522 |
# Create different voice characteristics for different speakers
|
523 |
if language == "Korean":
|
524 |
voice_configs = [
|
525 |
-
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์
|
526 |
-
{"prompt_text": "์๋
ํ์ธ์, ์ค๋
|
527 |
]
|
528 |
else:
|
529 |
voice_configs = [
|
530 |
-
{"prompt_text": "Hello,
|
531 |
-
{"prompt_text": "
|
532 |
]
|
533 |
|
534 |
for i, turn in enumerate(conversation_json["conversation"]):
|
@@ -835,6 +904,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
|
|
835 |
- **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
|
836 |
- **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
|
837 |
- **Status**: {"✅ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "❌ Llama CPP Not Available - Install llama-cpp-python"}
|
|
|
838 |
""")
|
839 |
|
840 |
with gr.Row():
|
@@ -898,6 +968,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
|
|
898 |
|
899 |
**한국어 지원:**
|
900 |
- ๐ฐ๐ท ํ๊ตญ์ด ์ ํ ์ Edge-TTS๋ง ์ฌ์ฉ ๊ฐ๋ฅํฉ๋๋ค
|
|
|
901 |
""")
|
902 |
|
903 |
convert_btn = gr.Button("🎯 Generate Conversation / 대화 생성", variant="primary", size="lg")
|
@@ -906,8 +977,8 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
|
|
906 |
with gr.Column():
|
907 |
conversation_output = gr.Textbox(
|
908 |
label="Generated Conversation (Editable) / 생성된 대화 (편집 가능)",
|
909 |
-
lines=
|
910 |
-
max_lines=
|
911 |
interactive=True,
|
912 |
placeholder="Generated conversation will appear here. You can edit it before generating audio.\n생성된 대화가 여기에 표시됩니다. 오디오 생성 전에 편집할 수 있습니다.",
|
913 |
info="Edit the conversation as needed. Format: 'Speaker Name: Text' / 필요에 따라 대화를 편집하세요. 형식: '화자 이름: 텍스트'"
|
|
|
82 |
# 새로운 로컬 모델 설정
|
83 |
local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
|
84 |
local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
|
85 |
+
# 토큰 수 증가
|
86 |
+
max_tokens: int = 6000 # 2048에서 6000으로 증가
|
87 |
+
max_new_tokens: int = 8000 # 4000에서 8000으로 증가
|
88 |
|
89 |
|
90 |
class UnifiedAudioConverter:
|
|
|
230 |
return text
|
231 |
except Exception as e:
|
232 |
raise RuntimeError(f"Failed to extract text from PDF: {e}")
|
|
|
|
|
233 |
|
234 |
def _get_messages_formatter_type(self, model_name):
|
235 |
"""Get appropriate message formatter for the model"""
|
|
|
241 |
def _build_prompt(self, text: str, language: str = "English") -> str:
|
242 |
"""Build prompt for conversation generation"""
|
243 |
if language == "Korean":
|
244 |
+
# 강화된 한국어 프롬프트
|
245 |
template = """
|
246 |
{
|
247 |
"conversation": [
|
248 |
+
{"speaker": "์ค์", "text": ""},
|
249 |
+
{"speaker": "๋ฏผํธ", "text": ""},
|
250 |
+
{"speaker": "์ค์", "text": ""},
|
251 |
+
{"speaker": "๋ฏผํธ", "text": ""}
|
252 |
]
|
253 |
}
|
254 |
"""
|
255 |
return (
|
256 |
+
f"{text}\n\n"
|
257 |
+
f"์ ๋ด์ฉ์ ๋ฐํ์ผ๋ก 30๋ ํ๊ตญ์ธ ๋ ๋ช
์ด ์งํํ๋ ์์ฐ์ค๋ฝ๊ณ ํฅ๋ฏธ๋ก์ด ํ๊ตญ์ด ํ์บ์คํธ ๋ํ๋ฅผ ๋ง๋ค์ด์ฃผ์ธ์.\n\n"
|
258 |
+
f"ํ์ ์ง์นจ:\n"
|
259 |
+
f"1. ์ค์(์งํ์): ์น๊ทผํ๊ณ ํธ๊ธฐ์ฌ ๋ง์ ์ฑ๊ฒฉ, ์ฒญ์ทจ์์ ๊ถ๊ธ์ฆ์ ๋๋ณ\n"
|
260 |
+
f"2. ๋ฏผํธ(์ ๋ฌธ๊ฐ): ํด๋น ์ฃผ์ ์ ๋ํ ๊น์ ์ง์์ ๊ฐ์ง ์ ๋ฌธ๊ฐ, ์ฝ๊ฒ ์ค๋ช
ํ๋ ๋ฅ๋ ฅ\n"
|
261 |
+
f"3. ํ๊ตญ์ธ์ด ์ค์ ๋ก ์ฌ์ฉํ๋ ์์ฐ์ค๋ฌ์ด ํํ๊ณผ ๊ฐํ์ฌ ์ฌ์ฉ ('์~', '๊ทธ๋ ๊ตฌ๋', '์~', '์ง์ง์?')\n"
|
262 |
+
f"4. ์ ์ ํ ์กด๋๋ง๊ณผ ํธ์ํ ๋ฐ๋ง์ ์์ด ์น๋ฐ๊ฐ ์กฐ์ฑ\n"
|
263 |
+
f"5. ํ๊ตญ ๋ฌธํ์ ์ผ์์ ๋ง๋ ๊ตฌ์ฒด์ ์ธ ์์์ ๋น์ ์ฌ์ฉ\n"
|
264 |
+
f"6. ๊ฐ ๋ํ๋ ์ถฉ๋ถํ ๊ธธ๊ณ ์์ธํ๊ฒ (์ต์ 3-4๋ฌธ์ฅ ์ด์)\n"
|
265 |
+
f"7. ์ ์ฒด ๋ํ๋ ์ต์ 10ํ ์ด์์ ์ฃผ๊ณ ๋ฐ๊ธฐ๋ก ๊ตฌ์ฑ\n"
|
266 |
+
f"8. ์ฒญ์ทจ์๊ฐ '๋๋ ๊ถ๊ธํ๋ ๋ด์ฉ์ด์ผ'๋ผ๊ณ ๊ณต๊ฐํ ์ ์๋ ์ง๋ฌธ ํฌํจ\n"
|
267 |
+
f"9. ํต์ฌ ์ ๋ณด๋ฅผ ์์ฐ์ค๋ฝ๊ฒ ์ ๋ฌํ๋ฉด์๋ ์ง๋ฃจํ์ง ์๊ฒ ๊ตฌ์ฑ\n"
|
268 |
+
f"10. ๋ง๋ฌด๋ฆฌ๋ ํต์ฌ ๋ด์ฉ ์์ฝ๊ณผ ์ฒญ์ทจ์์๊ฒ ๋์์ด ๋๋ ์ค์ฉ์ ์กฐ์ธ\n\n"
|
269 |
+
f"๋ค์ JSON ํ์์ผ๋ก๋ง ๋ฐํํ์ธ์:\n{template}"
|
270 |
)
|
271 |
else:
|
272 |
template = """
|
273 |
{
|
274 |
"conversation": [
|
275 |
+
{"speaker": "Alex", "text": ""},
|
276 |
+
{"speaker": "Jordan", "text": ""},
|
277 |
+
{"speaker": "Alex", "text": ""},
|
278 |
+
{"speaker": "Jordan", "text": ""}
|
279 |
]
|
280 |
}
|
281 |
"""
|
282 |
return (
|
283 |
+
f"{text}\n\n"
|
284 |
+
f"Convert the provided text into an engaging, natural podcast conversation between two experts.\n\n"
|
285 |
+
f"Guidelines:\n"
|
286 |
+
f"1. Alex (Host): Curious, engaging personality representing audience questions\n"
|
287 |
+
f"2. Jordan (Expert): Knowledgeable but approachable, explains complex topics simply\n"
|
288 |
+
f"3. Use natural conversational English with appropriate reactions ('Wow', 'That's interesting', 'Really?')\n"
|
289 |
+
f"4. Include concrete examples and relatable analogies\n"
|
290 |
+
f"5. Each response should be substantial (minimum 3-4 sentences)\n"
|
291 |
+
f"6. Create at least 10 back-and-forth exchanges\n"
|
292 |
+
f"7. Address common questions and misconceptions\n"
|
293 |
+
f"8. Maintain an informative yet entertaining tone\n"
|
294 |
+
f"9. End with key takeaways and practical advice\n\n"
|
295 |
+
f"Return ONLY the JSON in this format:\n{template}"
|
296 |
)
|
297 |
|
298 |
def _build_messages_for_local(self, text: str, language: str = "English") -> List[Dict]:
|
299 |
"""Build messages for local LLM"""
|
300 |
if language == "Korean":
|
301 |
+
system_message = (
|
302 |
+
"๋น์ ์ ํ๊ตญ ์ต๊ณ ์ ํ์บ์คํธ ๋๋ณธ ์๊ฐ์
๋๋ค. "
|
303 |
+
"ํ๊ตญ์ธ์ ์ ์์ ๋ฌธํ๋ฅผ ์๋ฒฝํ ์ดํดํ๊ณ , ์ฒญ์ทจ์๋ค์ด ๋๊น์ง ์ง์คํ ์ ์๋ "
|
304 |
+
"๋งค๋ ฅ์ ์ด๊ณ ์ ์ตํ ๋ํ๋ฅผ ๋ง๋ค์ด๋
๋๋ค. "
|
305 |
+
"์ค์ ํ๊ตญ์ธ๋ค์ด ์ผ์์์ ์ฌ์ฉํ๋ ์์ฐ์ค๋ฌ์ด ํํ๊ณผ "
|
306 |
+
"์ ์ ํ ๊ฐ์ ํํ์ ํตํด ์๋๊ฐ ์๋ ๋ํ๋ฅผ ๊ตฌ์ฑํฉ๋๋ค."
|
307 |
+
)
|
308 |
else:
|
309 |
+
system_message = (
|
310 |
+
"You are an expert podcast scriptwriter who creates engaging, "
|
311 |
+
"natural conversations that keep listeners hooked. "
|
312 |
+
"You understand how to balance information with entertainment, "
|
313 |
+
"using real conversational patterns and authentic reactions."
|
314 |
+
)
|
315 |
|
316 |
return [
|
317 |
{"role": "system", "content": system_message},
|
|
|
328 |
chat_template = self._get_messages_formatter_type(self.config.local_model_name)
|
329 |
provider = LlamaCppPythonProvider(self.local_llm)
|
330 |
|
331 |
+
# 강화된 언어별 시스템 메시지
|
332 |
if language == "Korean":
|
333 |
+
system_message = (
|
334 |
+
"๋น์ ์ ํ๊ตญ์ด ํ์บ์คํธ ์ ๋ฌธ ์๊ฐ์
๋๋ค. "
|
335 |
+
"ํ๊ตญ ์ฒญ์ทจ์๋ค์ ๋ฌธํ์ ๋งฅ๋ฝ๊ณผ ์ธ์ด์ ํน์ฑ์ ์๋ฒฝํ ์ดํดํ๊ณ , "
|
336 |
+
"์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ธ ๋๋ณธ์ ์์ฑํฉ๋๋ค. "
|
337 |
+
"์ค์ ํ๊ตญ์ธ์ด ๋ํํ๋ ๊ฒ์ฒ๋ผ ์์ฐ์ค๋ฌ์ด ํํ, ์ ์ ํ ๊ฐํ์ฌ, "
|
338 |
+
"๋ฌธํ์ ์ผ๋ก ์ ํฉํ ์์๋ฅผ ์ฌ์ฉํ์ฌ ์ฒญ์ทจ์๊ฐ ๊ณต๊ฐํ๊ณ ๋ชฐ์
ํ ์ ์๋ "
|
339 |
+
"๋ํ๋ฅผ ๋ง๋ค์ด์ฃผ์ธ์. JSON ํ์์ผ๋ก๋ง ์๋ตํ์ธ์."
|
340 |
+
)
|
341 |
else:
|
342 |
+
system_message = (
|
343 |
+
"You are an expert podcast scriptwriter specializing in creating "
|
344 |
+
"engaging, natural conversations that captivate listeners. "
|
345 |
+
"You excel at transforming complex information into accessible, "
|
346 |
+
"entertaining dialogue while maintaining authenticity and educational value. "
|
347 |
+
"Respond only in JSON format."
|
348 |
+
)
|
349 |
|
350 |
agent = LlamaCppAgent(
|
351 |
provider,
|
|
|
355 |
)
|
356 |
|
357 |
settings = provider.get_provider_default_settings()
|
358 |
+
settings.temperature = 0.8 # 약간 높여서 더 자연스러운 대화 생성
|
359 |
settings.top_k = 40
|
360 |
settings.top_p = 0.95
|
361 |
+
settings.max_tokens = self.config.max_tokens # 증가된 토큰 수 사용
|
362 |
settings.repeat_penalty = 1.1
|
363 |
settings.stream = False
|
364 |
|
|
|
392 |
try:
|
393 |
self.initialize_legacy_local_mode()
|
394 |
|
395 |
+
# 강화된 언어별 시스템 메시지
|
396 |
if language == "Korean":
|
397 |
+
system_message = (
|
398 |
+
"๋น์ ์ ํ๊ตญ์ด ํ์บ์คํธ ์ ๋ฌธ ์๊ฐ์
๋๋ค. "
|
399 |
+
"30๋ ํ๊ตญ์ธ ์ฒญ์ทจ์๋ฅผ ๋์์ผ๋ก ์์ฐ์ค๋ฝ๊ณ ํฅ๋ฏธ๋ก์ด ๋ํ๋ฅผ ๋ง๋ค์ด์ฃผ์ธ์. "
|
400 |
+
"์ค์ ์ฌ์ฉํ๋ ํ๊ตญ์ด ํํ๊ณผ ๋ฌธํ์ ๋งฅ๋ฝ์ ๋ฐ์ํ์ฌ ์์ฑํด์ฃผ์ธ์."
|
401 |
+
)
|
402 |
else:
|
403 |
+
system_message = (
|
404 |
+
"You are an expert podcast scriptwriter. "
|
405 |
+
"Create natural, engaging conversations that inform and entertain listeners."
|
406 |
+
)
|
407 |
|
408 |
chat = [
|
409 |
{"role": "system", "content": system_message},
|
|
|
427 |
generate_kwargs = dict(
|
428 |
model_inputs,
|
429 |
streamer=streamer,
|
430 |
+
max_new_tokens=self.config.max_new_tokens, # 증가된 토큰 수 사용
|
431 |
do_sample=True,
|
432 |
temperature=0.9,
|
433 |
eos_token_id=terminators,
|
|
|
450 |
|
451 |
except Exception as e:
|
452 |
print(f"Legacy local model also failed: {e}")
|
453 |
+
# Return default template with Korean male names
|
454 |
if language == "Korean":
|
455 |
return {
|
456 |
"conversation": [
|
457 |
+
{"speaker": "์ค์", "text": "์๋
ํ์ธ์, ์ฌ๋ฌ๋ถ! ์ค๋๋ ์ ํฌ ํ์บ์คํธ๋ฅผ ์ฐพ์์ฃผ์
์ ์ ๋ง ๊ฐ์ฌํฉ๋๋ค."},
|
458 |
+
{"speaker": "๋ฏผํธ", "text": "์๋
ํ์ธ์! ์ค๋์ ์ ๋ง ํฅ๋ฏธ๋ก์ด ์ฃผ์ ๋ฅผ ์ค๋นํ๋๋ฐ์, ํจ๊ป ์ด์ผ๊ธฐ ๋๋ ๋ณด์์ฃ ."},
|
459 |
+
{"speaker": "์ค์", "text": "๋ค, ์ ๋ง ๊ธฐ๋๋๋๋ฐ์. ์ฒญ์ทจ์ ์ฌ๋ฌ๋ถ๋ค๋ ๊ถ๊ธํดํ์ค ๊ฒ ๊ฐ์์."},
|
460 |
+
{"speaker": "๋ฏผํธ", "text": "๋ง์์. ๊ทธ๋ผ ๋ณธ๊ฒฉ์ ์ผ๋ก ์์ํด๋ณผ๊น์?"}
|
461 |
]
|
462 |
}
|
463 |
else:
|
464 |
return {
|
465 |
"conversation": [
|
466 |
+
{"speaker": "Alex", "text": "Welcome everyone to our podcast! We have an fascinating topic to discuss today."},
|
467 |
+
{"speaker": "Jordan", "text": "Thanks for having me, Alex. I'm excited to dive into this subject with our listeners."},
|
468 |
+
{"speaker": "Alex", "text": "So let's get started. Can you give us an overview of what we'll be covering?"},
|
469 |
+
{"speaker": "Jordan", "text": "Absolutely! Today we'll explore some really interesting aspects that I think will surprise many people."}
|
470 |
]
|
471 |
}
|
472 |
|
|
|
476 |
raise RuntimeError("API mode not initialized")
|
477 |
|
478 |
try:
|
479 |
+
# 강화된 언어별 프롬프트 구성
|
480 |
if language == "Korean":
|
481 |
+
system_message = (
|
482 |
+
"๋น์ ์ ํ๊ตญ์ด ํ์บ์คํธ ์ ๋ฌธ ์๊ฐ์
๋๋ค. "
|
483 |
+
"ํ๊ตญ ์ฒญ์ทจ์๋ค์ ๋ฌธํ์ ๋งฅ๋ฝ๊ณผ ์ธ์ด์ ํน์ฑ์ ์๋ฒฝํ ์ดํดํ๊ณ , "
|
484 |
+
"์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ธ ๋๋ณธ์ ์์ฑํฉ๋๋ค. "
|
485 |
+
"์ค์(์งํ์)์ ๋ฏผํธ(์ ๋ฌธ๊ฐ)๋ผ๋ ๋ ๋ช
์ 30๋ ๋จ์ฑ์ด ๋ํํ๋ ํ์์ผ๋ก ์์ฑํ์ธ์."
|
486 |
+
)
|
487 |
else:
|
488 |
+
system_message = (
|
489 |
+
"You are an expert podcast scriptwriter who creates engaging, "
|
490 |
+
"natural conversations between Alex (host) and Jordan (expert). "
|
491 |
+
"Create informative yet entertaining dialogue that keeps listeners engaged."
|
492 |
+
)
|
493 |
|
494 |
chat_completion = self.llm_client.chat.completions.create(
|
495 |
messages=[
|
|
|
530 |
filenames = []
|
531 |
|
532 |
try:
|
533 |
+
# 언어별 음성 설정 - 한국어는 모두 남성 음성
|
534 |
if language == "Korean":
|
535 |
voices = [
|
536 |
+
"ko-KR-HyunsuNeural", # 남성 음성 1 (차분하고 신뢰감 있는)
|
537 |
+
"ko-KR-InJoonNeural" # 남성 음성 2 (활기차고 친근한)
|
|
|
538 |
]
|
539 |
else:
|
540 |
voices = [
|
541 |
+
"en-US-AndrewMultilingualNeural", # 남성 음성 1
|
542 |
+
"en-US-BrianMultilingualNeural" # 남성 음성 2
|
543 |
]
|
544 |
|
545 |
for i, turn in enumerate(conversation_json["conversation"]):
|
|
|
591 |
# Create different voice characteristics for different speakers
|
592 |
if language == "Korean":
|
593 |
voice_configs = [
|
594 |
+
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์ ์ค์์
๋๋ค. ์ฌ๋ฌ๋ถ๊ณผ ํจ๊ป ํฅ๋ฏธ๋ก์ด ์ด์ผ๊ธฐ๋ฅผ ๋๋ ๋ณด๊ฒ ์ต๋๋ค.", "gender": "male"},
|
595 |
+
{"prompt_text": "์๋
ํ์ธ์, ์ ๋ ์ค๋ ์ด ์ฃผ์ ์ ๋ํด ์ค๋ช
๋๋ฆด ๋ฏผํธ์
๋๋ค. ์ฝ๊ณ ์ฌ๋ฏธ์๊ฒ ์ค๋ช
๋๋ฆด๊ฒ์.", "gender": "male"}
|
596 |
]
|
597 |
else:
|
598 |
voice_configs = [
|
599 |
+
{"prompt_text": "Hello everyone, I'm Alex, your host for today's podcast. Let's explore this fascinating topic together.", "gender": "male"},
|
600 |
+
{"prompt_text": "Hi, I'm Jordan. I'm excited to share my insights on this subject with you all today.", "gender": "male"}
|
601 |
]
|
602 |
|
603 |
for i, turn in enumerate(conversation_json["conversation"]):
|
|
|
904 |
- **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
|
905 |
- **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
|
906 |
- **Status**: {"✅ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "❌ Llama CPP Not Available - Install llama-cpp-python"}
|
907 |
+
- **Max Tokens**: {converter.config.max_tokens} (Extended for longer conversations)
|
908 |
""")
|
909 |
|
910 |
with gr.Row():
|
|
|
968 |
|
969 |
**한국어 지원:**
|
970 |
- ๐ฐ๐ท ํ๊ตญ์ด ์ ํ ์ Edge-TTS๋ง ์ฌ์ฉ ๊ฐ๋ฅํฉ๋๋ค
|
971 |
+
- ๐จโ๐จ ํ๊ตญ์ด ๋ํ๋ ์ค์(์งํ์)์ ๋ฏผํธ(์ ๋ฌธ๊ฐ) ๋ ๋จ์ฑ์ด ์งํํฉ๋๋ค
|
972 |
""")
|
973 |
|
974 |
convert_btn = gr.Button("🎯 Generate Conversation / 대화 생성", variant="primary", size="lg")
|
|
|
977 |
with gr.Column():
|
978 |
conversation_output = gr.Textbox(
|
979 |
label="Generated Conversation (Editable) / 생성된 대화 (편집 가능)",
|
980 |
+
lines=20, # 더 긴 대화를 위해 증가
|
981 |
+
max_lines=40,
|
982 |
interactive=True,
|
983 |
placeholder="Generated conversation will appear here. You can edit it before generating audio.\n생성된 대화가 여기에 표시됩니다. 오디오 생성 전에 편집할 수 있습니다.",
|
984 |
info="Edit the conversation as needed. Format: 'Speaker Name: Text' / 필요에 따라 대화를 편집하세요. 형식: '화자 이름: 텍스트'"
|