Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -327,23 +327,25 @@ def _build_prompt(self, text: str, language: str = "English", search_context: st
|
|
327 |
base_prompt = (
|
328 |
f"# ์๋ณธ ์ฝํ
์ธ :\n{text}\n\n"
|
329 |
f"{context_part}"
|
330 |
-
f"์ ๋ด์ฉ์ผ๋ก
|
331 |
f"## ํ์ ์๊ตฌ์ฌํญ:\n"
|
332 |
f"1. **์ต์ 18ํ ์ด์์ ๋ํ ๊ตํ** (์ค์ 9ํ, ๋ฏผํธ 9ํ ์ด์)\n"
|
333 |
-
f"2. **๋ํ ์คํ์ผ**:
|
334 |
f"3. **ํ์ ์ญํ **:\n"
|
335 |
-
f" - ์ค์: ์งํ์ (
|
336 |
-
f" - ๋ฏผํธ: ์ ๋ฌธ๊ฐ (
|
337 |
-
f"4.
|
338 |
-
f" - ์ค์:
|
339 |
-
f" - ๋ฏผํธ:
|
340 |
-
f" -
|
|
|
341 |
f"5. **๋ด์ฉ ๊ตฌ์ฑ**:\n"
|
342 |
-
f" - ๋์
๋ถ (2-3ํ):
|
343 |
-
f" - ์ ๊ฐ๋ถ (
|
344 |
-
f" - ๋ง๋ฌด๋ฆฌ (3
|
345 |
-
f"6.
|
346 |
-
f"
|
|
|
347 |
)
|
348 |
|
349 |
return base_prompt
|
@@ -382,22 +384,24 @@ def _build_prompt(self, text: str, language: str = "English", search_context: st
|
|
382 |
base_prompt = (
|
383 |
f"# Content:\n{text}\n\n"
|
384 |
f"{context_part}"
|
385 |
-
f"Create a
|
386 |
f"## Requirements:\n"
|
387 |
f"1. **Minimum 18 conversation exchanges** (Alex 9+, Jordan 9+)\n"
|
388 |
-
f"2. **Style**:
|
389 |
f"3. **Roles**:\n"
|
390 |
-
f" - Alex: Host (
|
391 |
-
f" - Jordan: Expert (
|
392 |
-
f"4. **
|
393 |
-
f" - Alex:
|
394 |
-
f" - Jordan:
|
395 |
-
f" -
|
|
|
396 |
f"5. **Structure**:\n"
|
397 |
-
f" - Introduction (2-3 exchanges): Topic
|
398 |
-
f" - Main content (
|
399 |
-
f" - Conclusion (3
|
400 |
-
f"
|
|
|
401 |
)
|
402 |
|
403 |
return base_prompt
|
@@ -555,7 +559,7 @@ class UnifiedAudioConverter:
|
|
555 |
|
556 |
|
557 |
def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
|
558 |
-
"""Build prompt for conversation generation with enhanced
|
559 |
# ํ
์คํธ ๊ธธ์ด ์ ํ
|
560 |
max_text_length = 4500 if search_context else 6000
|
561 |
if len(text) > max_text_length:
|
@@ -589,19 +593,21 @@ class UnifiedAudioConverter:
|
|
589 |
base_prompt = (
|
590 |
f"# ์๋ณธ ์ฝํ
์ธ :\n{text}\n\n"
|
591 |
f"{context_part}"
|
592 |
-
f"์ ๋ด์ฉ์ผ๋ก
|
593 |
f"## ํต์ฌ ์ง์นจ:\n"
|
594 |
-
f"1. **๋ํ ์คํ์ผ**:
|
595 |
f"2. **ํ์ ์ญํ **:\n"
|
596 |
-
f" - ์ค์: ์งํ์/ํธ์คํธ (
|
597 |
-
f" - ๋ฏผํธ: ์ ๋ฌธ๊ฐ (
|
598 |
-
f"3.
|
599 |
-
f" -
|
600 |
-
f" -
|
601 |
-
f" -
|
602 |
-
f"4.
|
603 |
-
f" -
|
604 |
-
f" -
|
|
|
|
|
605 |
f"5. **ํ์ ๊ท์น**: ์๋ก ์กด๋๋ง ์ฌ์ฉ, 12-15ํ ๋ํ ๊ตํ\n\n"
|
606 |
f"JSON ํ์์ผ๋ก๋ง ๋ฐํ:\n{template}"
|
607 |
)
|
@@ -636,19 +642,21 @@ class UnifiedAudioConverter:
|
|
636 |
base_prompt = (
|
637 |
f"# Content:\n{text}\n\n"
|
638 |
f"{context_part}"
|
639 |
-
f"Create a
|
640 |
f"## Key Guidelines:\n"
|
641 |
-
f"1. **Style**:
|
642 |
f"2. **Roles**:\n"
|
643 |
-
f" - Alex: Host (
|
644 |
-
f" - Jordan: Expert (
|
645 |
-
f"3. **
|
646 |
-
f" - Alex
|
647 |
-
f" - Jordan
|
648 |
-
f" -
|
649 |
-
f"4. **
|
650 |
-
f" -
|
651 |
-
f" -
|
|
|
|
|
652 |
f"5. **Length**: 12-15 exchanges total\n\n"
|
653 |
f"Return JSON only:\n{template}"
|
654 |
)
|
@@ -658,30 +666,30 @@ class UnifiedAudioConverter:
|
|
658 |
|
659 |
|
660 |
def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
|
661 |
-
"""Build messages for local LLM with enhanced
|
662 |
if language == "Korean":
|
663 |
system_message = (
|
664 |
-
"๋น์ ์ ํ๊ตญ ์ต๊ณ ์
|
665 |
-
"
|
666 |
"ํต์ฌ ์์น:\n"
|
667 |
-
"1.
|
668 |
-
"2. ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋
|
669 |
-
"3.
|
670 |
-
"4.
|
671 |
-
"5.
|
672 |
-
"6. ๋ฐ๋์ ์๋ก ์กด๋๋ง์ ์ฌ์ฉํ๋ฉฐ,
|
673 |
)
|
674 |
else:
|
675 |
system_message = (
|
676 |
-
"You are an expert
|
677 |
-
"
|
678 |
"Key principles:\n"
|
679 |
-
"1. The host (Alex)
|
680 |
-
"2. The expert (Jordan)
|
681 |
-
"3.
|
682 |
-
"4.
|
683 |
-
"5.
|
684 |
-
"6.
|
685 |
)
|
686 |
|
687 |
return [
|
@@ -691,7 +699,7 @@ class UnifiedAudioConverter:
|
|
691 |
|
692 |
@spaces.GPU(duration=120)
|
693 |
def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
|
694 |
-
"""Extract conversation using new local LLM with enhanced
|
695 |
try:
|
696 |
# ๊ฒ์ ์ปจํ
์คํธ ์์ฑ (ํค์๋ ๊ธฐ๋ฐ์ด ์๋ ๊ฒฝ์ฐ)
|
697 |
search_context = ""
|
@@ -711,29 +719,34 @@ class UnifiedAudioConverter:
|
|
711 |
chat_template = self._get_messages_formatter_type(self.config.local_model_name)
|
712 |
provider = LlamaCppPythonProvider(self.local_llm)
|
713 |
|
714 |
-
# ๊ฐํ๋
|
715 |
if language == "Korean":
|
716 |
system_message = (
|
717 |
-
"๋น์ ์ ํ๊ตญ์
|
718 |
-
"์ฒญ์ทจ์๋ค์ด
|
719 |
"์์ฑ ๊ท์น:\n"
|
720 |
-
"1. ์งํ์(์ค์)๋
|
721 |
-
"2. ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋
|
722 |
-
"
|
723 |
-
"
|
724 |
-
"
|
725 |
-
"
|
726 |
-
"
|
|
|
|
|
727 |
)
|
728 |
else:
|
729 |
system_message = (
|
730 |
-
"You are a professional
|
731 |
-
"
|
732 |
"Writing rules:\n"
|
733 |
-
"1. Host (Alex)
|
734 |
-
"2. Expert (Jordan)
|
735 |
-
"
|
736 |
-
"
|
|
|
|
|
|
|
737 |
"5. Create 12-15 conversation exchanges\n"
|
738 |
"6. Respond only in JSON format"
|
739 |
)
|
@@ -746,7 +759,7 @@ class UnifiedAudioConverter:
|
|
746 |
)
|
747 |
|
748 |
settings = provider.get_provider_default_settings()
|
749 |
-
settings.temperature = 0.
|
750 |
settings.top_k = 40
|
751 |
settings.top_p = 0.95
|
752 |
settings.max_tokens = self.config.max_tokens # ์ฆ๊ฐ๋ ํ ํฐ ์ ์ฌ์ฉ
|
@@ -784,24 +797,24 @@ class UnifiedAudioConverter:
|
|
784 |
|
785 |
@spaces.GPU(duration=120)
|
786 |
def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
|
787 |
-
"""Extract conversation using legacy local model with enhanced style"""
|
788 |
try:
|
789 |
self.initialize_legacy_local_mode()
|
790 |
|
791 |
-
# ๊ฐํ๋
|
792 |
if language == "Korean":
|
793 |
system_message = (
|
794 |
-
"๋น์ ์
|
795 |
-
"์งํ์(์ค์)๋
|
796 |
-
"
|
797 |
-
"
|
798 |
)
|
799 |
else:
|
800 |
system_message = (
|
801 |
-
"You are a
|
802 |
-
"Create
|
803 |
-
"and the expert (Jordan) gives
|
804 |
-
"
|
805 |
)
|
806 |
|
807 |
chat = [
|
@@ -828,7 +841,7 @@ class UnifiedAudioConverter:
|
|
828 |
streamer=streamer,
|
829 |
max_new_tokens=self.config.max_new_tokens, # ์ฆ๊ฐ๋ ํ ํฐ ์ ์ฌ์ฉ
|
830 |
do_sample=True,
|
831 |
-
temperature=0.
|
832 |
eos_token_id=terminators,
|
833 |
)
|
834 |
|
@@ -856,45 +869,53 @@ class UnifiedAudioConverter:
|
|
856 |
return self._get_default_english_conversation()
|
857 |
|
858 |
def _get_default_korean_conversation(self) -> Dict:
|
859 |
-
"""๋
|
860 |
return {
|
861 |
"conversation": [
|
862 |
-
{"speaker": "์ค์", "text": "์๋
ํ์ธ์, ์ฌ๋ฌ๋ถ!
|
863 |
-
{"speaker": "๋ฏผํธ", "text": "๋ค,
|
864 |
-
{"speaker": "์ค์", "text": "
|
865 |
-
{"speaker": "๋ฏผํธ", "text": "
|
866 |
-
{"speaker": "์ค์", "text": "
|
867 |
-
{"speaker": "๋ฏผํธ", "text": "
|
868 |
-
{"speaker": "์ค์", "text": "
|
869 |
-
{"speaker": "๋ฏผํธ", "text": "
|
870 |
-
{"speaker": "์ค์", "text": "
|
871 |
-
{"speaker": "๋ฏผํธ", "text": "
|
872 |
-
{"speaker": "์ค์", "text": "
|
873 |
-
{"speaker": "๋ฏผํธ", "text": "
|
|
|
|
|
874 |
]
|
875 |
}
|
876 |
|
877 |
def _get_default_english_conversation(self) -> Dict:
|
878 |
-
"""Enhanced
|
879 |
return {
|
880 |
"conversation": [
|
881 |
-
{"speaker": "Alex", "text": "Welcome everyone to our podcast!
|
882 |
-
{"speaker": "Jordan", "text": "Thanks, Alex.
|
883 |
-
{"speaker": "Alex", "text": "
|
884 |
-
{"speaker": "Jordan", "text": "
|
885 |
-
{"speaker": "Alex", "text": "
|
886 |
-
{"speaker": "Jordan", "text": "Absolutely. Let me
|
887 |
-
{"speaker": "Alex", "text": "That
|
888 |
-
{"speaker": "Jordan", "text": "
|
889 |
-
{"speaker": "Alex", "text": "
|
890 |
-
{"speaker": "Jordan", "text": "
|
891 |
-
{"speaker": "Alex", "text": "
|
892 |
-
{"speaker": "Jordan", "text": "
|
|
|
|
|
|
|
|
|
|
|
|
|
893 |
]
|
894 |
}
|
895 |
|
896 |
def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
|
897 |
-
"""Extract conversation using API with enhanced
|
898 |
if not self.llm_client:
|
899 |
raise RuntimeError("API mode not initialized")
|
900 |
|
@@ -911,24 +932,26 @@ class UnifiedAudioConverter:
|
|
911 |
except Exception as e:
|
912 |
print(f"Search failed, continuing without context: {e}")
|
913 |
|
914 |
-
# ๊ฐํ๋
|
915 |
if language == "Korean":
|
916 |
system_message = (
|
917 |
-
"๋น์ ์ ํ๊ตญ์
|
918 |
-
"
|
919 |
-
"์ค์(์งํ์)๋
|
920 |
-
"๋ฏผํธ(์ ๋ฌธ๊ฐ)๋
|
921 |
-
"
|
922 |
-
"
|
|
|
923 |
)
|
924 |
else:
|
925 |
system_message = (
|
926 |
-
"You are a professional
|
927 |
-
"Create
|
928 |
-
"Alex (host)
|
929 |
-
"while Jordan (expert)
|
930 |
-
"
|
931 |
-
"
|
|
|
932 |
)
|
933 |
|
934 |
chat_completion = self.llm_client.chat.completions.create(
|
@@ -937,7 +960,7 @@ class UnifiedAudioConverter:
|
|
937 |
{"role": "user", "content": self._build_prompt(text, language, search_context)}
|
938 |
],
|
939 |
model=self.config.api_model_name,
|
940 |
-
temperature=0.
|
941 |
)
|
942 |
|
943 |
pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
|
@@ -1227,7 +1250,8 @@ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"
|
|
1227 |
else: # API mode (now secondary)
|
1228 |
api_key = os.environ.get("TOGETHER_API_KEY")
|
1229 |
if not api_key:
|
1230 |
-
|
|
|
1231 |
conversation_json = converter.extract_conversation_local(text, language)
|
1232 |
else:
|
1233 |
try:
|
@@ -1342,19 +1366,20 @@ if LLAMA_CPP_AVAILABLE:
|
|
1342 |
|
1343 |
# Gradio Interface
|
1344 |
with gr.Blocks(theme='soft', title="AI Podcast Generator") as demo:
|
1345 |
-
gr.Markdown("# ๐๏ธ AI Podcast Generator")
|
1346 |
-
gr.Markdown("Convert any article, blog, PDF document, or topic into an engaging podcast conversation!")
|
1347 |
|
1348 |
# ์๋จ์ ๋ก์ปฌ LLM ์ํ ํ์
|
1349 |
with gr.Row():
|
1350 |
gr.Markdown(f"""
|
1351 |
-
### ๐ค Enhanced Configuration:
|
1352 |
- **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
|
1353 |
- **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
|
1354 |
- **Status**: {"โ
Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โ Llama CPP Not Available - Install llama-cpp-python"}
|
1355 |
-
- **Conversation
|
|
|
1356 |
- **Search**: {"โ
Brave Search Enabled" if BRAVE_KEY else "โ Brave Search Not Available - Set BSEARCH_API"}
|
1357 |
-
- **
|
1358 |
""")
|
1359 |
|
1360 |
with gr.Row():
|
@@ -1418,34 +1443,35 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator") as demo:
|
|
1418 |
)
|
1419 |
|
1420 |
gr.Markdown("""
|
1421 |
-
**๐ป
|
1422 |
-
-
|
1423 |
-
- Host asks
|
1424 |
-
- Expert
|
1425 |
-
-
|
|
|
1426 |
|
1427 |
**๐ Keyword Feature:**
|
1428 |
- Enter any topic to generate a podcast
|
1429 |
- Automatically searches latest information
|
1430 |
-
- Creates
|
1431 |
|
1432 |
-
**๐ฐ๐ท ํ๊ตญ์ด
|
1433 |
-
-
|
1434 |
-
- ์งํ์(์ค์)๊ฐ
|
1435 |
-
- ์ ๋ฌธ๊ฐ(๋ฏผํธ)๊ฐ
|
1436 |
-
-
|
1437 |
""")
|
1438 |
|
1439 |
-
convert_btn = gr.Button("๐ฏ Generate Conversation / ๋ํ ์์ฑ", variant="primary", size="lg")
|
1440 |
|
1441 |
with gr.Row():
|
1442 |
with gr.Column():
|
1443 |
conversation_output = gr.Textbox(
|
1444 |
-
label="Generated Conversation (Editable) / ์์ฑ๋
|
1445 |
-
lines=
|
1446 |
-
max_lines=
|
1447 |
interactive=True,
|
1448 |
-
placeholder="
|
1449 |
info="Edit the conversation as needed. Format: 'Speaker Name: Text' / ํ์์ ๋ฐ๋ผ ๋ํ๋ฅผ ํธ์งํ์ธ์. ํ์: 'ํ์ ์ด๋ฆ: ํ
์คํธ'"
|
1450 |
)
|
1451 |
|
@@ -1456,7 +1482,7 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator") as demo:
|
|
1456 |
|
1457 |
with gr.Column():
|
1458 |
audio_output = gr.Audio(
|
1459 |
-
label="Podcast Audio / ํ์บ์คํธ ์ค๋์ค",
|
1460 |
type="filepath",
|
1461 |
interactive=False
|
1462 |
)
|
@@ -1472,9 +1498,9 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator") as demo:
|
|
1472 |
gr.Examples(
|
1473 |
examples=[
|
1474 |
["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
|
1475 |
-
["", "Keyword", "Local", "Edge-TTS", "English"], #
|
1476 |
["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
|
1477 |
-
["", "Keyword", "Local", "Edge-TTS", "Korean"], # Korean keyword
|
1478 |
],
|
1479 |
inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
|
1480 |
outputs=[conversation_output, status_output],
|
@@ -1528,4 +1554,4 @@ if __name__ == "__main__":
|
|
1528 |
share=False,
|
1529 |
server_name="0.0.0.0",
|
1530 |
server_port=7860
|
1531 |
-
)
|
|
|
327 |
base_prompt = (
|
328 |
f"# ์๋ณธ ์ฝํ
์ธ :\n{text}\n\n"
|
329 |
f"{context_part}"
|
330 |
+
f"์ ๋ด์ฉ์ผ๋ก ์ ๋ฌธ์ ์ด๊ณ ์ฌ์ธต์ ์ธ ๋ผ๋์ค ํ์บ์คํธ ๋๋ด ํ๋ก๊ทธ๋จ ๋๋ณธ์ ์์ฑํด์ฃผ์ธ์.\n\n"
|
331 |
f"## ํ์ ์๊ตฌ์ฌํญ:\n"
|
332 |
f"1. **์ต์ 18ํ ์ด์์ ๋ํ ๊ตํ** (์ค์ 9ํ, ๋ฏผํธ 9ํ ์ด์)\n"
|
333 |
+
f"2. **๋ํ ์คํ์ผ**: ์ ๋ฌธ์ ์ด๊ณ ๊น์ด ์๋ ํ์บ์คํธ ๋๋ด\n"
|
334 |
f"3. **ํ์ ์ญํ **:\n"
|
335 |
+
f" - ์ค์: ์งํ์ (ํต์ฐฐ๋ ฅ ์๋ ์ง๋ฌธ, ํต์ฌ ํฌ์ธํธ ์ ๋ฆฌ, ์ฒญ์ทจ์ ๊ด์ ๋๋ณ)\n"
|
336 |
+
f" - ๋ฏผํธ: ์ ๋ฌธ๊ฐ (์์ธํ๊ณ ์ ๋ฌธ์ ์ธ ์ค๋ช
, ๊ตฌ์ฒด์ ์์, ๋ฐ์ดํฐ ๊ธฐ๋ฐ ๋ถ์)\n"
|
337 |
+
f"4. **๋ต๋ณ ๊ท์น**:\n"
|
338 |
+
f" - ์ค์: 1-2๋ฌธ์ฅ์ ๋ช
ํํ ์ง๋ฌธ์ด๋ ์์ฝ\n"
|
339 |
+
f" - ๋ฏผํธ: **๋ฐ๋์ 2-4๋ฌธ์ฅ์ผ๋ก ์ถฉ์คํ๊ฒ ๋ต๋ณ** (ํต์ฌ ๊ฐ๋
์ค๋ช
+ ๋ถ์ฐ ์ค๋ช
+ ์์/๊ทผ๊ฑฐ)\n"
|
340 |
+
f" - ์ ๋ฌธ ์ฉ์ด๋ ์ฝ๊ฒ ํ์ด์ ์ค๋ช
\n"
|
341 |
+
f" - ๊ตฌ์ฒด์ ์ธ ์์น, ์ฌ๋ก, ์ฐ๊ตฌ ๊ฒฐ๊ณผ ์ธ์ฉ\n"
|
342 |
f"5. **๋ด์ฉ ๊ตฌ์ฑ**:\n"
|
343 |
+
f" - ๋์
๋ถ (2-3ํ): ์ฃผ์ ์ ์ค์์ฑ๊ณผ ๋ฐฐ๊ฒฝ ์ค๋ช
\n"
|
344 |
+
f" - ์ ๊ฐ๋ถ (12-14ํ): ํต์ฌ ๋ด์ฉ์ ๋ค๊ฐ๋๋ก ์ฌ์ธต ๋ถ์\n"
|
345 |
+
f" - ๋ง๋ฌด๋ฆฌ (2-3ํ): ํต์ฌ ์์ฝ๊ณผ ๋ฏธ๋ ์ ๋ง\n"
|
346 |
+
f"6. **์ ๋ฌธ์ฑ**: ํ์ ์ ๊ทผ๊ฑฐ์ ์ค๋ฌด์ ํต์ฐฐ์ ๊ท ํ์๊ฒ ํฌํจ\n"
|
347 |
+
f"7. **ํ์**: ์๋ก ์กด๋๋ง ์ฌ์ฉ, ์ฒญ์ทจ์๊ฐ ์ ๋ฌธ ์ง์์ ์ป์ ์ ์๋๋ก ์์ธํ ์ค๋ช
\n\n"
|
348 |
+
f"๋ฐ๋์ ์ JSON ํ์์ผ๋ก 18ํ ์ด์์ ์ ๋ฌธ์ ์ธ ๋ํ๋ฅผ ์์ฑํ์ธ์:\n{template}"
|
349 |
)
|
350 |
|
351 |
return base_prompt
|
|
|
384 |
base_prompt = (
|
385 |
f"# Content:\n{text}\n\n"
|
386 |
f"{context_part}"
|
387 |
+
f"Create a professional and in-depth podcast conversation.\n\n"
|
388 |
f"## Requirements:\n"
|
389 |
f"1. **Minimum 18 conversation exchanges** (Alex 9+, Jordan 9+)\n"
|
390 |
+
f"2. **Style**: Professional, insightful podcast discussion\n"
|
391 |
f"3. **Roles**:\n"
|
392 |
+
f" - Alex: Host (insightful questions, key point summaries, audience perspective)\n"
|
393 |
+
f" - Jordan: Expert (detailed explanations, concrete examples, data-driven analysis)\n"
|
394 |
+
f"4. **Response Rules**:\n"
|
395 |
+
f" - Alex: 1-2 sentence clear questions or summaries\n"
|
396 |
+
f" - Jordan: **Must answer in 2-4 sentences** (core concept + elaboration + example/evidence)\n"
|
397 |
+
f" - Explain technical terms clearly\n"
|
398 |
+
f" - Include specific data, cases, research findings\n"
|
399 |
f"5. **Structure**:\n"
|
400 |
+
f" - Introduction (2-3 exchanges): Topic importance and context\n"
|
401 |
+
f" - Main content (12-14 exchanges): Multi-angle deep analysis\n"
|
402 |
+
f" - Conclusion (2-3 exchanges): Key takeaways and future outlook\n"
|
403 |
+
f"6. **Expertise**: Balance academic rigor with practical insights\n\n"
|
404 |
+
f"Create exactly 18+ professional exchanges in this JSON format:\n{template}"
|
405 |
)
|
406 |
|
407 |
return base_prompt
|
|
|
559 |
|
560 |
|
561 |
def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
|
562 |
+
"""Build prompt for conversation generation with enhanced professional podcast style"""
|
563 |
# ํ
์คํธ ๊ธธ์ด ์ ํ
|
564 |
max_text_length = 4500 if search_context else 6000
|
565 |
if len(text) > max_text_length:
|
|
|
593 |
base_prompt = (
|
594 |
f"# ์๋ณธ ์ฝํ
์ธ :\n{text}\n\n"
|
595 |
f"{context_part}"
|
596 |
+
f"์ ๋ด์ฉ์ผ๋ก ์ ๋ฌธ์ ์ด๊ณ ์ฌ์ธต์ ์ธ ํ์บ์คํธ ๋๋ด ํ๋ก๊ทธ๋จ ๋๋ณธ์ ์์ฑํด์ฃผ์ธ์.\n\n"
|
597 |
f"## ํต์ฌ ์ง์นจ:\n"
|
598 |
+
f"1. **๋ํ ์คํ์ผ**: ์ ๋ฌธ์ ์ด๋ฉด์๋ ์ดํดํ๊ธฐ ์ฌ์ด ํ์บ์คํธ ๋๋ด\n"
|
599 |
f"2. **ํ์ ์ญํ **:\n"
|
600 |
+
f" - ์ค์: ์งํ์/ํธ์คํธ (ํต์ฌ์ ์ง๋ ์ง๋ฌธ, ์ฒญ์ทจ์ ๊ด์ ์์ ๊ถ๊ธํ ์ ์ง๋ฌธ)\n"
|
601 |
+
f" - ๋ฏผํธ: ์ ๋ฌธ๊ฐ (๊น์ด ์๋ ์ค๋ช
, ๊ตฌ์ฒด์ ์ฌ๋ก์ ๋ฐ์ดํฐ ์ ์)\n"
|
602 |
+
f"3. **์ค์ํ ๋ต๋ณ ๊ท์น**:\n"
|
603 |
+
f" - ์ค์: 1-2๋ฌธ์ฅ์ ๋ช
ํํ ์ง๋ฌธ (\"๊ทธ๋ ๋ค๋ฉด ๊ตฌ์ฒด์ ์ผ๋ก ์ด๋ค ์๋ฏธ์ธ๊ฐ์?\", \"์ค์ ์ฌ๋ก๋ฅผ ๋ค์ด์ฃผ์๊ฒ ์ด์?\")\n"
|
604 |
+
f" - ๋ฏผํธ: **๋ฐ๋์ 2-4๋ฌธ์ฅ์ผ๋ก ์ถฉ์คํ ๋ต๋ณ** (๊ฐ๋
์ค๋ช
+ ๊ตฌ์ฒด์ ์ค๋ช
+ ์์๋ ํจ์)\n"
|
605 |
+
f" - ์: \"์ด๊ฒ์ ~๋ฅผ ์๋ฏธํฉ๋๋ค. ๊ตฌ์ฒด์ ์ผ๋ก ~ํ ์ธก๋ฉด์์ ์ค์ํ๋ฐ์. ์ค์ ๋ก ์ต๊ทผ ~ํ ์ฌ๋ก๊ฐ ์์๊ณ , ์ด๋ ~๋ฅผ ๋ณด์ฌ์ค๋๋ค.\"\n"
|
606 |
+
f"4. **์ ๋ฌธ์ฑ ์์**:\n"
|
607 |
+
f" - ํต๊ณ๋ ์ฐ๊ตฌ ๊ฒฐ๊ณผ ์ธ์ฉ\n"
|
608 |
+
f" - ์ค์ ์ฌ๋ก์ ์ผ์ด์ค ์คํฐ๋\n"
|
609 |
+
f" - ์ ๋ฌธ ์ฉ์ด๋ฅผ ์ฝ๊ฒ ํ์ด์ ์ค๋ช
\n"
|
610 |
+
f" - ๋ค์ํ ๊ด์ ๊ณผ ์๊ฐ ์ ์\n"
|
611 |
f"5. **ํ์ ๊ท์น**: ์๋ก ์กด๋๋ง ์ฌ์ฉ, 12-15ํ ๋ํ ๊ตํ\n\n"
|
612 |
f"JSON ํ์์ผ๋ก๋ง ๋ฐํ:\n{template}"
|
613 |
)
|
|
|
642 |
base_prompt = (
|
643 |
f"# Content:\n{text}\n\n"
|
644 |
f"{context_part}"
|
645 |
+
f"Create a professional and insightful podcast conversation.\n\n"
|
646 |
f"## Key Guidelines:\n"
|
647 |
+
f"1. **Style**: Professional yet accessible podcast discussion\n"
|
648 |
f"2. **Roles**:\n"
|
649 |
+
f" - Alex: Host (insightful questions, audience perspective)\n"
|
650 |
+
f" - Jordan: Expert (in-depth explanations, concrete examples and data)\n"
|
651 |
+
f"3. **Critical Response Rules**:\n"
|
652 |
+
f" - Alex: 1-2 sentence clear questions (\"Could you elaborate on that?\", \"What's a real-world example?\")\n"
|
653 |
+
f" - Jordan: **Must answer in 2-4 sentences** (concept + detailed explanation + example/implication)\n"
|
654 |
+
f" - Example: \"This refers to... Specifically, it's important because... For instance, recent studies show... This demonstrates...\"\n"
|
655 |
+
f"4. **Professional Elements**:\n"
|
656 |
+
f" - Cite statistics and research\n"
|
657 |
+
f" - Real cases and case studies\n"
|
658 |
+
f" - Explain technical terms clearly\n"
|
659 |
+
f" - Present multiple perspectives\n"
|
660 |
f"5. **Length**: 12-15 exchanges total\n\n"
|
661 |
f"Return JSON only:\n{template}"
|
662 |
)
|
|
|
666 |
|
667 |
|
668 |
def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
|
669 |
+
"""Build messages for local LLM with enhanced professional podcast style"""
|
670 |
if language == "Korean":
|
671 |
system_message = (
|
672 |
+
"๋น์ ์ ํ๊ตญ ์ต๊ณ ์ ์ ๋ฌธ ํ์บ์คํธ ์๊ฐ์
๋๋ค. "
|
673 |
+
"์ฒญ์ทจ์๋ค์ด ์ ๋ฌธ ์ง์์ ์ฝ๊ฒ ์ดํดํ ์ ์๋ ๊ณ ํ์ง ๋๋ด์ ๋ง๋ค์ด๋
๋๋ค.\n\n"
|
674 |
"ํต์ฌ ์์น:\n"
|
675 |
+
"1. ์งํ์(์ค์)๋ ํต์ฌ์ ์ง๋ ํต์ฐฐ๋ ฅ ์๋ ์ง๋ฌธ์ผ๋ก ๋ํ๋ฅผ ์ด๋์ด๊ฐ๋๋ค\n"
|
676 |
+
"2. ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋ ๋ฐ๋์ 2-4๋ฌธ์ฅ์ผ๋ก ๊น์ด ์๊ฒ ๋ต๋ณํฉ๋๋ค (๊ฐ๋
+์ค๋ช
+์์)\n"
|
677 |
+
"3. ๊ตฌ์ฒด์ ์ธ ๋ฐ์ดํฐ, ์ฐ๊ตฌ ๊ฒฐ๊ณผ, ์ค์ ์ฌ๋ก๋ฅผ ํฌํจํฉ๋๋ค\n"
|
678 |
+
"4. ์ ๋ฌธ ์ฉ์ด๋ ์ฝ๊ฒ ํ์ด์ ์ค๋ช
ํ๋, ์ ํ์ฑ์ ์ ์งํฉ๋๋ค\n"
|
679 |
+
"5. ๋ค์ํ ๊ด์ ์ ์ ์ํ์ฌ ๊ท ํ์กํ ์๊ฐ์ ์ ๊ณตํฉ๋๋ค\n"
|
680 |
+
"6. ๋ฐ๋์ ์๋ก ์กด๋๋ง์ ์ฌ์ฉํ๋ฉฐ, ์ ๋ฌธ์ ์ด๋ฉด์๋ ์น๊ทผํ ํค์ ์ ์งํฉ๋๋ค"
|
681 |
)
|
682 |
else:
|
683 |
system_message = (
|
684 |
+
"You are an expert podcast scriptwriter who creates high-quality, "
|
685 |
+
"professional discussions that make complex topics accessible.\n\n"
|
686 |
"Key principles:\n"
|
687 |
+
"1. The host (Alex) asks insightful questions that drive the conversation\n"
|
688 |
+
"2. The expert (Jordan) MUST answer in 2-4 sentences (concept+explanation+example)\n"
|
689 |
+
"3. Include specific data, research findings, and real cases\n"
|
690 |
+
"4. Explain technical terms clearly while maintaining accuracy\n"
|
691 |
+
"5. Present multiple perspectives for balanced views\n"
|
692 |
+
"6. Maintain a professional yet approachable tone"
|
693 |
)
|
694 |
|
695 |
return [
|
|
|
699 |
|
700 |
@spaces.GPU(duration=120)
|
701 |
def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
|
702 |
+
"""Extract conversation using new local LLM with enhanced professional style"""
|
703 |
try:
|
704 |
# ๊ฒ์ ์ปจํ
์คํธ ์์ฑ (ํค์๋ ๊ธฐ๋ฐ์ด ์๋ ๊ฒฝ์ฐ)
|
705 |
search_context = ""
|
|
|
719 |
chat_template = self._get_messages_formatter_type(self.config.local_model_name)
|
720 |
provider = LlamaCppPythonProvider(self.local_llm)
|
721 |
|
722 |
+
# ๊ฐํ๋ ์ ๋ฌธ ํ์บ์คํธ ์คํ์ผ ์์คํ
๋ฉ์์ง
|
723 |
if language == "Korean":
|
724 |
system_message = (
|
725 |
+
"๋น์ ์ ํ๊ตญ์ ์ ๋ช
ํ์บ์คํธ ์ ๋ฌธ ์๊ฐ์
๋๋ค. "
|
726 |
+
"์ฒญ์ทจ์๋ค์ด ๊น์ด ์๋ ์ ๋ฌธ ์ง์์ ์ป์ ์ ์๋ ๊ณ ํ์ง ๋๋ด์ ๋ง๋ญ๋๋ค.\n\n"
|
727 |
"์์ฑ ๊ท์น:\n"
|
728 |
+
"1. ์งํ์(์ค์)๋ ํต์ฌ์ ์ง๋ 1-2๋ฌธ์ฅ ์ง๋ฌธ์ ํฉ๋๋ค\n"
|
729 |
+
"2. ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋ ๋ฐ๋์ 2-4๋ฌธ์ฅ์ผ๋ก ์ถฉ์คํ ๋ต๋ณํฉ๋๋ค:\n"
|
730 |
+
" - ์ฒซ ๋ฌธ์ฅ: ํต์ฌ ๊ฐ๋
์ค๋ช
\n"
|
731 |
+
" - ๋์งธ ๋ฌธ์ฅ: ๊ตฌ์ฒด์ ์ธ ์ค๋ช
์ด๋ ๋งฅ๋ฝ\n"
|
732 |
+
" - ์
์งธ-๋ท์งธ ๋ฌธ์ฅ: ์ค์ ์์, ๋ฐ์ดํฐ, ํจ์\n"
|
733 |
+
"3. ํต๊ณ, ์ฐ๊ตฌ ๊ฒฐ๊ณผ, ์ค์ ์ฌ๋ก๋ฅผ ์ ๊ทน ํ์ฉํ์ธ์\n"
|
734 |
+
"4. ์ ๋ฌธ์ฑ์ ์ ์งํ๋ฉด์๋ ์ดํดํ๊ธฐ ์ฝ๊ฒ ์ค๋ช
ํ์ธ์\n"
|
735 |
+
"5. 12-15ํ์ ๋ํ ๊ตํ์ผ๋ก ๊ตฌ์ฑํ์ธ์\n"
|
736 |
+
"6. JSON ํ์์ผ๋ก๋ง ์๋ตํ์ธ์"
|
737 |
)
|
738 |
else:
|
739 |
system_message = (
|
740 |
+
"You are a professional podcast scriptwriter creating high-quality, "
|
741 |
+
"insightful discussions that provide deep expertise to listeners.\n\n"
|
742 |
"Writing rules:\n"
|
743 |
+
"1. Host (Alex) asks focused 1-2 sentence questions\n"
|
744 |
+
"2. Expert (Jordan) MUST answer in 2-4 substantial sentences:\n"
|
745 |
+
" - First sentence: Core concept explanation\n"
|
746 |
+
" - Second sentence: Specific details or context\n"
|
747 |
+
" - Third-fourth sentences: Real examples, data, implications\n"
|
748 |
+
"3. Actively use statistics, research findings, real cases\n"
|
749 |
+
"4. Maintain expertise while keeping explanations accessible\n"
|
750 |
"5. Create 12-15 conversation exchanges\n"
|
751 |
"6. Respond only in JSON format"
|
752 |
)
|
|
|
759 |
)
|
760 |
|
761 |
settings = provider.get_provider_default_settings()
|
762 |
+
settings.temperature = 0.75 # ์ฝ๊ฐ ๋ฎ์ถฐ์ ๋ ์ผ๊ด๋ ์ ๋ฌธ์ ๋ต๋ณ
|
763 |
settings.top_k = 40
|
764 |
settings.top_p = 0.95
|
765 |
settings.max_tokens = self.config.max_tokens # ์ฆ๊ฐ๋ ํ ํฐ ์ ์ฌ์ฉ
|
|
|
797 |
|
798 |
@spaces.GPU(duration=120)
|
799 |
def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
|
800 |
+
"""Extract conversation using legacy local model with enhanced professional style"""
|
801 |
try:
|
802 |
self.initialize_legacy_local_mode()
|
803 |
|
804 |
+
# ๊ฐํ๋ ์ ๋ฌธ ํ์บ์คํธ ์คํ์ผ ์์คํ
๋ฉ์์ง
|
805 |
if language == "Korean":
|
806 |
system_message = (
|
807 |
+
"๋น์ ์ ์ ๋ฌธ ํ์บ์คํธ ์๊ฐ์
๋๋ค. "
|
808 |
+
"์งํ์(์ค์)๋ ํต์ฐฐ๋ ฅ ์๋ ์ง๋ฌธ์, ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋ 2-4๋ฌธ์ฅ์ ์์ธํ ๋ต๋ณ์ ํฉ๋๋ค. "
|
809 |
+
"๊ตฌ์ฒด์ ์ธ ๋ฐ์ดํฐ์ ์ฌ๋ก๋ฅผ ํฌํจํ์ฌ ์ ๋ฌธ์ ์ด๋ฉด์๋ ์ดํดํ๊ธฐ ์ฝ๊ฒ ์ค๋ช
ํ์ธ์. "
|
810 |
+
"12-15ํ ๋ํ ๊ตํ์ผ๋ก ๊ตฌ์ฑํ์ธ์."
|
811 |
)
|
812 |
else:
|
813 |
system_message = (
|
814 |
+
"You are a professional podcast scriptwriter. "
|
815 |
+
"Create insightful dialogue where the host (Alex) asks focused questions "
|
816 |
+
"and the expert (Jordan) gives detailed 2-4 sentence answers. "
|
817 |
+
"Include specific data and examples. Create 12-15 exchanges."
|
818 |
)
|
819 |
|
820 |
chat = [
|
|
|
841 |
streamer=streamer,
|
842 |
max_new_tokens=self.config.max_new_tokens, # ์ฆ๊ฐ๋ ํ ํฐ ์ ์ฌ์ฉ
|
843 |
do_sample=True,
|
844 |
+
temperature=0.75,
|
845 |
eos_token_id=terminators,
|
846 |
)
|
847 |
|
|
|
869 |
return self._get_default_english_conversation()
|
870 |
|
871 |
def _get_default_korean_conversation(self) -> Dict:
|
872 |
+
"""๋ ์ ๋ฌธ์ ์ธ ๊ธฐ๋ณธ ํ๊ตญ์ด ๋ํ ํ
ํ๋ฆฟ"""
|
873 |
return {
|
874 |
"conversation": [
|
875 |
+
{"speaker": "์ค์", "text": "์๋
ํ์ธ์, ์ฌ๋ฌ๋ถ! ์ค๋์ ์ ๋ง ์ค์ํ๊ณ ํฅ๋ฏธ๋ก์ด ์ฃผ์ ๋ฅผ ๋ค๋ค๋ณด๋ ค๊ณ ํฉ๋๋ค. ๋ฏผํธ ๋ฐ์ฌ๋, ๋จผ์ ์ด ์ฃผ์ ๊ฐ ์ ์ง๊ธ ์ด๋ ๊ฒ ์ฃผ๋ชฉ๋ฐ๊ณ ์๋์ง ์ค๋ช
ํด์ฃผ์๊ฒ ์ด์?"},
|
876 |
+
{"speaker": "๋ฏผํธ", "text": "๋ค, ์๋
ํ์ธ์. ์ต๊ทผ ์ด ๋ถ์ผ์์ ํ๊ธฐ์ ์ธ ๋ฐ์ ์ด ์์์ต๋๋ค. ํนํ ์๋
MIT ์ฐ๊ตฌํ์ ๋ฐํ์ ๋ฐ๋ฅด๋ฉด, ์ด ๊ธฐ์ ์ ํจ์จ์ฑ์ด ๊ธฐ์กด ๋๋น 300% ํฅ์๋์๋ค๊ณ ํฉ๋๋ค. ์ด๋ ๋จ์ํ ๊ธฐ์ ์ ์ง๋ณด๋ฅผ ๋์ด์ ์ฐ๋ฆฌ ์ผ์์ํ์ ์ง์ ์ ์ธ ์ํฅ์ ๋ฏธ์น ์ ์๋ ๋ณํ์ธ๋ฐ์. ์ค์ ๋ก ๊ตฌ๊ธ๊ณผ ๋ง์ดํฌ๋ก์ํํธ ๊ฐ์ ๋น
ํ
ํฌ ๊ธฐ์
๋ค์ด ์ด๋ฏธ ์์ญ์ต ๋ฌ๋ฌ๋ฅผ ํฌ์ํ๊ณ ์์ต๋๋ค."},
|
877 |
+
{"speaker": "์ค์", "text": "์, 300% ํฅ์์ด๋ผ๋ ์ ๋ง ๋๋ผ์ด๋ฐ์. ๊ทธ๋ ๋ค๋ฉด ์ด๋ฐ ๊ธฐ์ ๋ฐ์ ์ด ์ผ๋ฐ์ธ๋ค์๊ฒ๋ ๊ตฌ์ฒด์ ์ผ๋ก ์ด๋ค ํํ์ ๊ฐ์ ธ๋ค์ค ์ ์์๊น์?"},
|
878 |
+
{"speaker": "๋ฏผํธ", "text": "๊ฐ์ฅ ์ง์ ์ ์ธ ํํ์ ๋น์ฉ ์ ๊ฐ๊ณผ ์ ๊ทผ์ฑ ํฅ์์
๋๋ค. ์๋ฅผ ๋ค์ด, ์ด์ ์๋ ์ ๋ฌธ๊ฐ๋ง ์ฌ์ฉํ ์ ์๋ ๊ณ ๊ธ ๊ธฐ๋ฅ๋ค์ด ์ด์ ๋ ์ค๋งํธํฐ ์ฑ์ผ๋ก๋ ๊ตฌํ ๊ฐ๋ฅํด์ก์ต๋๋ค. ๋งฅํจ์ง ๋ณด๊ณ ์์ ๋ฐ๋ฅด๋ฉด, 2025๋
๊น์ง ์ด ๊ธฐ์ ๋ก ์ธํด ์ ์ธ๊ณ์ ์ผ๋ก ์ฝ 2์กฐ ๋ฌ๋ฌ์ ๊ฒฝ์ ์ ๊ฐ์น๊ฐ ์ฐฝ์ถ๋ ๊ฒ์ผ๋ก ์์๋ฉ๋๋ค. ํนํ ์๋ฃ, ๊ต์ก, ๊ธ์ต ๋ถ์ผ์์ ํ์ ์ ์ธ ๋ณํ๊ฐ ์ผ์ด๋ ๊ฒ์ผ๋ก ๋ณด์
๋๋ค."},
|
879 |
+
{"speaker": "์ค์", "text": "2์กฐ ๋ฌ๋ฌ๋ผ๋ ์์ฒญ๋ ๊ท๋ชจ๋ค์. ์๋ฃ ๋ถ์ผ์์๋ ์ด๋ค ๋ณํ๊ฐ ์์๋๋์?"},
|
880 |
+
{"speaker": "๋ฏผํธ", "text": "์๋ฃ ๋ถ์ผ์ ๋ณํ๋ ์ ๋ง ํ๋ช
์ ์ผ ๊ฒ์ผ๋ก ์์๋ฉ๋๋ค. ์ด๋ฏธ ์คํ ํฌ๋ ๋ํ๋ณ์์์๋ ์ด ๊ธฐ์ ์ ํ์ฉํด ์ ์ง๋จ ์ ํ๋๋ฅผ 95%๊น์ง ๋์์ต๋๋ค. ๊ธฐ์กด์๋ ์๋ จ๋ ์์ฌ๋ ๋์น ์ ์๋ ๋ฏธ์ธํ ๋ณ๋ณ๋ค์ AI๊ฐ ๊ฐ์งํด๋ด๋ ๊ฒ์ด์ฃ . ๋ ๋๋ผ์ด ๊ฒ์ ์ด๋ฐ ์ง๋จ์ด ๋จ ๋ช ๋ถ ๋ง์ ์ด๋ค์ง๋ค๋ ์ ์
๋๋ค. WHO ์ถ์ฐ์ผ๋ก๋ ์ด ๊ธฐ์ ์ด ์ ์ธ๊ณ์ ์ผ๋ก ๋ณด๊ธ๋๋ฉด ์ฐ๊ฐ ์๋ฐฑ๋ง ๋ช
์ ์๋ช
์ ๊ตฌํ ์ ์์ ๊ฒ์ผ๋ก ์์ธกํ๊ณ ์์ต๋๋ค."},
|
881 |
+
{"speaker": "์ค์", "text": "์ ๋ง ์ธ์์ ์ด๋ค์. ํ์ง๋ง ์ด๋ฐ ๊ธ๊ฒฉํ ๊ธฐ์ ๋ฐ์ ์ ๋ํ ์ฐ๋ ค์ ๋ชฉ์๋ฆฌ๋ ์์ ๊ฒ ๊ฐ์๋ฐ์?"},
|
882 |
+
{"speaker": "๋ฏผํธ", "text": "๋ง์ต๋๋ค. ์ฃผ์ ์ฐ๋ ค์ฌํญ์ ํฌ๊ฒ ์ธ ๊ฐ์ง์
๋๋ค. ์ฒซ์งธ๋ ์ผ์๋ฆฌ ๋์ฒด ๋ฌธ์ ๋ก, ์ฅ์คํฌ๋ ๋ํ ์ฐ๊ตฌ์ ๋ฐ๋ฅด๋ฉด ํฅํ 20๋
๋ด์ ํ์ฌ ์ง์
์ 47%๊ฐ ์๋ํ๋ ์ํ์ด ์์ต๋๋ค. ๋์งธ๋ ํ๋ผ์ด๋ฒ์์ ๋ณด์ ๋ฌธ์ ์
๋๋ค. ์
์งธ๋ ๊ธฐ์ ๊ฒฉ์ฐจ๋ก ์ธํ ๋ถํ๋ฑ ์ฌํ์
๋๋ค. ํ์ง๋ง ์ญ์ฌ์ ์ผ๋ก ๋ณด๋ฉด ์๋ก์ด ๊ธฐ์ ์ ํญ์ ์๋ก์ด ๊ธฐํ๋ ํจ๊ป ๋ง๋ค์ด์๊ธฐ ๋๋ฌธ์, ์ ์ ํ ์ ์ฑ
๊ณผ ๊ต์ก์ผ๋ก ์ด๋ฐ ๋ฌธ์ ๋ค์ ํด๊ฒฐํ ์ ์์ ๊ฒ์ผ๋ก ๋ด
๋๋ค."},
|
883 |
+
{"speaker": "์ค์", "text": "๊ท ํ์กํ ์๊ฐ์ด ์ค์ํ๊ฒ ๋ค์. ๊ทธ๋ ๋ค๋ฉด ์ฐ๋ฆฌ๊ฐ ์ด๋ฐ ๋ณํ์ ์ด๋ป๊ฒ ๋๋นํด์ผ ํ ๊น์?"},
|
884 |
+
{"speaker": "๋ฏผํธ", "text": "๊ฐ์ฅ ์ค์ํ ๊ฒ์ ์ง์์ ์ธ ํ์ต๊ณผ ์ ์๋ ฅ์
๋๋ค. ์ธ๊ณ๊ฒฝ์ ํฌ๋ผ์ 2025๋
๊น์ง ์ ์ธ๊ณ ๊ทผ๋ก์์ 50%๊ฐ ์ฌ๊ต์ก์ด ํ์ํ ๊ฒ์ผ๋ก ์์ธกํ์ต๋๋ค. ํนํ ๋์งํธ ๋ฆฌํฐ๋ฌ์, ๋นํ์ ์ฌ๊ณ ๋ ฅ, ์ฐฝ์์ฑ ๊ฐ์ ๋ฅ๋ ฅ์ด ์ค์ํด์ง ๊ฒ์
๋๋ค. ๊ฐ์ธ์ ์ผ๋ก๋ ์จ๋ผ์ธ ๊ต์ก ํ๋ซํผ์ ํ์ฉํ ์๊ธฐ๊ณ๋ฐ์ ์ถ์ฒํฉ๋๋ค. ์๋ฅผ ๋ค์ด Coursera๋ edX ๊ฐ์ ํ๋ซํผ์์๋ ์ธ๊ณ ์ต๊ณ ๋ํ์ ๊ฐ์๋ฅผ ๋ฌด๋ฃ๋ก ๋ค์ ์ ์์ต๋๋ค."},
|
885 |
+
{"speaker": "์ค์", "text": "์ค์ฉ์ ์ธ ์กฐ์ธ ๊ฐ์ฌํฉ๋๋ค. ๋ง์ง๋ง์ผ๋ก ์ด ๋ถ์ผ์ ๋ฏธ๋ ์ ๋ง์ ์ด๋ป๊ฒ ๋ณด์๋์?"},
|
886 |
+
{"speaker": "๋ฏผํธ", "text": "ํฅํ 10๋
์ ์ธ๋ฅ ์ญ์ฌ์ ๊ฐ์ฅ ๊ธ๊ฒฉํ ๊ธฐ์ ๋ฐ์ ์ ๊ฒฝํํ๋ ์๊ธฐ๊ฐ ๋ ๊ฒ์
๋๋ค. ๊ฐํธ๋์ ํ์ดํ ์ฌ์ดํด ๋ถ์์ ๋ฐ๋ฅด๋ฉด, ํ์ฌ ์ฐ๋ฆฌ๋ ์ด ๊ธฐ์ ์ ์ด๊ธฐ ๋จ๊ณ์ ๋ถ๊ณผํฉ๋๋ค. 2030๋
๊น์ง๋ ์ง๊ธ์ผ๋ก์๋ ์์ํ๊ธฐ ์ด๋ ค์ด ์์ค์ ํ์ ์ด ์ผ์ด๋ ๊ฒ์ผ๋ก ์์๋ฉ๋๋ค. ์ค์ํ ๊ฒ์ ์ด๋ฐ ๋ณํ๋ฅผ ๋๋ ค์ํ๊ธฐ๋ณด๋ค๋ ๊ธฐํ๋ก ์ผ์ ๋ ๋์ ๋ฏธ๋๋ฅผ ๋ง๋ค์ด๊ฐ๋ ๊ฒ์ด๋ผ๊ณ ์๊ฐํฉ๋๋ค."},
|
887 |
+
{"speaker": "์ค์", "text": "์ ๋ง ํต์ฐฐ๋ ฅ ์๋ ๋ง์์ด๋ค์. ์ค๋ ๋๋ฌด๋ ์ ์ตํ ์๊ฐ์ด์์ต๋๋ค. ์ฒญ์ทจ์ ์ฌ๋ฌ๋ถ๋ ์ค๋ ๋
ผ์๋ ๋ด์ฉ์ ๋ฐํ์ผ๋ก ๋ฏธ๋๋ฅผ ์ค๋นํ์๊ธธ ๋ฐ๋๋๋ค. ๋ฏผํธ ๋ฐ์ฌ๋, ๊ท์คํ ์๊ฐ ๋ด์ฃผ์
์ ๊ฐ์ฌํฉ๋๋ค!"},
|
888 |
+
{"speaker": "๋ฏผํธ", "text": "๊ฐ์ฌํฉ๋๋ค. ์ฒญ์ทจ์ ์ฌ๋ฌ๋ถ๋ค์ด ์ด ๋ณํ์ ์๋๋ฅผ ํ๋ช
ํ๊ฒ ํค์ณ๋๊ฐ์๊ธธ ๋ฐ๋๋๋ค. ๊ธฐ์ ์ ๋๊ตฌ์ผ ๋ฟ์ด๊ณ , ๊ทธ๊ฒ์ ์ด๋ป๊ฒ ํ์ฉํ๋์ง๋ ์ฐ๋ฆฌ์๊ฒ ๋ฌ๋ ค์๋ค๋ ์ ์ ๊ธฐ์ตํด์ฃผ์ธ์. ์ค๋ ๋ง์๋๋ฆฐ ๋ด์ฉ์ ๋ํด ๋ ๊ถ๊ธํ์ ์ ์ด ์์ผ์๋ฉด ์ ๊ฐ ์ด์ํ๋ ๋ธ๋ก๊ทธ๋ ์ต๊ทผ ์ถ๊ฐํ ์ฑ
์์ ๋ ์์ธํ ์ ๋ณด๋ฅผ ์ฐพ์ผ์ค ์ ์์ต๋๋ค."}
|
889 |
]
|
890 |
}
|
891 |
|
892 |
def _get_default_english_conversation(self) -> Dict:
|
893 |
+
"""Enhanced professional English conversation template"""
|
894 |
return {
|
895 |
"conversation": [
|
896 |
+
{"speaker": "Alex", "text": "Welcome everyone to our podcast! Today we're diving into a topic that's reshaping our world. Dr. Jordan, could you start by explaining why this subject has become so critical right now?"},
|
897 |
+
{"speaker": "Jordan", "text": "Thanks, Alex. We're witnessing an unprecedented convergence of technological breakthroughs. According to a recent Nature publication, advances in this field have accelerated by 400% in just the past two years. This isn't just incremental progress - it's a fundamental shift in how we approach problem-solving. Major institutions like Harvard and Stanford are completely restructuring their research programs to focus on this area, with combined investments exceeding $5 billion annually."},
|
898 |
+
{"speaker": "Alex", "text": "400% acceleration is staggering! What does this mean for everyday people who might not be tech-savvy?"},
|
899 |
+
{"speaker": "Jordan", "text": "The impact will be profound yet accessible. Think about how smartphones revolutionized communication - this will be similar but across every aspect of life. McKinsey's latest report projects that by 2026, these technologies will create $4.4 trillion in annual value globally. For individuals, this translates to personalized healthcare that can predict illnesses years in advance, educational systems that adapt to each student's learning style, and financial tools that democratize wealth-building strategies previously available only to the ultra-wealthy."},
|
900 |
+
{"speaker": "Alex", "text": "Those applications sound transformative. Can you give us a concrete example of how this is already being implemented?"},
|
901 |
+
{"speaker": "Jordan", "text": "Absolutely. Let me share a compelling case from Johns Hopkins Hospital. They've deployed an AI system that analyzes patient data in real-time, reducing diagnostic errors by 85% and cutting average diagnosis time from days to hours. In one documented case, the system identified a rare genetic disorder in a child that had been misdiagnosed for three years. The accuracy comes from analyzing patterns across millions of cases - something impossible for even the most experienced doctors to do manually. This technology is now being rolled out to rural hospitals, bringing world-class diagnostic capabilities to underserved communities."},
|
902 |
+
{"speaker": "Alex", "text": "That's truly life-changing technology. But I imagine there are significant challenges and risks we need to consider?"},
|
903 |
+
{"speaker": "Jordan", "text": "You're absolutely right to raise this. The challenges are as significant as the opportunities. The World Economic Forum identifies three critical risks: First, algorithmic bias could perpetuate or amplify existing inequalities if not carefully managed. Second, cybersecurity threats become exponentially more dangerous when AI systems control critical infrastructure. Third, there's the socioeconomic disruption - PwC estimates that 30% of jobs could be automated by 2030. However, history shows us that technological revolutions create new opportunities even as they displace old ones. The key is proactive adaptation and responsible development."},
|
904 |
+
{"speaker": "Alex", "text": "How should individuals and organizations prepare for these changes?"},
|
905 |
+
{"speaker": "Jordan", "text": "Preparation requires a multi-faceted approach. For individuals, I recommend focusing on skills that complement rather than compete with AI: critical thinking, emotional intelligence, and creative problem-solving. MIT's recent study shows that professionals who combine domain expertise with AI literacy see salary increases of 40% on average. Organizations need to invest in continuous learning programs - Amazon's $700 million worker retraining initiative is a good model. Most importantly, we need to cultivate an adaptive mindset. The half-life of specific technical skills is shrinking, but the ability to learn and unlearn quickly is becoming invaluable."},
|
906 |
+
{"speaker": "Alex", "text": "That's practical advice. What about the ethical considerations? How do we ensure this technology benefits humanity as a whole?"},
|
907 |
+
{"speaker": "Jordan", "text": "Ethics must be at the forefront of development. The EU's AI Act and similar regulations worldwide are establishing important guardrails. We need transparent AI systems where decisions can be explained and audited. Companies like IBM and Google have established AI ethics boards, but we need industry-wide standards. Additionally, we must address the digital divide - UNESCO reports that 37% of the global population still lacks internet access. Without inclusive development, these technologies could exacerbate global inequality rather than reduce it. The solution requires collaboration between technologists, ethicists, policymakers, and communities."},
|
908 |
+
{"speaker": "Alex", "text": "Looking ahead, what's your vision for how this technology will shape the next decade?"},
|
909 |
+
{"speaker": "Jordan", "text": "The next decade will be transformative beyond our current imagination. Ray Kurzweil's prediction of technological singularity by 2045 seems increasingly plausible. By 2035, I expect we'll see autonomous systems managing entire cities, personalized medicine extending human lifespan by 20-30 years, and educational AI that makes world-class education universally accessible. The convergence of AI with quantum computing, biotechnology, and nanotechnology will unlock possibilities we can barely conceive of today. However, the future isn't predetermined - it's shaped by the choices we make now about development priorities, ethical frameworks, and inclusive access."},
|
910 |
+
{"speaker": "Alex", "text": "That's both exciting and sobering. Any final thoughts for our listeners?"},
|
911 |
+
{"speaker": "Jordan", "text": "I'd encourage everyone to view this as humanity's next great adventure. Yes, there are risks and challenges, but we're also on the cusp of solving problems that have plagued us for millennia - disease, poverty, environmental degradation. The key is engaged participation rather than passive observation. Stay informed through reliable sources, experiment with new technologies, and most importantly, contribute to the conversation about what kind of future we want to build. The decisions we make in the next five years will reverberate for generations."},
|
912 |
+
{"speaker": "Alex", "text": "Dr. Jordan, this has been an incredibly enlightening discussion. Thank you for sharing your expertise and insights with us today."},
|
913 |
+
{"speaker": "Jordan", "text": "Thank you, Alex. It's been a pleasure discussing these crucial topics. For listeners wanting to dive deeper, I've compiled additional resources on my website, including links to the studies we discussed today. Remember, the future isn't something that happens to us - it's something we create together. I look forward to seeing how each of you contributes to shaping this exciting new era."}
|
914 |
]
|
915 |
}
|
916 |
|
917 |
def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
|
918 |
+
"""Extract conversation using API with enhanced professional style"""
|
919 |
if not self.llm_client:
|
920 |
raise RuntimeError("API mode not initialized")
|
921 |
|
|
|
932 |
except Exception as e:
|
933 |
print(f"Search failed, continuing without context: {e}")
|
934 |
|
935 |
+
# ๊ฐํ๋ ์ ๋ฌธ ํ์บ์คํธ ์คํ์ผ ํ๋กฌํํธ
|
936 |
if language == "Korean":
|
937 |
system_message = (
    "당신은 한국 최고의 전문 팟캐스트 작가입니다. "
    "청취자들이 깊이 있는 인사이트를 얻을 수 있는 고품질 대담을 만드세요.\n"
    "준수(진행자)는 핵심을 짚는 1-2문장 질문을 하고, "
    "민호(전문가)는 반드시 2-4문장으로 상세히 답변합니다. "
    "구체적인 데이터, 연구 결과, 실제 사례를 포함하세요. "
    "전문 용어는 쉽게 설명하고, 반드시 서로 존댓말을 사용하세요. "
    "12-15회의 깊이 있는 대화 교환으로 구성하세요."
)
|
946 |
else:
|
947 |
system_message = (
|
948 |
+
"You are a top professional podcast scriptwriter. "
|
949 |
+
"Create high-quality discussions that provide deep insights to listeners. "
|
950 |
+
"Alex (host) asks focused 1-2 sentence questions, "
|
951 |
+
"while Jordan (expert) MUST answer in 2-4 detailed sentences. "
|
952 |
+
"Include specific data, research findings, and real cases. "
|
953 |
+
"Explain technical terms clearly. "
|
954 |
+
"Create 12-15 insightful conversation exchanges."
|
955 |
)
|
956 |
|
957 |
chat_completion = self.llm_client.chat.completions.create(
|
|
|
960 |
{"role": "user", "content": self._build_prompt(text, language, search_context)}
|
961 |
],
|
962 |
model=self.config.api_model_name,
|
963 |
+
temperature=0.75,
|
964 |
)
|
965 |
|
966 |
pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
|
|
|
1250 |
else: # API mode (now secondary)
|
1251 |
api_key = os.environ.get("TOGETHER_API_KEY")
|
1252 |
if not api_key:
|
1253 |
+
|
1254 |
+
print("API key not found, falling back to local mode")
|
1255 |
conversation_json = converter.extract_conversation_local(text, language)
|
1256 |
else:
|
1257 |
try:
|
|
|
1366 |
|
1367 |
# Gradio Interface
|
1368 |
with gr.Blocks(theme='soft', title="AI Podcast Generator") as demo:
|
1369 |
+
gr.Markdown("# ๐๏ธ AI Podcast Generator - Professional Edition")
|
1370 |
+
gr.Markdown("Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation with in-depth analysis!")
|
1371 |
|
1372 |
# ์๋จ์ ๋ก์ปฌ LLM ์ํ ํ์
|
1373 |
with gr.Row():
|
1374 |
gr.Markdown(f"""
|
1375 |
+
### ๐ค Enhanced Professional Configuration:
|
1376 |
- **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
|
1377 |
- **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
|
1378 |
- **Status**: {"✅ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "❌ Llama CPP Not Available - Install llama-cpp-python"}
|
1379 |
+
- **Conversation Style**: Professional podcast with 2-4 sentence detailed answers
|
1380 |
+
- **Conversation Length**: {converter.config.min_conversation_turns}-{converter.config.max_conversation_turns} exchanges (professional depth)
|
1381 |
- **Search**: {"✅ Brave Search Enabled" if BRAVE_KEY else "❌ Brave Search Not Available - Set BSEARCH_API"}
|
1382 |
+
- **Features**: ๐ฏ Keyword input | ๐ Data-driven insights | ๐ฌ Expert analysis
|
1383 |
""")
|
1384 |
|
1385 |
with gr.Row():
|
|
|
1443 |
)
|
1444 |
|
1445 |
gr.Markdown("""
|
1446 |
+
**๐ป Professional Podcast Style:**
|
1447 |
+
- In-depth expert discussions
|
1448 |
+
- Host asks insightful questions
|
1449 |
+
- Expert provides detailed 2-4 sentence answers
|
1450 |
+
- Includes data, research, and real examples
|
1451 |
+
- 12-15 professional exchanges
|
1452 |
|
1453 |
**๐ Keyword Feature:**
|
1454 |
- Enter any topic to generate a podcast
|
1455 |
- Automatically searches latest information
|
1456 |
+
- Creates expert discussion from search results
|
1457 |
|
1458 |
+
**๐ฐ๐ท ํ๊ตญ์ด ์ ๋ฌธ ํ์บ์คํธ:**
|
1459 |
+
- ์ฌ์ธต์ ์ธ ์ ๋ฌธ๊ฐ ๋๋ด
|
1460 |
+
- ์งํ์(์ค์)๊ฐ ํต์ฐฐ๋ ฅ ์๋ ์ง๋ฌธ
|
1461 |
+
- ์ ๋ฌธ๊ฐ(๋ฏผํธ)๊ฐ 2-4๋ฌธ์ฅ์ผ๋ก ์์ธ ๋ต๋ณ
|
1462 |
+
- ๋ฐ์ดํฐ์ ์ฌ๋ก๋ฅผ ํฌํจํ ์ ๋ฌธ์ ๋ด์ฉ
|
1463 |
""")
|
1464 |
|
1465 |
+
convert_btn = gr.Button("๐ฏ Generate Professional Conversation / ์ ๋ฌธ ๋ํ ์์ฑ", variant="primary", size="lg")
|
1466 |
|
1467 |
with gr.Row():
|
1468 |
with gr.Column():
|
1469 |
conversation_output = gr.Textbox(
|
1470 |
+
label="Generated Professional Conversation (Editable) / ์์ฑ๋ ์ ๋ฌธ ๋ํ (ํธ์ง ๊ฐ๋ฅ)",
|
1471 |
+
lines=35, # ๋ ๊ธด ์ ๋ฌธ์ ๋ํ๋ฅผ ์ํด ์ฆ๊ฐ
|
1472 |
+
max_lines=70,
|
1473 |
interactive=True,
|
1474 |
+
placeholder="Professional podcast conversation will appear here. You can edit it before generating audio.\n์ ๋ฌธ ํ์บ์คํธ ๋ํ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค. ์ค๋์ค ์์ฑ ์ ์ ํธ์งํ ์ ์์ต๋๋ค.\n\n์ฌ์ธต์ ์ด๊ณ ์ ๋ฌธ์ ์ธ ๋๋ด ํ์์ผ๋ก ์งํ๋ฉ๋๋ค.",
|
1475 |
info="Edit the conversation as needed. Format: 'Speaker Name: Text' / 필요에 따라 대화를 편집하세요. 형식: '화자 이름: 텍스트'"
|
1476 |
)
|
1477 |
|
|
|
1482 |
|
1483 |
with gr.Column():
|
1484 |
audio_output = gr.Audio(
|
1485 |
+
label="Professional Podcast Audio / ์ ๋ฌธ ํ์บ์คํธ ์ค๋์ค",
|
1486 |
type="filepath",
|
1487 |
interactive=False
|
1488 |
)
|
|
|
1498 |
gr.Examples(
|
1499 |
examples=[
|
1500 |
["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
|
1501 |
+
["quantum computing breakthroughs", "Keyword", "Local", "Edge-TTS", "English"], # Professional keyword example
|
1502 |
["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
|
1503 |
+
["์ธ๊ณต์ง๋ฅ ์ค๋ฆฌ์ ๊ท์ ", "Keyword", "Local", "Edge-TTS", "Korean"], # Korean professional keyword
|
1504 |
],
|
1505 |
inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
|
1506 |
outputs=[conversation_output, status_output],
|
|
|
1554 |
share=False,
|
1555 |
server_name="0.0.0.0",
|
1556 |
server_port=7860
|
1557 |
+
)
|