openfree commited on
Commit
cd31bc6
·
verified ·
1 Parent(s): 44ed247

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -351
app.py CHANGED
@@ -79,6 +79,118 @@ load_dotenv()
79
  BRAVE_KEY = os.getenv("BSEARCH_API")
80
  BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  @dataclass
83
  class ConversationConfig:
84
  max_words: int = 8000 # 4000에서 6000으로 증가 (1.5배)
@@ -286,126 +398,6 @@ These factors are making the future of {keyword} increasingly promising.
286
  return intro + compiled
287
 
288
 
289
- def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
290
- """Build prompt for conversation generation with enhanced radio talk show style"""
291
- # 텍스트 길이 제한
292
- max_text_length = 4500 if search_context else 6000
293
- if len(text) > max_text_length:
294
- text = text[:max_text_length] + "..."
295
-
296
- if language == "Korean":
297
- # 대화 템플릿을 더 많은 턴으로 확장 (15-20회)
298
- template = """
299
- {
300
- "conversation": [
301
- {"speaker": "준수", "text": ""},
302
- {"speaker": "민호", "text": ""},
303
- {"speaker": "준수", "text": ""},
304
- {"speaker": "민호", "text": ""},
305
- {"speaker": "준수", "text": ""},
306
- {"speaker": "민호", "text": ""},
307
- {"speaker": "준수", "text": ""},
308
- {"speaker": "민호", "text": ""},
309
- {"speaker": "준수", "text": ""},
310
- {"speaker": "민호", "text": ""},
311
- {"speaker": "준수", "text": ""},
312
- {"speaker": "민호", "text": ""},
313
- {"speaker": "준수", "text": ""},
314
- {"speaker": "민호", "text": ""},
315
- {"speaker": "준수", "text": ""},
316
- {"speaker": "민호", "text": ""},
317
- {"speaker": "준수", "text": ""},
318
- {"speaker": "민호", "text": ""}
319
- ]
320
- }
321
- """
322
-
323
- context_part = ""
324
- if search_context:
325
- context_part = f"# 최신 관련 정보:\n{search_context}\n"
326
-
327
- base_prompt = (
328
- f"# 원본 콘텐츠:\n{text}\n\n"
329
- f"{context_part}"
330
- f"위 내용으로 전문적이고 심층적인 라디오 팟캐스트 대담 프로그램 대본을 작성해주세요.\n\n"
331
- f"## 필수 요구사항:\n"
332
- f"1. **최소 18회 이상의 대화 교환** (준수 9회, 민호 9회 이상)\n"
333
- f"2. **대화 스타일**: 전문적이고 깊이 있는 팟캐스트 대담\n"
334
- f"3. **화자 역할**:\n"
335
- f" - 준수: 진행자 (통찰력 있는 질문, 핵심 포인트 정리, 청취자 관점 대변)\n"
336
- f" - 민호: 전문가 (상세하고 전문적인 설명, 구체적 예시, 데이터 기반 분석)\n"
337
- f"4. **답변 규칙**:\n"
338
- f" - 준수: 1-2문장의 명확한 질문이나 요약\n"
339
- f" - 민호: **반드시 2-4문장으로 충실하게 답변** (핵심 개념 설명 + 부연 설명 + 예시/근거)\n"
340
- f" - 전문 용어는 쉽게 풀어서 설명\n"
341
- f" - 구체적인 수치, 사례, 연구 결과 인용\n"
342
- f"5. **내용 구성**:\n"
343
- f" - 도입부 (2-3회): 주제의 중요성과 배경 설명\n"
344
- f" - 전개부 (12-14회): 핵심 내용을 다각도로 심층 분석\n"
345
- f" - 마무리 (2-3회): 핵심 요약과 미래 전망\n"
346
- f"6. **전문성**: 학술적 근거와 실무적 통찰을 균형있게 포함\n"
347
- f"7. **필수**: 서로 존댓말 사용, 청취자가 전문 지식을 얻을 수 있도록 상세히 설명\n\n"
348
- f"반드시 위 JSON 형식으로 18회 이상의 전문적인 대화를 작성하세요:\n{template}"
349
- )
350
-
351
- return base_prompt
352
-
353
- else:
354
- # 영어 템플릿도 확장
355
- template = """
356
- {
357
- "conversation": [
358
- {"speaker": "Alex", "text": ""},
359
- {"speaker": "Jordan", "text": ""},
360
- {"speaker": "Alex", "text": ""},
361
- {"speaker": "Jordan", "text": ""},
362
- {"speaker": "Alex", "text": ""},
363
- {"speaker": "Jordan", "text": ""},
364
- {"speaker": "Alex", "text": ""},
365
- {"speaker": "Jordan", "text": ""},
366
- {"speaker": "Alex", "text": ""},
367
- {"speaker": "Jordan", "text": ""},
368
- {"speaker": "Alex", "text": ""},
369
- {"speaker": "Jordan", "text": ""},
370
- {"speaker": "Alex", "text": ""},
371
- {"speaker": "Jordan", "text": ""},
372
- {"speaker": "Alex", "text": ""},
373
- {"speaker": "Jordan", "text": ""},
374
- {"speaker": "Alex", "text": ""},
375
- {"speaker": "Jordan", "text": ""}
376
- ]
377
- }
378
- """
379
-
380
- context_part = ""
381
- if search_context:
382
- context_part = f"# Latest Information:\n{search_context}\n"
383
-
384
- base_prompt = (
385
- f"# Content:\n{text}\n\n"
386
- f"{context_part}"
387
- f"Create a professional and in-depth podcast conversation.\n\n"
388
- f"## Requirements:\n"
389
- f"1. **Minimum 18 conversation exchanges** (Alex 9+, Jordan 9+)\n"
390
- f"2. **Style**: Professional, insightful podcast discussion\n"
391
- f"3. **Roles**:\n"
392
- f" - Alex: Host (insightful questions, key point summaries, audience perspective)\n"
393
- f" - Jordan: Expert (detailed explanations, concrete examples, data-driven analysis)\n"
394
- f"4. **Response Rules**:\n"
395
- f" - Alex: 1-2 sentence clear questions or summaries\n"
396
- f" - Jordan: **Must answer in 2-4 sentences** (core concept + elaboration + example/evidence)\n"
397
- f" - Explain technical terms clearly\n"
398
- f" - Include specific data, cases, research findings\n"
399
- f"5. **Structure**:\n"
400
- f" - Introduction (2-3 exchanges): Topic importance and context\n"
401
- f" - Main content (12-14 exchanges): Multi-angle deep analysis\n"
402
- f" - Conclusion (2-3 exchanges): Key takeaways and future outlook\n"
403
- f"6. **Expertise**: Balance academic rigor with practical insights\n\n"
404
- f"Create exactly 18+ professional exchanges in this JSON format:\n{template}"
405
- )
406
-
407
- return base_prompt
408
-
409
  class UnifiedAudioConverter:
410
  def __init__(self, config: ConversationConfig):
411
  self.config = config
@@ -557,7 +549,6 @@ class UnifiedAudioConverter:
557
  else:
558
  return MessagesFormatterType.LLAMA_3
559
 
560
-
561
  def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
562
  """Build prompt for conversation generation with enhanced professional podcast style"""
563
  # 텍스트 길이 제한
@@ -565,31 +556,59 @@ class UnifiedAudioConverter:
565
  if len(text) > max_text_length:
566
  text = text[:max_text_length] + "..."
567
 
 
568
  if language == "Korean":
569
- # 대화 템플릿을 많은 턴으로 확장
570
- template = """
571
- {
572
- "conversation": [
573
- {"speaker": "준수", "text": ""},
574
- {"speaker": "민호", "text": ""},
575
- {"speaker": "준수", "text": ""},
576
- {"speaker": "민호", "text": ""},
577
- {"speaker": "준수", "text": ""},
578
- {"speaker": "민호", "text": ""},
579
- {"speaker": "준수", "text": ""},
580
- {"speaker": "민호", "text": ""},
581
- {"speaker": "준수", "text": ""},
582
- {"speaker": "민호", "text": ""},
583
- {"speaker": "준수", "text": ""},
584
- {"speaker": "민호", "text": ""}
585
- ]
586
- }
587
- """
588
-
589
- context_part = ""
590
- if search_context:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  context_part = f"# 최신 관련 정보:\n{search_context}\n"
 
 
592
 
 
593
  base_prompt = (
594
  f"# 원본 콘텐츠:\n{text}\n\n"
595
  f"{context_part}"
@@ -597,99 +616,48 @@ class UnifiedAudioConverter:
597
  f"## 핵심 지침:\n"
598
  f"1. **대화 스타일**: 전문적이면서도 이해하기 쉬운 팟캐스트 대담\n"
599
  f"2. **화자 역할**:\n"
600
- f" - 준수: 진행자/호스트 (핵심을 짚는 질문, 청취자 관점에서 궁금한 점 질문)\n"
601
- f" - 민호: 전문가 (깊이 있는 설명, 구체적 사례와 데이터 제시)\n"
602
  f"3. **중요한 답변 규칙**:\n"
603
- f" - 준수: 1-2문장의 명확한 질문 (\"그렇다면 구체적으로 어떤 의미인가요?\", \"실제 사례를 들어주시겠어요?\")\n"
604
 - f" - 민호: **반드시 2-4문장으로 충실히 답변** (핵심 개념 설명 + 부연 설명 + 예시/근거)\n"
605
- f" - 예: \"이것은 ~를 의미합니다. 구체적으로 ~한 측면에서 중요한데요. 실제로 최근 ~한 사례가 있었고, 이는 ~를 보여줍니다.\"\n"
606
- f"4. **전문성 요소**:\n"
607
- f" - 통계나 연구 결과 인용\n"
608
- f" - 실제 사례와 케이스 스터디\n"
609
- f" - 전문 용어를 쉽게 풀어서 설명\n"
610
- f" - 다양한 관점과 시각 제시\n"
611
- f"5. **필수 규칙**: 서로 존댓말 사용, 12-15회 대화 교환\n\n"
612
  f"JSON 형식으로만 반환:\n{template}"
613
  )
614
-
615
- return base_prompt
616
-
617
  else:
618
- # 영어 템플릿도 확장
619
- template = """
620
- {
621
- "conversation": [
622
- {"speaker": "Alex", "text": ""},
623
- {"speaker": "Jordan", "text": ""},
624
- {"speaker": "Alex", "text": ""},
625
- {"speaker": "Jordan", "text": ""},
626
- {"speaker": "Alex", "text": ""},
627
- {"speaker": "Jordan", "text": ""},
628
- {"speaker": "Alex", "text": ""},
629
- {"speaker": "Jordan", "text": ""},
630
- {"speaker": "Alex", "text": ""},
631
- {"speaker": "Jordan", "text": ""},
632
- {"speaker": "Alex", "text": ""},
633
- {"speaker": "Jordan", "text": ""}
634
- ]
635
- }
636
- """
637
-
638
- context_part = ""
639
- if search_context:
640
- context_part = f"# Latest Information:\n{search_context}\n"
641
-
642
  base_prompt = (
643
  f"# Content:\n{text}\n\n"
644
  f"{context_part}"
645
- f"Create a professional and insightful podcast conversation.\n\n"
646
  f"## Key Guidelines:\n"
647
  f"1. **Style**: Professional yet accessible podcast discussion\n"
648
  f"2. **Roles**:\n"
649
- f" - Alex: Host (insightful questions, audience perspective)\n"
650
- f" - Jordan: Expert (in-depth explanations, concrete examples and data)\n"
651
  f"3. **Critical Response Rules**:\n"
652
- f" - Alex: 1-2 sentence clear questions (\"Could you elaborate on that?\", \"What's a real-world example?\")\n"
653
- f" - Jordan: **Must answer in 2-4 sentences** (concept + detailed explanation + example/implication)\n"
654
- f" - Example: \"This refers to... Specifically, it's important because... For instance, recent studies show... This demonstrates...\"\n"
655
- f"4. **Professional Elements**:\n"
656
- f" - Cite statistics and research\n"
657
- f" - Real cases and case studies\n"
658
- f" - Explain technical terms clearly\n"
659
- f" - Present multiple perspectives\n"
660
- f"5. **Length**: 12-15 exchanges total\n\n"
661
  f"Return JSON only:\n{template}"
662
  )
663
-
664
- return base_prompt
665
-
666
-
667
 
668
  def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
669
  """Build messages for local LLM with enhanced professional podcast style"""
670
  if language == "Korean":
671
  system_message = (
672
  "당신은 한국 최고의 전문 팟캐스트 작가입니다. "
673
- "청취자들이 전문 지식을 쉽게 이해할 수 있는 고품질 대담을 만들어냅니다.\n\n"
674
- "핵심 원칙:\n"
675
- "1. 진행자(준수)는 핵심을 짚는 통찰력 있는 질문으로 대화를 이끌어갑니다\n"
676
 - "2. 전문가(민호)는 반드시 2-4문장으로 깊이 있게 답변합니다 (개념+설명+예시)\n"
677
- "3. 구체적인 데이터, 연구 결과, 실제 사례를 포함합니다\n"
678
- "4. 전문 용어는 쉽게 풀어서 설명하되, 정확성을 유지합니다\n"
679
- "5. 다양한 관점을 제시하여 균형잡힌 시각을 제공합니다\n"
680
- "6. 반드시 서로 존댓말을 사용하며, 전문적이면서도 친근한 톤을 유지합니다"
681
  )
682
  else:
683
  system_message = (
684
- "You are an expert podcast scriptwriter who creates high-quality, "
685
- "professional discussions that make complex topics accessible.\n\n"
686
- "Key principles:\n"
687
- "1. The host (Alex) asks insightful questions that drive the conversation\n"
688
- "2. The expert (Jordan) MUST answer in 2-4 sentences (concept+explanation+example)\n"
689
- "3. Include specific data, research findings, and real cases\n"
690
- "4. Explain technical terms clearly while maintaining accuracy\n"
691
- "5. Present multiple perspectives for balanced views\n"
692
- "6. Maintain a professional yet approachable tone"
693
  )
694
 
695
  return [
@@ -719,36 +687,19 @@ class UnifiedAudioConverter:
719
  chat_template = self._get_messages_formatter_type(self.config.local_model_name)
720
  provider = LlamaCppPythonProvider(self.local_llm)
721
 
722
- # 강화된 전문 팟캐스트 스타일 시스템 메시지
723
  if language == "Korean":
724
  system_message = (
725
  "당신은 한국의 유명 팟캐스트 전문 작가입니다. "
726
- "청취자들이 깊이 있는 전문 지식을 얻을 수 있는 고품질 대담을 만듭니다.\n\n"
727
- "작성 규칙:\n"
728
- "1. 진행자(준수)는 핵심을 짚는 1-2문장 질문을 합니다\n"
729
- "2. 전문가(민호)는 반드시 2-4문장으로 충실히 답변합니다:\n"
730
- " - 첫 문장: 핵심 개념 설명\n"
731
- " - 둘째 문장: 구체적인 설명이나 맥락\n"
732
- " - 셋째-넷째 문장: 실제 예시, 데이터, 함의\n"
733
- "3. 통계, 연구 결과, 실제 사례를 적극 활용하세요\n"
734
- "4. 전문성을 유지하면서도 이해하기 쉽게 설명하세요\n"
735
- "5. 12-15회의 대화 교환으로 구성하세요\n"
736
- "6. JSON 형식으로만 응답하세요"
737
  )
738
  else:
739
  system_message = (
740
- "You are a professional podcast scriptwriter creating high-quality, "
741
- "insightful discussions that provide deep expertise to listeners.\n\n"
742
- "Writing rules:\n"
743
- "1. Host (Alex) asks focused 1-2 sentence questions\n"
744
- "2. Expert (Jordan) MUST answer in 2-4 substantial sentences:\n"
745
- " - First sentence: Core concept explanation\n"
746
- " - Second sentence: Specific details or context\n"
747
- " - Third-fourth sentences: Real examples, data, implications\n"
748
- "3. Actively use statistics, research findings, real cases\n"
749
- "4. Maintain expertise while keeping explanations accessible\n"
750
- "5. Create 12-15 conversation exchanges\n"
751
- "6. Respond only in JSON format"
752
  )
753
 
754
  agent = LlamaCppAgent(
@@ -759,10 +710,10 @@ class UnifiedAudioConverter:
759
  )
760
 
761
  settings = provider.get_provider_default_settings()
762
- settings.temperature = 0.75 # 약간 낮춰서 더 일관된 전문적 답변
763
  settings.top_k = 40
764
  settings.top_p = 0.95
765
- settings.max_tokens = self.config.max_tokens # 증가된 토큰 수 사용
766
  settings.repeat_penalty = 1.1
767
  settings.stream = False
768
 
@@ -783,10 +734,6 @@ class UnifiedAudioConverter:
783
 
784
  if json_match:
785
  conversation_data = json.loads(json_match.group())
786
- # 대화 길이 확인 및 조정
787
- if len(conversation_data["conversation"]) < self.config.min_conversation_turns:
788
- print(f"Conversation too short ({len(conversation_data['conversation'])} turns), regenerating...")
789
- # 재시도 로직 추가 가능
790
  return conversation_data
791
  else:
792
  raise ValueError("No valid JSON found in local LLM response")
@@ -797,24 +744,20 @@ class UnifiedAudioConverter:
797
 
798
  @spaces.GPU(duration=120)
799
  def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
800
- """Extract conversation using legacy local model with enhanced professional style"""
801
  try:
802
  self.initialize_legacy_local_mode()
803
 
804
- # 강화된 전문 팟캐스트 스타일 시스템 메시지
805
  if language == "Korean":
806
  system_message = (
807
  "당신은 전문 팟캐스트 작가입니다. "
808
- "진행자(준수)는 통찰력 있는 질문을, 전문가(민호)는 2-4문장의 상세한 답변을 합니다. "
809
- "구체적인 데이터와 사례를 포함하여 전문적이면서도 이해하기 쉽게 설명하세요. "
810
- "12-15회 대화 교환으로 구성하세요."
811
  )
812
  else:
813
  system_message = (
814
- "You are a professional podcast scriptwriter. "
815
- "Create insightful dialogue where the host (Alex) asks focused questions "
816
- "and the expert (Jordan) gives detailed 2-4 sentence answers. "
817
- "Include specific data and examples. Create 12-15 exchanges."
818
  )
819
 
820
  chat = [
@@ -839,7 +782,7 @@ class UnifiedAudioConverter:
839
  generate_kwargs = dict(
840
  model_inputs,
841
  streamer=streamer,
842
- max_new_tokens=self.config.max_new_tokens, # 증가된 토큰 수 사용
843
  do_sample=True,
844
  temperature=0.75,
845
  eos_token_id=terminators,
@@ -862,14 +805,17 @@ class UnifiedAudioConverter:
862
 
863
  except Exception as e:
864
  print(f"Legacy local model also failed: {e}")
865
- # Return enhanced default template
866
- if language == "Korean":
867
- return self._get_default_korean_conversation()
868
- else:
869
- return self._get_default_english_conversation()
 
 
 
870
 
871
  def _get_default_korean_conversation(self) -> Dict:
872
- """더 전문적인 기본 한국어 대화 템플릿"""
873
  return {
874
  "conversation": [
875
  {"speaker": "준수", "text": "안녕하세요, 여러분! 오늘은 정말 중요하고 흥미로운 주제를 다뤄보려고 합니다. 민호 박사님, 먼저 이 주제가 왜 지금 이렇게 주목받고 있는지 설명해주시겠어요?"},
@@ -885,12 +831,12 @@ class UnifiedAudioConverter:
885
  {"speaker": "준수", "text": "실용적인 조언 감사합니다. 마지막으로 이 분야의 미래 전망은 어떻게 보시나요?"},
886
  {"speaker": "민호", "text": "향후 10년은 인류 역사상 가장 급격한 기술 발전을 경험하는 시기가 될 것입니다. 가트너의 하이프 사이클 분석에 따르면, 현재 우리는 이 기술의 초기 단계에 불과합니다. 2030년까지는 지금으로서는 상상하기 어려운 수준의 혁신이 일어날 것으로 예상됩니다. 중요한 것은 이런 변화를 두려워하기보다는 기회로 삼아 더 나은 미래를 만들어가는 것이라고 생각합니다."},
887
  {"speaker": "준수", "text": "정말 통찰력 있는 말씀이네요. 오늘 너무나 유익한 시간이었습니다. 청취자 여러분도 오늘 논의된 내용을 바탕으로 미래를 준비하시길 바랍니다. 민호 박사님, 귀중한 시간 내주셔서 감사합니다!"},
888
- {"speaker": "민호", "text": "감사합니다. 청취자 여러분들이 이 변화의 시대를 현명하게 헤쳐나가시길 바랍니다. 기술은 도구일 뿐이고, 그것을 어떻게 활용하는지는 우리에게 달려있다는 점을 기억해주세요. 오늘 말씀드린 내용에 대해 더 궁금하신 점이 있으시면 제가 운영하는 블로그나 최근 출간한 책에서 더 자세한 정보를 찾으실 수 있습니다."}
889
  ]
890
  }
891
 
892
  def _get_default_english_conversation(self) -> Dict:
893
- """Enhanced professional English conversation template"""
894
  return {
895
  "conversation": [
896
  {"speaker": "Alex", "text": "Welcome everyone to our podcast! Today we're diving into a topic that's reshaping our world. Dr. Jordan, could you start by explaining why this subject has become so critical right now?"},
@@ -898,24 +844,22 @@ class UnifiedAudioConverter:
898
  {"speaker": "Alex", "text": "400% acceleration is staggering! What does this mean for everyday people who might not be tech-savvy?"},
899
  {"speaker": "Jordan", "text": "The impact will be profound yet accessible. Think about how smartphones revolutionized communication - this will be similar but across every aspect of life. McKinsey's latest report projects that by 2026, these technologies will create $4.4 trillion in annual value globally. For individuals, this translates to personalized healthcare that can predict illnesses years in advance, educational systems that adapt to each student's learning style, and financial tools that democratize wealth-building strategies previously available only to the ultra-wealthy."},
900
  {"speaker": "Alex", "text": "Those applications sound transformative. Can you give us a concrete example of how this is already being implemented?"},
901
- {"speaker": "Jordan", "text": "Absolutely. Let me share a compelling case from Johns Hopkins Hospital. They've deployed an AI system that analyzes patient data in real-time, reducing diagnostic errors by 85% and cutting average diagnosis time from days to hours. In one documented case, the system identified a rare genetic disorder in a child that had been misdiagnosed for three years. The accuracy comes from analyzing patterns across millions of cases - something impossible for even the most experienced doctors to do manually. This technology is now being rolled out to rural hospitals, bringing world-class diagnostic capabilities to underserved communities."},
902
  {"speaker": "Alex", "text": "That's truly life-changing technology. But I imagine there are significant challenges and risks we need to consider?"},
903
- {"speaker": "Jordan", "text": "You're absolutely right to raise this. The challenges are as significant as the opportunities. The World Economic Forum identifies three critical risks: First, algorithmic bias could perpetuate or amplify existing inequalities if not carefully managed. Second, cybersecurity threats become exponentially more dangerous when AI systems control critical infrastructure. Third, there's the socioeconomic disruption - PwC estimates that 30% of jobs could be automated by 2030. However, history shows us that technological revolutions create new opportunities even as they displace old ones. The key is proactive adaptation and responsible development."},
904
  {"speaker": "Alex", "text": "How should individuals and organizations prepare for these changes?"},
905
- {"speaker": "Jordan", "text": "Preparation requires a multi-faceted approach. For individuals, I recommend focusing on skills that complement rather than compete with AI: critical thinking, emotional intelligence, and creative problem-solving. MIT's recent study shows that professionals who combine domain expertise with AI literacy see salary increases of 40% on average. Organizations need to invest in continuous learning programs - Amazon's $700 million worker retraining initiative is a good model. Most importantly, we need to cultivate an adaptive mindset. The half-life of specific technical skills is shrinking, but the ability to learn and unlearn quickly is becoming invaluable."},
906
  {"speaker": "Alex", "text": "That's practical advice. What about the ethical considerations? How do we ensure this technology benefits humanity as a whole?"},
907
- {"speaker": "Jordan", "text": "Ethics must be at the forefront of development. The EU's AI Act and similar regulations worldwide are establishing important guardrails. We need transparent AI systems where decisions can be explained and audited. Companies like IBM and Google have established AI ethics boards, but we need industry-wide standards. Additionally, we must address the digital divide - UNESCO reports that 37% of the global population still lacks internet access. Without inclusive development, these technologies could exacerbate global inequality rather than reduce it. The solution requires collaboration between technologists, ethicists, policymakers, and communities."},
908
  {"speaker": "Alex", "text": "Looking ahead, what's your vision for how this technology will shape the next decade?"},
909
- {"speaker": "Jordan", "text": "The next decade will be transformative beyond our current imagination. Ray Kurzweil's prediction of technological singularity by 2045 seems increasingly plausible. By 2035, I expect we'll see autonomous systems managing entire cities, personalized medicine extending human lifespan by 20-30 years, and educational AI that makes world-class education universally accessible. The convergence of AI with quantum computing, biotechnology, and nanotechnology will unlock possibilities we can barely conceive of today. However, the future isn't predetermined - it's shaped by the choices we make now about development priorities, ethical frameworks, and inclusive access."},
910
- {"speaker": "Alex", "text": "That's both exciting and sobering. Any final thoughts for our listeners?"},
911
- {"speaker": "Jordan", "text": "I'd encourage everyone to view this as humanity's next great adventure. Yes, there are risks and challenges, but we're also on the cusp of solving problems that have plagued us for millennia - disease, poverty, environmental degradation. The key is engaged participation rather than passive observation. Stay informed through reliable sources, experiment with new technologies, and most importantly, contribute to the conversation about what kind of future we want to build. The decisions we make in the next five years will reverberate for generations."},
912
  {"speaker": "Alex", "text": "Dr. Jordan, this has been an incredibly enlightening discussion. Thank you for sharing your expertise and insights with us today."},
913
- {"speaker": "Jordan", "text": "Thank you, Alex. It's been a pleasure discussing these crucial topics. For listeners wanting to dive deeper, I've compiled additional resources on my website, including links to the studies we discussed today. Remember, the future isn't something that happens to us - it's something we create together. I look forward to seeing how each of you contributes to shaping this exciting new era."}
914
  ]
915
  }
916
 
917
  def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
918
- """Extract conversation using API with enhanced professional style"""
919
  if not self.llm_client:
920
  raise RuntimeError("API mode not initialized")
921
 
@@ -932,26 +876,18 @@ class UnifiedAudioConverter:
932
  except Exception as e:
933
  print(f"Search failed, continuing without context: {e}")
934
 
935
- # 강화된 전문 팟캐스트 스타일 프롬프트
936
  if language == "Korean":
937
  system_message = (
938
  "당신은 한국의 최고 전문 팟캐스트 작가입니다. "
939
 - "청취자들이 깊이 있는 인사이트를 얻을 수 있는 고품질 대담을 만드세요.\n"
940
- "준수(진행자)는 핵심을 짚는 1-2문장 질문을 하고, "
941
- "민호(전문가)는 반드시 2-4문장으로 상세히 답변합니다. "
942
- "구체적인 데이터, 연구 결과, 실제 사례를 포함하세요. "
943
- "전문 용어는 쉽게 설명하고, 반드시 서로 존댓말을 사용하세요. "
944
- "12-15회의 깊이 있는 대화 교환으로 구성하세요."
945
  )
946
  else:
947
  system_message = (
948
- "You are a top professional podcast scriptwriter. "
949
- "Create high-quality discussions that provide deep insights to listeners. "
950
- "Alex (host) asks focused 1-2 sentence questions, "
951
- "while Jordan (expert) MUST answer in 2-4 detailed sentences. "
952
- "Include specific data, research findings, and real cases. "
953
- "Explain technical terms clearly. "
954
- "Create 12-15 insightful conversation exchanges."
955
  )
956
 
957
  chat_completion = self.llm_client.chat.completions.create(
@@ -994,17 +930,8 @@ class UnifiedAudioConverter:
994
  filenames = []
995
 
996
  try:
997
- # 언어별 음성 설정 - 한국어는 모두 남성 음성
998
- if language == "Korean":
999
- voices = [
1000
- "ko-KR-HyunsuNeural", # 남성 음성 1 (차분하고 신뢰감 있는)
1001
- "ko-KR-InJoonNeural" # 남성 음성 2 (활기차고 친근한)
1002
- ]
1003
- else:
1004
- voices = [
1005
- "en-US-AndrewMultilingualNeural", # 남성 음성 1
1006
- "en-US-BrianMultilingualNeural" # 남성 음성 2
1007
- ]
1008
 
1009
  for i, turn in enumerate(conversation_json["conversation"]):
1010
  filename = output_dir / f"output_{i}.wav"
@@ -1055,13 +982,13 @@ class UnifiedAudioConverter:
1055
  # Create different voice characteristics for different speakers
1056
  if language == "Korean":
1057
  voice_configs = [
1058
- {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 준수입니다. 여러분과 함께 흥미로운 이야기를 나눠보겠습니다.", "gender": "male"},
1059
- {"prompt_text": "안녕하세요, 저는 오늘 이 주제에 대해 설명드릴 민호입니다. 쉽고 재미있게 설명드릴게요.", "gender": "male"}
1060
  ]
1061
  else:
1062
  voice_configs = [
1063
- {"prompt_text": "Hello everyone, I'm Alex, your host for today's podcast. Let's explore this fascinating topic together.", "gender": "male"},
1064
- {"prompt_text": "Hi, I'm Jordan. I'm excited to share my insights on this subject with you all today.", "gender": "male"}
1065
  ]
1066
 
1067
  for i, turn in enumerate(conversation_json["conversation"]):
@@ -1069,12 +996,9 @@ class UnifiedAudioConverter:
1069
  if not text.strip():
1070
  continue
1071
 
1072
- # Use different voice config for each speaker
1073
  voice_config = voice_configs[i % len(voice_configs)]
1074
-
1075
  output_file = os.path.join(output_dir, f"spark_output_{i}.wav")
1076
 
1077
- # Run Spark TTS CLI inference
1078
  cmd = [
1079
  "python", "-m", "cli.inference",
1080
  "--text", text,
@@ -1086,33 +1010,29 @@ class UnifiedAudioConverter:
1086
  ]
1087
 
1088
  try:
1089
- # Run the command
1090
  result = subprocess.run(
1091
  cmd,
1092
  capture_output=True,
1093
  text=True,
1094
  timeout=60,
1095
- cwd="." # Make sure we're in the right directory
1096
  )
1097
 
1098
  if result.returncode == 0:
1099
  audio_files.append(output_file)
1100
  else:
1101
  print(f"Spark TTS error for turn {i}: {result.stderr}")
1102
- # Create a short silence as fallback
1103
- silence = np.zeros(int(22050 * 1.0)) # 1 second of silence
1104
  sf.write(output_file, silence, 22050)
1105
  audio_files.append(output_file)
1106
 
1107
  except subprocess.TimeoutExpired:
1108
  print(f"Spark TTS timeout for turn {i}")
1109
- # Create silence as fallback
1110
  silence = np.zeros(int(22050 * 1.0))
1111
  sf.write(output_file, silence, 22050)
1112
  audio_files.append(output_file)
1113
  except Exception as e:
1114
  print(f"Error running Spark TTS for turn {i}: {e}")
1115
- # Create silence as fallback
1116
  silence = np.zeros(int(22050 * 1.0))
1117
  sf.write(output_file, silence, 22050)
1118
  audio_files.append(output_file)
@@ -1124,7 +1044,6 @@ class UnifiedAudioConverter:
1124
  else:
1125
  raise RuntimeError("No audio files generated")
1126
 
1127
- # Generate conversation text
1128
  conversation_text = "\n".join(
1129
  f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
1130
  for i, turn in enumerate(conversation_json["conversation"])
@@ -1150,7 +1069,6 @@ class UnifiedAudioConverter:
1150
  speaker = speakers[i % 2]
1151
  speaker_id = self.melo_models["EN"].hps.data.spk2id[speaker]
1152
 
1153
- # Generate audio
1154
  self.melo_models["EN"].tts_to_file(
1155
  text, speaker_id, bio, speed=1.0,
1156
  pbar=progress.tqdm if progress else None,
@@ -1161,11 +1079,9 @@ class UnifiedAudioConverter:
1161
  audio_segment = AudioSegment.from_file(bio, format="wav")
1162
  combined_audio += audio_segment
1163
 
1164
- # Save final audio
1165
  final_audio_path = "melo_podcast.mp3"
1166
  combined_audio.export(final_audio_path, format="mp3")
1167
 
1168
- # Generate conversation text
1169
  conversation_text = "\n".join(
1170
  f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
1171
  for i, turn in enumerate(conversation_json["conversation"])
@@ -1224,10 +1140,9 @@ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"
1224
  else: # Keyword
1225
  if not article_input or not isinstance(article_input, str):
1226
  return "Please provide a keyword or topic.", None
1227
- # 키워드로 검색하여 콘텐츠 생성
1228
  text = search_and_compile_content(article_input, language)
1229
- text = f"Keyword-based content:\n{text}" # 마커 추가
1230
-
1231
  # Limit text to max words
1232
  words = text.split()
1233
  if len(words) > converter.config.max_words:
@@ -1235,19 +1150,17 @@ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"
1235
 
1236
  # Extract conversation based on mode
1237
  if mode == "Local":
1238
- # 로컬 모드가 기본 (새로운 Local LLM 사용)
1239
  try:
1240
  conversation_json = converter.extract_conversation_local(text, language)
1241
  except Exception as e:
1242
  print(f"Local mode failed: {e}, trying API fallback")
1243
- # API 폴백
1244
  api_key = os.environ.get("TOGETHER_API_KEY")
1245
  if api_key:
1246
  converter.initialize_api_mode(api_key)
1247
  conversation_json = converter.extract_conversation_api(text, language)
1248
  else:
1249
  raise RuntimeError("Local mode failed and no API key available for fallback")
1250
- else: # API mode (now secondary)
1251
  api_key = os.environ.get("TOGETHER_API_KEY")
1252
  if not api_key:
1253
  print("API key not found, falling back to local mode")
@@ -1278,15 +1191,14 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
1278
  return "Please provide conversation text.", None
1279
 
1280
  try:
1281
- # Parse the conversation text back to JSON format
1282
  conversation_json = converter.parse_conversation_text(conversation_text)
1283
 
1284
  if not conversation_json["conversation"]:
1285
  return "No valid conversation found in the text.", None
1286
 
1287
- # 한국어인 경우 Edge-TTS 사용 (다른 TTS는 한국어 지원이 제한적)
1288
- if language == "Korean" and tts_engine != "Edge-TTS":
1289
- tts_engine = "Edge-TTS" # 자동으로 Edge-TTS로 변경
1290
 
1291
  # Generate audio based on TTS engine
1292
  if tts_engine == "Edge-TTS":
@@ -1299,8 +1211,8 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
1299
  else: # MeloTTS
1300
  if not MELO_AVAILABLE:
1301
  return "MeloTTS not available. Please install required dependencies.", None
1302
- if language == "Korean":
1303
- return "MeloTTS does not support Korean. Please use Edge-TTS for Korean.", None
1304
  converter.initialize_melo_tts()
1305
  output_file, _ = converter.text_to_speech_melo(conversation_json)
1306
 
@@ -1320,14 +1232,34 @@ def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS",
1320
  return asyncio.run(regenerate_audio(conversation_text, tts_engine, language))
1321
 
1322
 
1323
- def update_tts_engine_for_korean(language):
1324
- """한국어 선택 시 TTS 엔진 옵션 업데이트"""
1325
- if language == "Korean":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1326
  return gr.Radio(
1327
  choices=["Edge-TTS"],
1328
  value="Edge-TTS",
1329
  label="TTS Engine",
1330
- info="한국어는 Edge-TTS만 지원됩니다",
1331
  interactive=False
1332
  )
1333
  else:
@@ -1363,7 +1295,7 @@ if LLAMA_CPP_AVAILABLE:
1363
  print(f"Failed to download model at startup: {e}")
1364
 
1365
 
1366
- # Gradio Interface - 개선된 레이아웃
1367
  with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1368
  .container {max-width: 1200px; margin: auto; padding: 20px;}
1369
  .header-text {text-align: center; margin-bottom: 30px;}
@@ -1375,8 +1307,8 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1375
  # 헤더
1376
  with gr.Row(elem_classes="header-text"):
1377
  gr.Markdown("""
1378
- # 🎙️ AI Podcast Generator - Professional Edition
1379
- ### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation with in-depth analysis!
1380
  """)
1381
 
1382
  with gr.Row(elem_classes="discord-badge"):
@@ -1388,8 +1320,6 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1388
  </p>
1389
  """)
1390
 
1391
-
1392
-
1393
  # 상태 표시 섹션
1394
  with gr.Row():
1395
  with gr.Column(scale=1):
@@ -1402,11 +1332,11 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1402
  """)
1403
  with gr.Column(scale=1):
1404
  gr.Markdown("""
1405
- #### 📻 Podcast Features
1406
- - **Length**: 12-15 professional exchanges
1407
- - **Style**: Expert discussions with data & insights
1408
- - **Languages**: English & Korean (한국어)
1409
- - **Input**: URL, PDF, or Keywords
1410
  """)
1411
 
1412
  # 메인 입력 섹션
@@ -1441,7 +1371,7 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1441
  # 키워드 입력
1442
  keyword_input = gr.Textbox(
1443
  label="🔍 Topic/Keyword",
1444
- placeholder="Enter a topic (e.g., 'AI trends 2024', '인공지능 최신 동향')",
1445
  value="",
1446
  visible=False,
1447
  info="System will search and compile latest information",
@@ -1452,10 +1382,16 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1452
  with gr.Column(scale=1):
1453
  # 언어 선택
1454
  language_selector = gr.Radio(
1455
- choices=["English", "Korean"],
 
 
 
 
 
 
1456
  value="English",
1457
- label="🌐 Language / 언어",
1458
- info="Output language"
1459
  )
1460
 
1461
  # 처리 모드
@@ -1493,7 +1429,7 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1493
  lines=25,
1494
  max_lines=50,
1495
  interactive=True,
1496
- placeholder="Professional podcast conversation will appear here...\n전문 팟캐스트 대화가 여기에 표시됩니다...",
1497
  info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
1498
  )
1499
 
@@ -1524,20 +1460,24 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1524
  gr.Markdown("""
1525
  #### 💡 Quick Tips:
1526
  - **URL**: Paste any article link
1527
- - **PDF**: Upload documents directly
1528
  - **Keyword**: Enter topics for AI research
 
1529
  - Edit conversation before audio generation
1530
- - Korean (한국어) fully supported
1531
  """)
1532
 
1533
  # 예제 섹션
1534
- with gr.Accordion("📚 Examples", open=False):
1535
  gr.Examples(
1536
  examples=[
1537
- ["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
1538
  ["quantum computing breakthroughs", "Keyword", "Local", "Edge-TTS", "English"],
1539
- ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
1540
  ["인공지능 윤리와 규제", "Keyword", "Local", "Edge-TTS", "Korean"],
 
 
 
 
1541
  ],
1542
  inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
1543
  outputs=[conversation_output, status_output],
@@ -1554,7 +1494,7 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1554
 
1555
  # 언어 변경 시 TTS 엔진 옵션 업데이트
1556
  language_selector.change(
1557
- fn=update_tts_engine_for_korean,
1558
  inputs=[language_selector],
1559
  outputs=[tts_selector]
1560
  )
@@ -1591,4 +1531,4 @@ if __name__ == "__main__":
1591
  share=False,
1592
  server_name="0.0.0.0",
1593
  server_port=7860
1594
- )
 
79
  BRAVE_KEY = os.getenv("BSEARCH_API")
80
  BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
81
 
82
# Languages supported exclusively by Edge-TTS (English is excluded because it
# is also handled by the other TTS engines such as Spark-TTS / MeloTTS).
# A frozenset is used because this constant is only ever queried with
# `language in EDGE_TTS_ONLY_LANGUAGES`: membership is O(1) and the
# collection is documented as fixed.
EDGE_TTS_ONLY_LANGUAGES = frozenset([
    "Korean", "Japanese", "French", "German", "Spanish", "Italian",
    "Portuguese", "Dutch", "Thai", "Vietnamese", "Arabic", "Hebrew",
    "Indonesian", "Hindi", "Russian", "Chinese", "Norwegian", "Swedish",
    "Finnish", "Danish", "Polish", "Turkish", "Greek", "Czech",
])
89
+
90
# Edge-TTS voice configuration per language.
# Each entry maps a language name (as shown in the UI language selector) to a
# two-element list of Edge-TTS neural voice identifiers: index 0 is the
# primary voice and index 1 is the secondary/backup voice used for the
# second speaker. Consumers fall back to the "English" entry for unknown
# languages (see text_to_speech_edge usage: EDGE_TTS_VOICES.get(language,
# EDGE_TTS_VOICES["English"])).
EDGE_TTS_VOICES = {
    "English": [
        "en-US-AndrewMultilingualNeural",   # male voice 1
        "en-US-BrianMultilingualNeural",    # male voice 2
    ],
    "Korean": [
        "ko-KR-HyunsuNeural",               # male voice 1 (calm, trustworthy)
        "ko-KR-InJoonNeural",               # male voice 2 (lively, friendly)
    ],
    "Japanese": [
        "ja-JP-KeitaNeural",                # male voice 1
        "ja-JP-NanamiNeural",               # female voice (backup)
    ],
    "French": [
        "fr-FR-HenriNeural",                # male voice 1
        "fr-FR-DeniseNeural",               # female voice (backup)
    ],
    "German": [
        "de-DE-ConradNeural",               # male voice 1
        "de-DE-KillianNeural",              # male voice 2
    ],
    "Spanish": [
        "es-ES-AlvaroNeural",               # male voice 1
        "es-ES-ElviraNeural",               # female voice (backup)
    ],
    "Italian": [
        "it-IT-DiegoNeural",                # male voice 1
        "it-IT-IsabellaNeural",             # female voice (backup)
    ],
    "Portuguese": [
        "pt-BR-AntonioNeural",              # male voice 1
        "pt-BR-FranciscaNeural",            # female voice (backup)
    ],
    "Dutch": [
        "nl-NL-MaartenNeural",              # male voice 1
        "nl-NL-ColetteNeural",              # female voice (backup)
    ],
    "Thai": [
        "th-TH-NiwatNeural",                # male voice 1
        "th-TH-PremwadeeNeural",            # female voice (backup)
    ],
    "Vietnamese": [
        "vi-VN-NamMinhNeural",              # male voice 1
        "vi-VN-HoaiMyNeural",               # female voice (backup)
    ],
    "Arabic": [
        "ar-SA-HamedNeural",                # male voice 1
        "ar-SA-ZariyahNeural",              # female voice (backup)
    ],
    "Hebrew": [
        "he-IL-AvriNeural",                 # male voice 1
        "he-IL-HilaNeural",                 # female voice (backup)
    ],
    "Indonesian": [
        "id-ID-ArdiNeural",                 # male voice 1
        "id-ID-GadisNeural",                # female voice (backup)
    ],
    "Hindi": [
        "hi-IN-MadhurNeural",               # male voice 1
        "hi-IN-SwaraNeural",                # female voice (backup)
    ],
    "Russian": [
        "ru-RU-DmitryNeural",               # male voice 1
        "ru-RU-SvetlanaNeural",             # female voice (backup)
    ],
    "Chinese": [
        "zh-CN-YunxiNeural",                # male voice 1
        "zh-CN-XiaoxiaoNeural",             # female voice (backup)
    ],
    "Norwegian": [
        "nb-NO-FinnNeural",                 # male voice 1
        "nb-NO-PernilleNeural",             # female voice (backup)
    ],
    "Swedish": [
        "sv-SE-MattiasNeural",              # male voice 1
        "sv-SE-SofieNeural",                # female voice (backup)
    ],
    "Finnish": [
        "fi-FI-HarriNeural",                # male voice 1
        "fi-FI-NooraNeural",                # female voice (backup)
    ],
    "Danish": [
        "da-DK-JeppeNeural",                # male voice 1
        "da-DK-ChristelNeural",             # female voice (backup)
    ],
    "Polish": [
        "pl-PL-MarekNeural",                # male voice 1
        "pl-PL-ZofiaNeural",                # female voice (backup)
    ],
    "Turkish": [
        "tr-TR-AhmetNeural",                # male voice 1
        "tr-TR-EmelNeural",                 # female voice (backup)
    ],
    "Greek": [
        "el-GR-NestorasNeural",             # male voice 1
        "el-GR-AthinaNeural",               # female voice (backup)
    ],
    "Czech": [
        "cs-CZ-AntoninNeural",              # male voice 1
        "cs-CZ-VlastaNeural",               # female voice (backup)
    ],
}
193
+
194
  @dataclass
195
  class ConversationConfig:
196
  max_words: int = 8000 # 4000에서 6000으로 증가 (1.5배)
 
398
  return intro + compiled
399
 
400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  class UnifiedAudioConverter:
402
  def __init__(self, config: ConversationConfig):
403
  self.config = config
 
549
  else:
550
  return MessagesFormatterType.LLAMA_3
551
 
 
552
  def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
553
  """Build prompt for conversation generation with enhanced professional podcast style"""
554
  # 텍스트 길이 제한
 
556
  if len(text) > max_text_length:
557
  text = text[:max_text_length] + "..."
558
 
559
+ # 언어별 화자 이름 설정
560
  if language == "Korean":
561
+ speaker1, speaker2 = "준수", "민호"
562
+ elif language == "Japanese":
563
+ speaker1, speaker2 = "Hiroshi", "Takeshi"
564
+ elif language == "French":
565
+ speaker1, speaker2 = "Pierre", "Marc"
566
+ elif language == "German":
567
+ speaker1, speaker2 = "Klaus", "Stefan"
568
+ elif language == "Spanish":
569
+ speaker1, speaker2 = "Carlos", "Miguel"
570
+ elif language == "Italian":
571
+ speaker1, speaker2 = "Marco", "Giuseppe"
572
+ elif language == "Portuguese":
573
+ speaker1, speaker2 = "João", "Pedro"
574
+ elif language == "Dutch":
575
+ speaker1, speaker2 = "Jan", "Pieter"
576
+ elif language == "Thai":
577
+ speaker1, speaker2 = "Somchai", "Prasert"
578
+ elif language == "Vietnamese":
579
+ speaker1, speaker2 = "Minh", "Duc"
580
+ elif language == "Arabic":
581
+ speaker1, speaker2 = "Ahmed", "Mohammed"
582
+ elif language == "Hebrew":
583
+ speaker1, speaker2 = "David", "Michael"
584
+ elif language == "Indonesian":
585
+ speaker1, speaker2 = "Budi", "Andi"
586
+ elif language == "Hindi":
587
+ speaker1, speaker2 = "Raj", "Amit"
588
+ elif language == "Russian":
589
+ speaker1, speaker2 = "Alexei", "Dmitri"
590
+ elif language == "Chinese":
591
+ speaker1, speaker2 = "Wei", "Jun"
592
+ else: # English and others
593
+ speaker1, speaker2 = "Alex", "Jordan"
594
+
595
+ # 대화 템플릿 생성
596
+ template = "{\n \"conversation\": [\n"
597
+ for i in range(12): # 12번의 교환
598
+ template += f" {{\"speaker\": \"{speaker1 if i % 2 == 0 else speaker2}\", \"text\": \"\"}}"
599
+ if i < 11:
600
+ template += ","
601
+ template += "\n"
602
+ template += " ]\n}"
603
+
604
+ context_part = ""
605
+ if search_context:
606
+ if language == "Korean":
607
  context_part = f"# 최신 관련 정보:\n{search_context}\n"
608
+ else:
609
+ context_part = f"# Latest Information:\n{search_context}\n"
610
 
611
+ if language == "Korean":
612
  base_prompt = (
613
  f"# 원본 콘텐츠:\n{text}\n\n"
614
  f"{context_part}"
 
616
  f"## 핵심 지침:\n"
617
  f"1. **대화 스타일**: 전문적이면서도 이해하기 쉬운 팟캐스트 대담\n"
618
  f"2. **화자 역할**:\n"
619
+ f" - {speaker1}: 진행자/호스트 (핵심을 짚는 질문, 청취자 관점에서 궁금한 점 질문)\n"
620
+ f" - {speaker2}: 전문가 (깊이 있는 설명, 구체적 사례와 데이터 제시)\n"
621
  f"3. **중요한 답변 규칙**:\n"
622
+ f" - {speaker1}: 1-2문장의 명확한 질문\n"
623
+ f" - {speaker2}: **반드시 2-4문장으로 충실히 답변** (개념 설명 + 구체적 설명 + 예시나 함의)\n"
624
+ f"4. **전문성 요소**: 통계나 연구 결과 인용, 실제 사례와 케이스 스터디, 전문 용어를 쉽게 풀어서 설명\n"
625
+ f"5. **필수 규칙**: 서로 존댓말 사용, 12회 대화 교환\n\n"
 
 
 
 
 
626
  f"JSON 형식으로만 반환:\n{template}"
627
  )
 
 
 
628
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
629
  base_prompt = (
630
  f"# Content:\n{text}\n\n"
631
  f"{context_part}"
632
+ f"Create a professional and insightful podcast conversation in {language}.\n\n"
633
  f"## Key Guidelines:\n"
634
  f"1. **Style**: Professional yet accessible podcast discussion\n"
635
  f"2. **Roles**:\n"
636
+ f" - {speaker1}: Host (insightful questions, audience perspective)\n"
637
+ f" - {speaker2}: Expert (in-depth explanations, concrete examples and data)\n"
638
  f"3. **Critical Response Rules**:\n"
639
+ f" - {speaker1}: 1-2 sentence clear questions\n"
640
+ f" - {speaker2}: **Must answer in 2-4 sentences** (concept + detailed explanation + example/implication)\n"
641
+ f"4. **Professional Elements**: Cite statistics and research, real cases and case studies, explain technical terms clearly\n"
642
+ f"5. **Length**: 12 exchanges total\n\n"
 
 
 
 
 
643
  f"Return JSON only:\n{template}"
644
  )
645
+
646
+ return base_prompt
 
 
647
 
648
  def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
649
  """Build messages for local LLM with enhanced professional podcast style"""
650
  if language == "Korean":
651
  system_message = (
652
  "당신은 한국 최고의 전문 팟캐스트 작가입니다. "
653
+ "청취자들이 전문 지식을 쉽게 이해할 수 있는 고품질 대담을 만들어냅니다. "
654
+ "반드시 서로 존댓말을 사용하며, 전문적이면서도 친근한 톤을 유지합니다."
 
 
 
 
 
 
655
  )
656
  else:
657
  system_message = (
658
+ f"You are an expert podcast scriptwriter creating high-quality "
659
+ f"professional discussions in {language}. Make complex topics accessible "
660
+ f"while maintaining expertise and a professional yet approachable tone."
 
 
 
 
 
 
661
  )
662
 
663
  return [
 
687
  chat_template = self._get_messages_formatter_type(self.config.local_model_name)
688
  provider = LlamaCppPythonProvider(self.local_llm)
689
 
690
+ # 언어별 시스템 메시지
691
  if language == "Korean":
692
  system_message = (
693
  "당신은 한국의 유명 팟캐스트 전문 작가입니다. "
694
+ "청취자들이 깊이 있는 전문 지식을 얻을 수 있는 고품질 대담을 만듭니다. "
695
+ "반드시 서로 존댓말을 사용하며, 12회의 대화 교환으로 구성하세요. "
696
+ "JSON 형식으로만 응답하세요."
 
 
 
 
 
 
 
 
697
  )
698
  else:
699
  system_message = (
700
+ f"You are a professional podcast scriptwriter creating high-quality, "
701
+ f"insightful discussions in {language}. Create exactly 12 conversation exchanges "
702
+ f"with professional expertise. Respond only in JSON format."
 
 
 
 
 
 
 
 
 
703
  )
704
 
705
  agent = LlamaCppAgent(
 
710
  )
711
 
712
  settings = provider.get_provider_default_settings()
713
+ settings.temperature = 0.75
714
  settings.top_k = 40
715
  settings.top_p = 0.95
716
+ settings.max_tokens = self.config.max_tokens
717
  settings.repeat_penalty = 1.1
718
  settings.stream = False
719
 
 
734
 
735
  if json_match:
736
  conversation_data = json.loads(json_match.group())
 
 
 
 
737
  return conversation_data
738
  else:
739
  raise ValueError("No valid JSON found in local LLM response")
 
744
 
745
  @spaces.GPU(duration=120)
746
  def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
747
+ """Extract conversation using legacy local model"""
748
  try:
749
  self.initialize_legacy_local_mode()
750
 
751
+ # 언어별 시스템 메시지
752
  if language == "Korean":
753
  system_message = (
754
  "당신은 전문 팟캐스트 작가입니다. "
755
+ "12회의 대화 교환으로 구성된 전문적인 대담을 만드세요."
 
 
756
  )
757
  else:
758
  system_message = (
759
+ f"You are a professional podcast scriptwriter. "
760
+ f"Create a professional dialogue in {language} with 12 exchanges."
 
 
761
  )
762
 
763
  chat = [
 
782
  generate_kwargs = dict(
783
  model_inputs,
784
  streamer=streamer,
785
+ max_new_tokens=self.config.max_new_tokens,
786
  do_sample=True,
787
  temperature=0.75,
788
  eos_token_id=terminators,
 
805
 
806
  except Exception as e:
807
  print(f"Legacy local model also failed: {e}")
808
+ return self._get_default_conversation(language)
809
+
810
+ def _get_default_conversation(self, language: str) -> Dict:
811
+ """언어별 기본 대화 템플릿"""
812
+ if language == "Korean":
813
+ return self._get_default_korean_conversation()
814
+ else:
815
+ return self._get_default_english_conversation()
816
 
817
  def _get_default_korean_conversation(self) -> Dict:
818
+ """기본 한국어 대화 템플릿"""
819
  return {
820
  "conversation": [
821
  {"speaker": "준수", "text": "안녕하세요, 여러분! 오늘은 정말 중요하고 흥미로운 주제를 다뤄보려고 합니다. 민호 박사님, 먼저 이 주제가 왜 지금 이렇게 주목받고 있는지 설명해주시겠어요?"},
 
831
  {"speaker": "준수", "text": "실용적인 조언 감사합니다. 마지막으로 이 분야의 미래 전망은 어떻게 보시나요?"},
832
  {"speaker": "민호", "text": "향후 10년은 인류 역사상 가장 급격한 기술 발전을 경험하는 시기가 될 것입니다. 가트너의 하이프 사이클 분석에 따르면, 현재 우리는 이 기술의 초기 단계에 불과합니다. 2030년까지는 지금으로서는 상상하기 어려운 수준의 혁신이 일어날 것으로 예상됩니다. 중요한 것은 이런 변화를 두려워하기보다는 기회로 삼아 더 나은 미래를 만들어가는 것이라고 생각합니다."},
833
 {"speaker": "준수", "text": "정말 통찰력 있는 말씀이네요. 오늘 너무나 유익한 시간이었습니다. 청취자 여러분도 오늘 논의된 내용을 바탕으로 미래를 준비하시길 바랍니다. 민호 박사님, 귀중한 시간 내주셔서 감사합니다!"},
834
+ {"speaker": "민호", "text": "감사합니다. 청취자 여러분들이 이 변화의 시대를 현명하게 헤쳐나가시길 바랍니다. 기술은 도구일 뿐이고, 그것을 어떻게 활용하는지는 우리에게 달려있다는 점을 기억해주세요."}
835
  ]
836
  }
837
 
838
  def _get_default_english_conversation(self) -> Dict:
839
+ """기본 영어 대화 템플릿"""
840
  return {
841
  "conversation": [
842
  {"speaker": "Alex", "text": "Welcome everyone to our podcast! Today we're diving into a topic that's reshaping our world. Dr. Jordan, could you start by explaining why this subject has become so critical right now?"},
 
844
  {"speaker": "Alex", "text": "400% acceleration is staggering! What does this mean for everyday people who might not be tech-savvy?"},
845
  {"speaker": "Jordan", "text": "The impact will be profound yet accessible. Think about how smartphones revolutionized communication - this will be similar but across every aspect of life. McKinsey's latest report projects that by 2026, these technologies will create $4.4 trillion in annual value globally. For individuals, this translates to personalized healthcare that can predict illnesses years in advance, educational systems that adapt to each student's learning style, and financial tools that democratize wealth-building strategies previously available only to the ultra-wealthy."},
846
  {"speaker": "Alex", "text": "Those applications sound transformative. Can you give us a concrete example of how this is already being implemented?"},
847
+ {"speaker": "Jordan", "text": "Absolutely. Let me share a compelling case from Johns Hopkins Hospital. They've deployed an AI system that analyzes patient data in real-time, reducing diagnostic errors by 85% and cutting average diagnosis time from days to hours. In one documented case, the system identified a rare genetic disorder in a child that had been misdiagnosed for three years. The accuracy comes from analyzing patterns across millions of cases - something impossible for even the most experienced doctors to do manually."},
848
  {"speaker": "Alex", "text": "That's truly life-changing technology. But I imagine there are significant challenges and risks we need to consider?"},
849
+ {"speaker": "Jordan", "text": "You're absolutely right to raise this. The challenges are as significant as the opportunities. The World Economic Forum identifies three critical risks: algorithmic bias could perpetuate existing inequalities, cybersecurity threats become exponentially more dangerous, and there's the socioeconomic disruption with PwC estimating that 30% of jobs could be automated by 2030. However, history shows us that technological revolutions create new opportunities even as they displace old ones. The key is proactive adaptation and responsible development."},
850
  {"speaker": "Alex", "text": "How should individuals and organizations prepare for these changes?"},
851
+ {"speaker": "Jordan", "text": "Preparation requires a multi-faceted approach. For individuals, I recommend focusing on skills that complement rather than compete with AI: critical thinking, emotional intelligence, and creative problem-solving. MIT's recent study shows that professionals who combine domain expertise with AI literacy see salary increases of 40% on average. Organizations need to invest in continuous learning programs - Amazon's $700 million worker retraining initiative is a good model. Most importantly, we need to cultivate an adaptive mindset."},
852
  {"speaker": "Alex", "text": "That's practical advice. What about the ethical considerations? How do we ensure this technology benefits humanity as a whole?"},
853
+ {"speaker": "Jordan", "text": "Ethics must be at the forefront of development. The EU's AI Act and similar regulations worldwide are establishing important guardrails. We need transparent AI systems where decisions can be explained and audited. Companies like IBM and Google have established AI ethics boards, but we need industry-wide standards. Additionally, we must address the digital divide - UNESCO reports that 37% of the global population still lacks internet access. Without inclusive development, these technologies could exacerbate global inequality."},
854
  {"speaker": "Alex", "text": "Looking ahead, what's your vision for how this technology will shape the next decade?"},
855
+ {"speaker": "Jordan", "text": "The next decade will be transformative beyond our current imagination. By 2035, I expect we'll see autonomous systems managing entire cities, personalized medicine extending human lifespan by 20-30 years, and educational AI that makes world-class education universally accessible. The convergence of AI with quantum computing, biotechnology, and nanotechnology will unlock possibilities we can barely conceive of today. However, the future isn't predetermined - it's shaped by the choices we make now about development priorities and ethical frameworks."},
 
 
856
  {"speaker": "Alex", "text": "Dr. Jordan, this has been an incredibly enlightening discussion. Thank you for sharing your expertise and insights with us today."},
857
+ {"speaker": "Jordan", "text": "Thank you, Alex. For listeners wanting to dive deeper, I've compiled additional resources on my website. Remember, the future isn't something that happens to us - it's something we create together. I look forward to seeing how each of you contributes to shaping this exciting new era."}
858
  ]
859
  }
860
 
861
  def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
862
+ """Extract conversation using API"""
863
  if not self.llm_client:
864
  raise RuntimeError("API mode not initialized")
865
 
 
876
  except Exception as e:
877
  print(f"Search failed, continuing without context: {e}")
878
 
879
+ # 언어별 시스템 메시지
880
  if language == "Korean":
881
  system_message = (
882
  "당신은 한국의 최고 전문 팟캐스트 작가입니다. "
883
+ "12회의 깊이 있는 대화 교환으로 구성된 고품질 대담을 만드세요. "
884
+ "반드시 서로 존댓말을 사용하세요."
 
 
 
 
885
  )
886
  else:
887
  system_message = (
888
+ f"You are a top professional podcast scriptwriter. "
889
+ f"Create high-quality discussions in {language} with exactly 12 exchanges. "
890
+ f"Include specific data, research findings, and real cases."
 
 
 
 
891
  )
892
 
893
  chat_completion = self.llm_client.chat.completions.create(
 
930
  filenames = []
931
 
932
  try:
933
+ # 언어별 음성 설정
934
+ voices = EDGE_TTS_VOICES.get(language, EDGE_TTS_VOICES["English"])
 
 
 
 
 
 
 
 
 
935
 
936
  for i, turn in enumerate(conversation_json["conversation"]):
937
  filename = output_dir / f"output_{i}.wav"
 
982
  # Create different voice characteristics for different speakers
983
  if language == "Korean":
984
  voice_configs = [
985
+ {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 준수입니다.", "gender": "male"},
986
+ {"prompt_text": "안녕하세요, 저는 오늘 이 주제에 대해 설명드릴 민호입니다.", "gender": "male"}
987
  ]
988
  else:
989
  voice_configs = [
990
+ {"prompt_text": "Hello everyone, I'm Alex, your host for today's podcast.", "gender": "male"},
991
+ {"prompt_text": "Hi, I'm Jordan. I'm excited to share my insights with you.", "gender": "male"}
992
  ]
993
 
994
  for i, turn in enumerate(conversation_json["conversation"]):
 
996
  if not text.strip():
997
  continue
998
 
 
999
  voice_config = voice_configs[i % len(voice_configs)]
 
1000
  output_file = os.path.join(output_dir, f"spark_output_{i}.wav")
1001
 
 
1002
  cmd = [
1003
  "python", "-m", "cli.inference",
1004
  "--text", text,
 
1010
  ]
1011
 
1012
  try:
 
1013
  result = subprocess.run(
1014
  cmd,
1015
  capture_output=True,
1016
  text=True,
1017
  timeout=60,
1018
+ cwd="."
1019
  )
1020
 
1021
  if result.returncode == 0:
1022
  audio_files.append(output_file)
1023
  else:
1024
  print(f"Spark TTS error for turn {i}: {result.stderr}")
1025
+ silence = np.zeros(int(22050 * 1.0))
 
1026
  sf.write(output_file, silence, 22050)
1027
  audio_files.append(output_file)
1028
 
1029
  except subprocess.TimeoutExpired:
1030
  print(f"Spark TTS timeout for turn {i}")
 
1031
  silence = np.zeros(int(22050 * 1.0))
1032
  sf.write(output_file, silence, 22050)
1033
  audio_files.append(output_file)
1034
  except Exception as e:
1035
  print(f"Error running Spark TTS for turn {i}: {e}")
 
1036
  silence = np.zeros(int(22050 * 1.0))
1037
  sf.write(output_file, silence, 22050)
1038
  audio_files.append(output_file)
 
1044
  else:
1045
  raise RuntimeError("No audio files generated")
1046
 
 
1047
  conversation_text = "\n".join(
1048
  f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
1049
  for i, turn in enumerate(conversation_json["conversation"])
 
1069
  speaker = speakers[i % 2]
1070
  speaker_id = self.melo_models["EN"].hps.data.spk2id[speaker]
1071
 
 
1072
  self.melo_models["EN"].tts_to_file(
1073
  text, speaker_id, bio, speed=1.0,
1074
  pbar=progress.tqdm if progress else None,
 
1079
  audio_segment = AudioSegment.from_file(bio, format="wav")
1080
  combined_audio += audio_segment
1081
 
 
1082
  final_audio_path = "melo_podcast.mp3"
1083
  combined_audio.export(final_audio_path, format="mp3")
1084
 
 
1085
  conversation_text = "\n".join(
1086
  f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
1087
  for i, turn in enumerate(conversation_json["conversation"])
 
1140
  else: # Keyword
1141
  if not article_input or not isinstance(article_input, str):
1142
  return "Please provide a keyword or topic.", None
 
1143
  text = search_and_compile_content(article_input, language)
1144
+ text = f"Keyword-based content:\n{text}"
1145
+
1146
  # Limit text to max words
1147
  words = text.split()
1148
  if len(words) > converter.config.max_words:
 
1150
 
1151
  # Extract conversation based on mode
1152
  if mode == "Local":
 
1153
  try:
1154
  conversation_json = converter.extract_conversation_local(text, language)
1155
  except Exception as e:
1156
  print(f"Local mode failed: {e}, trying API fallback")
 
1157
  api_key = os.environ.get("TOGETHER_API_KEY")
1158
  if api_key:
1159
  converter.initialize_api_mode(api_key)
1160
  conversation_json = converter.extract_conversation_api(text, language)
1161
  else:
1162
  raise RuntimeError("Local mode failed and no API key available for fallback")
1163
+ else: # API mode
1164
  api_key = os.environ.get("TOGETHER_API_KEY")
1165
  if not api_key:
1166
  print("API key not found, falling back to local mode")
 
1191
  return "Please provide conversation text.", None
1192
 
1193
  try:
 
1194
  conversation_json = converter.parse_conversation_text(conversation_text)
1195
 
1196
  if not conversation_json["conversation"]:
1197
  return "No valid conversation found in the text.", None
1198
 
1199
+ # Edge TTS 전용 언어는 자동으로 Edge-TTS 사용
1200
+ if language in EDGE_TTS_ONLY_LANGUAGES and tts_engine != "Edge-TTS":
1201
+ tts_engine = "Edge-TTS"
1202
 
1203
  # Generate audio based on TTS engine
1204
  if tts_engine == "Edge-TTS":
 
1211
  else: # MeloTTS
1212
  if not MELO_AVAILABLE:
1213
  return "MeloTTS not available. Please install required dependencies.", None
1214
+ if language in EDGE_TTS_ONLY_LANGUAGES:
1215
+ return f"MeloTTS does not support {language}. Please use Edge-TTS for this language.", None
1216
  converter.initialize_melo_tts()
1217
  output_file, _ = converter.text_to_speech_melo(conversation_json)
1218
 
 
1232
  return asyncio.run(regenerate_audio(conversation_text, tts_engine, language))
1233
 
1234
 
1235
+ def update_tts_engine_for_language(language):
1236
+ """언어별 TTS 엔진 옵션 업데이트"""
1237
+ if language in EDGE_TTS_ONLY_LANGUAGES:
1238
+ language_info = {
1239
+ "Korean": "한국어는 Edge-TTS만 지원됩니다",
1240
+ "Japanese": "日本語はEdge-TTSのみサポートされています",
1241
+ "French": "Le français n'est pris en charge que par Edge-TTS",
1242
+ "German": "Deutsch wird nur von Edge-TTS unterstützt",
1243
+ "Spanish": "El español solo es compatible con Edge-TTS",
1244
+ "Italian": "L'italiano è supportato solo da Edge-TTS",
1245
+ "Portuguese": "O português é suportado apenas pelo Edge-TTS",
1246
+ "Dutch": "Nederlands wordt alleen ondersteund door Edge-TTS",
1247
+ "Thai": "ภาษาไทยรองรับเฉพาะ Edge-TTS เท่านั้น",
1248
+ "Vietnamese": "Tiếng Việt chỉ được hỗ trợ bởi Edge-TTS",
1249
+ "Arabic": "العربية مدعومة فقط من Edge-TTS",
1250
+ "Hebrew": "עברית נתמכת רק על ידי Edge-TTS",
1251
+ "Indonesian": "Bahasa Indonesia hanya didukung oleh Edge-TTS",
1252
+ "Hindi": "हिंदी केवल Edge-TTS द्वारा समर्थित है",
1253
+ "Russian": "Русский поддерживается только Edge-TTS",
1254
+ "Chinese": "中文仅支持Edge-TTS"
1255
+ }
1256
+ info_text = language_info.get(language, f"{language} is only supported by Edge-TTS")
1257
+
1258
  return gr.Radio(
1259
  choices=["Edge-TTS"],
1260
  value="Edge-TTS",
1261
  label="TTS Engine",
1262
+ info=info_text,
1263
  interactive=False
1264
  )
1265
  else:
 
1295
  print(f"Failed to download model at startup: {e}")
1296
 
1297
 
1298
+ # Gradio Interface - 개선된 다국어 레이아웃
1299
  with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
1300
  .container {max-width: 1200px; margin: auto; padding: 20px;}
1301
  .header-text {text-align: center; margin-bottom: 30px;}
 
1307
  # 헤더
1308
  with gr.Row(elem_classes="header-text"):
1309
  gr.Markdown("""
1310
+ # 🎙️ AI Podcast Generator - Professional Multi-Language Edition
1311
+ ### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation in 24+ languages!
1312
  """)
1313
 
1314
  with gr.Row(elem_classes="discord-badge"):
 
1320
  </p>
1321
  """)
1322
 
 
 
1323
  # 상태 표시 섹션
1324
  with gr.Row():
1325
  with gr.Column(scale=1):
 
1332
  """)
1333
  with gr.Column(scale=1):
1334
  gr.Markdown("""
1335
+ #### 🌍 Multi-Language Support
1336
+ - **24+ Languages**: Korean, Japanese, French, German, Spanish, Italian, etc.
1337
+ - **Native Voices**: Optimized for each language
1338
+ - **Professional Style**: Expert discussions with data & insights
1339
+ - **Auto-TTS Selection**: Best engine per language
1340
  """)
1341
 
1342
  # 메인 입력 섹션
 
1371
  # 키워드 입력
1372
  keyword_input = gr.Textbox(
1373
  label="🔍 Topic/Keyword",
1374
+ placeholder="Enter a topic (e.g., 'AI trends 2024', '인공지능', 'IA tendances', 'KI Trends')",
1375
  value="",
1376
  visible=False,
1377
  info="System will search and compile latest information",
 
1382
  with gr.Column(scale=1):
1383
  # 언어 선택
1384
  language_selector = gr.Radio(
1385
+ choices=[
1386
+ "English", "Korean", "Japanese", "French", "German",
1387
+ "Spanish", "Italian", "Portuguese", "Dutch", "Thai",
1388
+ "Vietnamese", "Arabic", "Hebrew", "Indonesian", "Hindi",
1389
+ "Russian", "Chinese", "Norwegian", "Swedish", "Finnish",
1390
+ "Danish", "Polish", "Turkish", "Greek", "Czech"
1391
+ ],
1392
  value="English",
1393
+ label="🌐 Language / 언어 / 语言",
1394
+ info="Select podcast language"
1395
  )
1396
 
1397
  # 처리 모드
 
1429
  lines=25,
1430
  max_lines=50,
1431
  interactive=True,
1432
+ placeholder="Professional podcast conversation will appear here...\n전문 팟캐스트 대화가 여기에 표시됩니다...\nLa conversation professionnelle du podcast apparaîtra ici...",
1433
  info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
1434
  )
1435
 
 
1460
  gr.Markdown("""
1461
  #### 💡 Quick Tips:
1462
  - **URL**: Paste any article link
1463
+ - **PDF**: Upload documents directly
1464
  - **Keyword**: Enter topics for AI research
1465
+ - **24+ Languages** fully supported
1466
  - Edit conversation before audio generation
1467
+ - Auto TTS engine selection per language
1468
  """)
1469
 
1470
  # 예제 섹션
1471
+ with gr.Accordion("📚 Multi-Language Examples", open=False):
1472
  gr.Examples(
1473
  examples=[
1474
+ ["https://huggingface.co/blog/openfreeai/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
1475
  ["quantum computing breakthroughs", "Keyword", "Local", "Edge-TTS", "English"],
 
1476
  ["인공지능 윤리와 규제", "Keyword", "Local", "Edge-TTS", "Korean"],
1477
+ ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Japanese"],
1478
+ ["intelligence artificielle tendances", "Keyword", "Local", "Edge-TTS", "French"],
1479
+ ["künstliche intelligenz entwicklung", "Keyword", "Local", "Edge-TTS", "German"],
1480
+ ["inteligencia artificial avances", "Keyword", "Local", "Edge-TTS", "Spanish"],
1481
  ],
1482
  inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
1483
  outputs=[conversation_output, status_output],
 
1494
 
1495
  # 언어 변경 시 TTS 엔진 옵션 업데이트
1496
  language_selector.change(
1497
+ fn=update_tts_engine_for_language,
1498
  inputs=[language_selector],
1499
  outputs=[tts_selector]
1500
  )
 
1531
  share=False,
1532
  server_name="0.0.0.0",
1533
  server_port=7860
1534
+ )