Live-Podcast

Running on Zero

App Files Files Community

openfree committed on May 26

Commit

cd31bc6

verified ·

1 Parent(s): 44ed247

Update app.py

Browse files

Files changed (1) hide show

app.py +291 -351

app.py CHANGED Viewed

@@ -79,6 +79,118 @@ load_dotenv()
 BRAVE_KEY = os.getenv("BSEARCH_API")
 BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
 @dataclass
 class ConversationConfig:
     max_words: int = 8000  # 4000에서 6000으로 증가 (1.5배)
@@ -286,126 +398,6 @@ These factors are making the future of {keyword} increasingly promising.
     return intro + compiled
-def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
-    """Build prompt for conversation generation with enhanced radio talk show style"""
-    # 텍스트 길이 제한
-    max_text_length = 4500 if search_context else 6000
-    if len(text) > max_text_length:
-        text = text[:max_text_length] + "..."
-    if language == "Korean":
-        # 대화 템플릿을 더 많은 턴으로 확장 (15-20회)
-        template = """
-        {
-            "conversation": [
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""},
-                {"speaker": "준수", "text": ""},
-                {"speaker": "민호", "text": ""}
-            ]
-        }
-        """
-        context_part = ""
-        if search_context:
-            context_part = f"# 최신 관련 정보:\n{search_context}\n"
-        base_prompt = (
-            f"# 원본 콘텐츠:\n{text}\n\n"
-            f"{context_part}"
-            f"위 내용으로 전문적이고 심층적인 라디오 팟캐스트 대담 프로그램 대본을 작성해주세요.\n\n"
-            f"## 필수 요구사항:\n"
-            f"1. **최소 18회 이상의 대화 교환** (준수 9회, 민호 9회 이상)\n"
-            f"2. **대화 스타일**: 전문적이고 깊이 있는 팟캐스트 대담\n"
-            f"3. **화자 역할**:\n"
-            f"   - 준수: 진행자 (통찰력 있는 질문, 핵심 포인트 정리, 청취자 관점 대변)\n"
-            f"   - 민호: 전문가 (상세하고 전문적인 설명, 구체적 예시, 데이터 기반 분석)\n"
-            f"4. **답변 규칙**:\n"
-            f"   - 준수: 1-2문장의 명확한 질문이나 요약\n"
-            f"   - 민호: **반드시 2-4문장으로 충실하게 답변** (핵심 개념 설명 + 부연 설명 + 예시/근거)\n"
-            f"   - 전문 용어는 쉽게 풀어서 설명\n"
-            f"   - 구체적인 수치, 사례, 연구 결과 인용\n"
-            f"5. **내용 구성**:\n"
-            f"   - 도입부 (2-3회): 주제의 중요성과 배경 설명\n"
-            f"   - 전개부 (12-14회): 핵심 내용을 다각도로 심층 분석\n"
-            f"   - 마무리 (2-3회): 핵심 요약과 미래 전망\n"
-            f"6. **전문성**: 학술적 근거와 실무적 통찰을 균형있게 포함\n"
-            f"7. **필수**: 서로 존댓말 사용, 청취자가 전문 지식을 얻을 수 있도록 상세히 설명\n\n"
-            f"반드시 위 JSON 형식으로 18회 이상의 전문적인 대화를 작성하세요:\n{template}"
-        )
-        return base_prompt
-    else:
-        # 영어 템플릿도 확장
-        template = """
-        {
-            "conversation": [
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""},
-                {"speaker": "Alex", "text": ""},
-                {"speaker": "Jordan", "text": ""}
-            ]
-        }
-        """
-        context_part = ""
-        if search_context:
-            context_part = f"# Latest Information:\n{search_context}\n"
-        base_prompt = (
-            f"# Content:\n{text}\n\n"
-            f"{context_part}"
-            f"Create a professional and in-depth podcast conversation.\n\n"
-            f"## Requirements:\n"
-            f"1. **Minimum 18 conversation exchanges** (Alex 9+, Jordan 9+)\n"
-            f"2. **Style**: Professional, insightful podcast discussion\n"
-            f"3. **Roles**:\n"
-            f"   - Alex: Host (insightful questions, key point summaries, audience perspective)\n"
-            f"   - Jordan: Expert (detailed explanations, concrete examples, data-driven analysis)\n"
-            f"4. **Response Rules**:\n"
-            f"   - Alex: 1-2 sentence clear questions or summaries\n"
-            f"   - Jordan: **Must answer in 2-4 sentences** (core concept + elaboration + example/evidence)\n"
-            f"   - Explain technical terms clearly\n"
-            f"   - Include specific data, cases, research findings\n"
-            f"5. **Structure**:\n"
-            f"   - Introduction (2-3 exchanges): Topic importance and context\n"
-            f"   - Main content (12-14 exchanges): Multi-angle deep analysis\n"
-            f"   - Conclusion (2-3 exchanges): Key takeaways and future outlook\n"
-            f"6. **Expertise**: Balance academic rigor with practical insights\n\n"
-            f"Create exactly 18+ professional exchanges in this JSON format:\n{template}"
-        )
-        return base_prompt
 class UnifiedAudioConverter:
     def __init__(self, config: ConversationConfig):
         self.config = config
@@ -557,7 +549,6 @@ class UnifiedAudioConverter:
         else:
             return MessagesFormatterType.LLAMA_3
     def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
         """Build prompt for conversation generation with enhanced professional podcast style"""
         # 텍스트 길이 제한
@@ -565,31 +556,59 @@ class UnifiedAudioConverter:
         if len(text) > max_text_length:
             text = text[:max_text_length] + "..."
         if language == "Korean":
-            # 대화 템플릿을 더 많은 턴으로 확장
-            template = """
-            {
-                "conversation": [
-                    {"speaker": "준수", "text": ""},
-                    {"speaker": "민호", "text": ""},
-                    {"speaker": "준수", "text": ""},
-                    {"speaker": "민호", "text": ""},
-                    {"speaker": "준수", "text": ""},
-                    {"speaker": "민호", "text": ""},
-                    {"speaker": "준수", "text": ""},
-                    {"speaker": "민호", "text": ""},
-                    {"speaker": "준수", "text": ""},
-                    {"speaker": "민호", "text": ""},
-                    {"speaker": "준수", "text": ""},
-                    {"speaker": "민호", "text": ""}
-                ]
-            }
-            """
-            context_part = ""
-            if search_context:
                 context_part = f"# 최신 관련 정보:\n{search_context}\n"
             base_prompt = (
                 f"# 원본 콘텐츠:\n{text}\n\n"
                 f"{context_part}"
@@ -597,99 +616,48 @@ class UnifiedAudioConverter:
                 f"## 핵심 지침:\n"
                 f"1. **대화 스타일**: 전문적이면서도 이해하기 쉬운 팟캐스트 대담\n"
                 f"2. **화자 역할**:\n"
-                f"   - 준수: 진행자/호스트 (핵심을 짚는 질문, 청취자 관점에서 궁금한 점 질문)\n"
-                f"   - 민호: 전문가 (깊이 있는 설명, 구체적 사례와 데이터 제시)\n"
                 f"3. **중요한 답변 규칙**:\n"
-                f"   - 준수: 1-2문장의 명확한 질문 (\"그렇다면 구체적으로 어떤 의미인가요?\", \"실제 사례를 들어주시겠어요?\")\n"
-                f"   - 민호: **반드시 2-4문장으로 충실히 답변** (개념 설명 + 구체적 설명 + 예시나 함의)\n"
-                f"   - 예: \"이것은 ~를 의미합니다. 구체적으로 ~한 측면에서 중요한데요. 실제로 최근 ~한 사례가 있었고, 이는 ~를 보여줍니다.\"\n"
-                f"4. **전문성 요소**:\n"
-                f"   - 통계나 연구 결과 인용\n"
-                f"   - 실제 사례와 케이스 스터디\n"
-                f"   - 전문 용어를 쉽게 풀어서 설명\n"
-                f"   - 다양한 관점과 시각 제시\n"
-                f"5. **필수 규칙**: 서로 존댓말 사용, 12-15회 대화 교환\n\n"
                 f"JSON 형식으로만 반환:\n{template}"
             )
-            return base_prompt
         else:
-            # 영어 템플릿도 확장
-            template = """
-            {
-                "conversation": [
-                    {"speaker": "Alex", "text": ""},
-                    {"speaker": "Jordan", "text": ""},
-                    {"speaker": "Alex", "text": ""},
-                    {"speaker": "Jordan", "text": ""},
-                    {"speaker": "Alex", "text": ""},
-                    {"speaker": "Jordan", "text": ""},
-                    {"speaker": "Alex", "text": ""},
-                    {"speaker": "Jordan", "text": ""},
-                    {"speaker": "Alex", "text": ""},
-                    {"speaker": "Jordan", "text": ""},
-                    {"speaker": "Alex", "text": ""},
-                    {"speaker": "Jordan", "text": ""}
-                ]
-            }
-            """
-            context_part = ""
-            if search_context:
-                context_part = f"# Latest Information:\n{search_context}\n"
             base_prompt = (
                 f"# Content:\n{text}\n\n"
                 f"{context_part}"
-                f"Create a professional and insightful podcast conversation.\n\n"
                 f"## Key Guidelines:\n"
                 f"1. **Style**: Professional yet accessible podcast discussion\n"
                 f"2. **Roles**:\n"
-                f"   - Alex: Host (insightful questions, audience perspective)\n"
-                f"   - Jordan: Expert (in-depth explanations, concrete examples and data)\n"
                 f"3. **Critical Response Rules**:\n"
-                f"   - Alex: 1-2 sentence clear questions (\"Could you elaborate on that?\", \"What's a real-world example?\")\n"
-                f"   - Jordan: **Must answer in 2-4 sentences** (concept + detailed explanation + example/implication)\n"
-                f"   - Example: \"This refers to... Specifically, it's important because... For instance, recent studies show... This demonstrates...\"\n"
-                f"4. **Professional Elements**:\n"
-                f"   - Cite statistics and research\n"
-                f"   - Real cases and case studies\n"
-                f"   - Explain technical terms clearly\n"
-                f"   - Present multiple perspectives\n"
-                f"5. **Length**: 12-15 exchanges total\n\n"
                 f"Return JSON only:\n{template}"
             )
-            return base_prompt
     def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
         """Build messages for local LLM with enhanced professional podcast style"""
         if language == "Korean":
             system_message = (
                 "당신은 한국 최고의 전문 팟캐스트 작가입니다. "
-                "청취자들이 전문 지식을 쉽게 이해할 수 있는 고품질 대담을 만들어냅니다.\n\n"
-                "핵심 원칙:\n"
-                "1. 진행자(준수)는 핵심을 짚는 통찰력 있는 질문으로 대화를 이끌어갑니다\n"
-                "2. 전문가(민호)는 반드시 2-4문장으로 깊이 있게 답변합니다 (개념+설명+예시)\n"
-                "3. 구체적인 데이터, 연구 결과, 실제 사례를 포함합니다\n"
-                "4. 전문 용어는 쉽게 풀어서 설명하되, 정확성을 유지합니다\n"
-                "5. 다양한 관점을 제시하여 균형잡힌 시각을 제공합니다\n"
-                "6. 반드시 서로 존댓말을 사용하며, 전문적이면서도 친근한 톤을 유지합니다"
             )
         else:
             system_message = (
-                "You are an expert podcast scriptwriter who creates high-quality, "
-                "professional discussions that make complex topics accessible.\n\n"
-                "Key principles:\n"
-                "1. The host (Alex) asks insightful questions that drive the conversation\n"
-                "2. The expert (Jordan) MUST answer in 2-4 sentences (concept+explanation+example)\n"
-                "3. Include specific data, research findings, and real cases\n"
-                "4. Explain technical terms clearly while maintaining accuracy\n"
-                "5. Present multiple perspectives for balanced views\n"
-                "6. Maintain a professional yet approachable tone"
             )
         return [
@@ -719,36 +687,19 @@ class UnifiedAudioConverter:
             chat_template = self._get_messages_formatter_type(self.config.local_model_name)
             provider = LlamaCppPythonProvider(self.local_llm)
-            # 강화된 전문 팟캐스트 스타일 시스템 메시지
             if language == "Korean":
                 system_message = (
                     "당신은 한국의 유명 팟캐스트 전문 작가입니다. "
-                    "청취자들이 깊이 있는 전문 지식을 얻을 수 있는 고품질 대담을 만듭니다.\n\n"
-                    "작성 규칙:\n"
-                    "1. 진행자(준수)는 핵심을 짚는 1-2문장 질문을 합니다\n"
-                    "2. 전문가(민호)는 반드시 2-4문장으로 충실히 답변합니다:\n"
-                    "   - 첫 문장: 핵심 개념 설명\n"
-                    "   - 둘째 문장: 구체적인 설명이나 맥락\n"
-                    "   - 셋째-넷째 문장: 실제 예시, 데이터, 함의\n"
-                    "3. 통계, 연구 결과, 실제 사례를 적극 활용하세요\n"
-                    "4. 전문성을 유지하면서도 이해하기 쉽게 설명하세요\n"
-                    "5. 12-15회의 대화 교환으로 구성하세요\n"
-                    "6. JSON 형식으로만 응답하세요"
                 )
             else:
                 system_message = (
-                    "You are a professional podcast scriptwriter creating high-quality, "
-                    "insightful discussions that provide deep expertise to listeners.\n\n"
-                    "Writing rules:\n"
-                    "1. Host (Alex) asks focused 1-2 sentence questions\n"
-                    "2. Expert (Jordan) MUST answer in 2-4 substantial sentences:\n"
-                    "   - First sentence: Core concept explanation\n"
-                    "   - Second sentence: Specific details or context\n"
-                    "   - Third-fourth sentences: Real examples, data, implications\n"
-                    "3. Actively use statistics, research findings, real cases\n"
-                    "4. Maintain expertise while keeping explanations accessible\n"
-                    "5. Create 12-15 conversation exchanges\n"
-                    "6. Respond only in JSON format"
                 )
             agent = LlamaCppAgent(
@@ -759,10 +710,10 @@ class UnifiedAudioConverter:
             )
             settings = provider.get_provider_default_settings()
-            settings.temperature = 0.75  # 약간 낮춰서 더 일관된 전문적 답변
             settings.top_k = 40
             settings.top_p = 0.95
-            settings.max_tokens = self.config.max_tokens  # 증가된 토큰 수 사용
             settings.repeat_penalty = 1.1
             settings.stream = False
@@ -783,10 +734,6 @@ class UnifiedAudioConverter:
             if json_match:
                 conversation_data = json.loads(json_match.group())
-                # 대화 길이 확인 및 조정
-                if len(conversation_data["conversation"]) < self.config.min_conversation_turns:
-                    print(f"Conversation too short ({len(conversation_data['conversation'])} turns), regenerating...")
-                    # 재시도 로직 추가 가능
                 return conversation_data
             else:
                 raise ValueError("No valid JSON found in local LLM response")
@@ -797,24 +744,20 @@ class UnifiedAudioConverter:
     @spaces.GPU(duration=120)
     def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
-        """Extract conversation using legacy local model with enhanced professional style"""
         try:
             self.initialize_legacy_local_mode()
-            # 강화된 전문 팟캐스트 스타일 시스템 메시지
             if language == "Korean":
                 system_message = (
                     "당신은 전문 팟캐스트 작가입니다. "
-                    "진행자(준수)는 통찰력 있는 질문을, 전문가(민호)는 2-4문장의 상세한 답변을 합니다. "
-                    "구체적인 데이터와 사례를 포함하여 전문적이면서도 이해하기 쉽게 설명하세요. "
-                    "12-15회 대화 교환으로 구성하세요."
                 )
             else:
                 system_message = (
-                    "You are a professional podcast scriptwriter. "
-                    "Create insightful dialogue where the host (Alex) asks focused questions "
-                    "and the expert (Jordan) gives detailed 2-4 sentence answers. "
-                    "Include specific data and examples. Create 12-15 exchanges."
                 )
             chat = [
@@ -839,7 +782,7 @@ class UnifiedAudioConverter:
             generate_kwargs = dict(
                 model_inputs,
                 streamer=streamer,
-                max_new_tokens=self.config.max_new_tokens,  # 증가된 토큰 수 사용
                 do_sample=True,
                 temperature=0.75,
                 eos_token_id=terminators,
@@ -862,14 +805,17 @@ class UnifiedAudioConverter:
         except Exception as e:
             print(f"Legacy local model also failed: {e}")
-            # Return enhanced default template
-            if language == "Korean":
-                return self._get_default_korean_conversation()
-            else:
-                return self._get_default_english_conversation()
     def _get_default_korean_conversation(self) -> Dict:
-        """더 전문적인 기본 한국어 대화 템플릿"""
         return {
             "conversation": [
                 {"speaker": "준수", "text": "안녕하세요, 여러분! 오늘은 정말 중요하고 흥미로운 주제를 다뤄보려고 합니다. 민호 박사님, 먼저 이 주제가 왜 지금 이렇게 주목받고 있는지 설명해주시겠어요?"},
@@ -885,12 +831,12 @@ class UnifiedAudioConverter:
                 {"speaker": "준수", "text": "실용적인 조언 감사합니다. 마지막으로 이 분야의 미래 전망은 어떻게 보시나요?"},
                 {"speaker": "민호", "text": "향후 10년은 인류 역사상 가장 급격한 기술 발전을 경험하는 시기가 될 것입니다. 가트너의 하이프 사이클 분석에 따르면, 현재 우리는 이 기술의 초기 단계에 불과합니다. 2030년까지는 지금으로서는 상상하기 어려운 수준의 혁신이 일어날 것으로 예상됩니다. 중요한 것은 이런 변화를 두려워하기보다는 기회로 삼아 더 나은 미래를 만들어가는 것이라고 생각합니다."},
                 {"speaker": "준수", "text": "정말 통찰력 있는 말씀이네요. 오늘 너무나 유익한 시간이었습니다. 청취자 여러분도 오늘 논의된 내용을 바탕으로 미래를 준비하시길 바랍니다. 민호 박사님, 귀중한 시간 내주셔서 감사합니다!"},
-                {"speaker": "민호", "text": "감사합니다. 청취자 여러분들이 이 변화의 시대를 현명하게 헤쳐나가시길 바랍니다. 기술은 도구일 뿐이고, 그것을 어떻게 활용하는지는 우리에게 달려있다는 점을 기억해주세요. 오늘 말씀드린 내용에 대해 더 궁금하신 점이 있으시면 제가 운영하는 블로그나 최근 출간한 책에서 더 자세한 정보를 찾으실 수 있습니다."}
             ]
         }
     def _get_default_english_conversation(self) -> Dict:
-        """Enhanced professional English conversation template"""
         return {
             "conversation": [
                 {"speaker": "Alex", "text": "Welcome everyone to our podcast! Today we're diving into a topic that's reshaping our world. Dr. Jordan, could you start by explaining why this subject has become so critical right now?"},
@@ -898,24 +844,22 @@ class UnifiedAudioConverter:
                 {"speaker": "Alex", "text": "400% acceleration is staggering! What does this mean for everyday people who might not be tech-savvy?"},
                 {"speaker": "Jordan", "text": "The impact will be profound yet accessible. Think about how smartphones revolutionized communication - this will be similar but across every aspect of life. McKinsey's latest report projects that by 2026, these technologies will create $4.4 trillion in annual value globally. For individuals, this translates to personalized healthcare that can predict illnesses years in advance, educational systems that adapt to each student's learning style, and financial tools that democratize wealth-building strategies previously available only to the ultra-wealthy."},
                 {"speaker": "Alex", "text": "Those applications sound transformative. Can you give us a concrete example of how this is already being implemented?"},
-                {"speaker": "Jordan", "text": "Absolutely. Let me share a compelling case from Johns Hopkins Hospital. They've deployed an AI system that analyzes patient data in real-time, reducing diagnostic errors by 85% and cutting average diagnosis time from days to hours. In one documented case, the system identified a rare genetic disorder in a child that had been misdiagnosed for three years. The accuracy comes from analyzing patterns across millions of cases - something impossible for even the most experienced doctors to do manually. This technology is now being rolled out to rural hospitals, bringing world-class diagnostic capabilities to underserved communities."},
                 {"speaker": "Alex", "text": "That's truly life-changing technology. But I imagine there are significant challenges and risks we need to consider?"},
-                {"speaker": "Jordan", "text": "You're absolutely right to raise this. The challenges are as significant as the opportunities. The World Economic Forum identifies three critical risks: First, algorithmic bias could perpetuate or amplify existing inequalities if not carefully managed. Second, cybersecurity threats become exponentially more dangerous when AI systems control critical infrastructure. Third, there's the socioeconomic disruption - PwC estimates that 30% of jobs could be automated by 2030. However, history shows us that technological revolutions create new opportunities even as they displace old ones. The key is proactive adaptation and responsible development."},
                 {"speaker": "Alex", "text": "How should individuals and organizations prepare for these changes?"},
-                {"speaker": "Jordan", "text": "Preparation requires a multi-faceted approach. For individuals, I recommend focusing on skills that complement rather than compete with AI: critical thinking, emotional intelligence, and creative problem-solving. MIT's recent study shows that professionals who combine domain expertise with AI literacy see salary increases of 40% on average. Organizations need to invest in continuous learning programs - Amazon's $700 million worker retraining initiative is a good model. Most importantly, we need to cultivate an adaptive mindset. The half-life of specific technical skills is shrinking, but the ability to learn and unlearn quickly is becoming invaluable."},
                 {"speaker": "Alex", "text": "That's practical advice. What about the ethical considerations? How do we ensure this technology benefits humanity as a whole?"},
-                {"speaker": "Jordan", "text": "Ethics must be at the forefront of development. The EU's AI Act and similar regulations worldwide are establishing important guardrails. We need transparent AI systems where decisions can be explained and audited. Companies like IBM and Google have established AI ethics boards, but we need industry-wide standards. Additionally, we must address the digital divide - UNESCO reports that 37% of the global population still lacks internet access. Without inclusive development, these technologies could exacerbate global inequality rather than reduce it. The solution requires collaboration between technologists, ethicists, policymakers, and communities."},
                 {"speaker": "Alex", "text": "Looking ahead, what's your vision for how this technology will shape the next decade?"},
-                {"speaker": "Jordan", "text": "The next decade will be transformative beyond our current imagination. Ray Kurzweil's prediction of technological singularity by 2045 seems increasingly plausible. By 2035, I expect we'll see autonomous systems managing entire cities, personalized medicine extending human lifespan by 20-30 years, and educational AI that makes world-class education universally accessible. The convergence of AI with quantum computing, biotechnology, and nanotechnology will unlock possibilities we can barely conceive of today. However, the future isn't predetermined - it's shaped by the choices we make now about development priorities, ethical frameworks, and inclusive access."},
-                {"speaker": "Alex", "text": "That's both exciting and sobering. Any final thoughts for our listeners?"},
-                {"speaker": "Jordan", "text": "I'd encourage everyone to view this as humanity's next great adventure. Yes, there are risks and challenges, but we're also on the cusp of solving problems that have plagued us for millennia - disease, poverty, environmental degradation. The key is engaged participation rather than passive observation. Stay informed through reliable sources, experiment with new technologies, and most importantly, contribute to the conversation about what kind of future we want to build. The decisions we make in the next five years will reverberate for generations."},
                 {"speaker": "Alex", "text": "Dr. Jordan, this has been an incredibly enlightening discussion. Thank you for sharing your expertise and insights with us today."},
-                {"speaker": "Jordan", "text": "Thank you, Alex. It's been a pleasure discussing these crucial topics. For listeners wanting to dive deeper, I've compiled additional resources on my website, including links to the studies we discussed today. Remember, the future isn't something that happens to us - it's something we create together. I look forward to seeing how each of you contributes to shaping this exciting new era."}
             ]
         }
     def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
-        """Extract conversation using API with enhanced professional style"""
         if not self.llm_client:
             raise RuntimeError("API mode not initialized")
@@ -932,26 +876,18 @@ class UnifiedAudioConverter:
                 except Exception as e:
                     print(f"Search failed, continuing without context: {e}")
-            # 강화된 전문 팟캐스트 스타일 프롬프트
             if language == "Korean":
                 system_message = (
                     "당신은 한국의 최고 전문 팟캐스트 작가입니다. "
-                    "청취자들이 깊이 있는 인사이트를 얻을 수 있는 고품질 대담을 만드세요.\n"
-                    "준수(진행자)는 핵심을 짚는 1-2문장 질문을 하고, "
-                    "민호(전문가)는 반드시 2-4문장으로 상세히 답변합니다. "
-                    "구체적인 데이터, 연구 결과, 실제 사례를 포함하세요. "
-                    "전문 용어는 쉽게 설명하고, 반드시 서로 존댓말을 사용하세요. "
-                    "12-15회의 깊이 있는 대화 교환으로 구성하세요."
                 )
             else:
                 system_message = (
-                    "You are a top professional podcast scriptwriter. "
-                    "Create high-quality discussions that provide deep insights to listeners. "
-                    "Alex (host) asks focused 1-2 sentence questions, "
-                    "while Jordan (expert) MUST answer in 2-4 detailed sentences. "
-                    "Include specific data, research findings, and real cases. "
-                    "Explain technical terms clearly. "
-                    "Create 12-15 insightful conversation exchanges."
                 )
             chat_completion = self.llm_client.chat.completions.create(
@@ -994,17 +930,8 @@ class UnifiedAudioConverter:
         filenames = []
         try:
-            # 언어별 음성 설정 - 한국어는 모두 남성 음성
-            if language == "Korean":
-                voices = [
-                    "ko-KR-HyunsuNeural",  # 남성 음성 1 (차분하고 신뢰감 있는)
-                    "ko-KR-InJoonNeural"   # 남성 음성 2 (활기차고 친근한)
-                ]
-            else:
-                voices = [
-                    "en-US-AndrewMultilingualNeural",    # 남성 음성 1
-                    "en-US-BrianMultilingualNeural"      # 남성 음성 2
-                ]
             for i, turn in enumerate(conversation_json["conversation"]):
                 filename = output_dir / f"output_{i}.wav"
@@ -1055,13 +982,13 @@ class UnifiedAudioConverter:
             # Create different voice characteristics for different speakers
             if language == "Korean":
                 voice_configs = [
-                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 준수입니다. 여러분과 함께 흥미로운 이야기를 나눠보겠습니다.", "gender": "male"},
-                    {"prompt_text": "안녕하세요, 저는 오늘 이 주제에 대해 설명드릴 민호입니다. 쉽고 재미있게 설명드릴게요.", "gender": "male"}
                 ]
             else:
                 voice_configs = [
-                    {"prompt_text": "Hello everyone, I'm Alex, your host for today's podcast. Let's explore this fascinating topic together.", "gender": "male"},
-                    {"prompt_text": "Hi, I'm Jordan. I'm excited to share my insights on this subject with you all today.", "gender": "male"}
                 ]
             for i, turn in enumerate(conversation_json["conversation"]):
@@ -1069,12 +996,9 @@ class UnifiedAudioConverter:
                 if not text.strip():
                     continue
-                # Use different voice config for each speaker
                 voice_config = voice_configs[i % len(voice_configs)]
                 output_file = os.path.join(output_dir, f"spark_output_{i}.wav")
-                # Run Spark TTS CLI inference
                 cmd = [
                     "python", "-m", "cli.inference",
                     "--text", text,
@@ -1086,33 +1010,29 @@ class UnifiedAudioConverter:
                 ]
                 try:
-                    # Run the command
                     result = subprocess.run(
                         cmd,
                         capture_output=True,
                         text=True,
                         timeout=60,
-                        cwd="."  # Make sure we're in the right directory
                     )
                     if result.returncode == 0:
                         audio_files.append(output_file)
                     else:
                         print(f"Spark TTS error for turn {i}: {result.stderr}")
-                        # Create a short silence as fallback
-                        silence = np.zeros(int(22050 * 1.0))  # 1 second of silence
                         sf.write(output_file, silence, 22050)
                         audio_files.append(output_file)
                 except subprocess.TimeoutExpired:
                     print(f"Spark TTS timeout for turn {i}")
-                    # Create silence as fallback
                     silence = np.zeros(int(22050 * 1.0))
                     sf.write(output_file, silence, 22050)
                     audio_files.append(output_file)
                 except Exception as e:
                     print(f"Error running Spark TTS for turn {i}: {e}")
-                    # Create silence as fallback
                     silence = np.zeros(int(22050 * 1.0))
                     sf.write(output_file, silence, 22050)
                     audio_files.append(output_file)
@@ -1124,7 +1044,6 @@ class UnifiedAudioConverter:
             else:
                 raise RuntimeError("No audio files generated")
-            # Generate conversation text
             conversation_text = "\n".join(
                 f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
                 for i, turn in enumerate(conversation_json["conversation"])
@@ -1150,7 +1069,6 @@ class UnifiedAudioConverter:
             speaker = speakers[i % 2]
             speaker_id = self.melo_models["EN"].hps.data.spk2id[speaker]
-            # Generate audio
             self.melo_models["EN"].tts_to_file(
                 text, speaker_id, bio, speed=1.0,
                 pbar=progress.tqdm if progress else None,
@@ -1161,11 +1079,9 @@ class UnifiedAudioConverter:
             audio_segment = AudioSegment.from_file(bio, format="wav")
             combined_audio += audio_segment
-        # Save final audio
         final_audio_path = "melo_podcast.mp3"
         combined_audio.export(final_audio_path, format="mp3")
-        # Generate conversation text
         conversation_text = "\n".join(
             f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
             for i, turn in enumerate(conversation_json["conversation"])
@@ -1224,10 +1140,9 @@ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"
         else:  # Keyword
             if not article_input or not isinstance(article_input, str):
                 return "Please provide a keyword or topic.", None
-            # 키워드로 검색하여 콘텐츠 생성
             text = search_and_compile_content(article_input, language)
-            text = f"Keyword-based content:\n{text}"  # 마커 추가
         # Limit text to max words
         words = text.split()
         if len(words) > converter.config.max_words:
@@ -1235,19 +1150,17 @@ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"
         # Extract conversation based on mode
         if mode == "Local":
-            # 로컬 모드가 기본 (새로운 Local LLM 사용)
             try:
                 conversation_json = converter.extract_conversation_local(text, language)
             except Exception as e:
                 print(f"Local mode failed: {e}, trying API fallback")
-                # API 폴백
                 api_key = os.environ.get("TOGETHER_API_KEY")
                 if api_key:
                     converter.initialize_api_mode(api_key)
                     conversation_json = converter.extract_conversation_api(text, language)
                 else:
                     raise RuntimeError("Local mode failed and no API key available for fallback")
-        else:  # API mode (now secondary)
             api_key = os.environ.get("TOGETHER_API_KEY")
             if not api_key:
                 print("API key not found, falling back to local mode")
@@ -1278,15 +1191,14 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
         return "Please provide conversation text.", None
     try:
-        # Parse the conversation text back to JSON format
         conversation_json = converter.parse_conversation_text(conversation_text)
         if not conversation_json["conversation"]:
             return "No valid conversation found in the text.", None
-        # 한국어인 경우 Edge-TTS만 사용 (다른 TTS는 한국어 지원이 제한적)
-        if language == "Korean" and tts_engine != "Edge-TTS":
-            tts_engine = "Edge-TTS"  # 자동으로 Edge-TTS로 변경
         # Generate audio based on TTS engine
         if tts_engine == "Edge-TTS":
@@ -1299,8 +1211,8 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
         else:  # MeloTTS
             if not MELO_AVAILABLE:
                 return "MeloTTS not available. Please install required dependencies.", None
-            if language == "Korean":
-                return "MeloTTS does not support Korean. Please use Edge-TTS for Korean.", None
             converter.initialize_melo_tts()
             output_file, _ = converter.text_to_speech_melo(conversation_json)
@@ -1320,14 +1232,34 @@ def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS",
     return asyncio.run(regenerate_audio(conversation_text, tts_engine, language))
-def update_tts_engine_for_korean(language):
-    """한국어 선택 시 TTS 엔진 옵션 업데이트"""
-    if language == "Korean":
         return gr.Radio(
             choices=["Edge-TTS"],
             value="Edge-TTS",
             label="TTS Engine",
-            info="한국어는 Edge-TTS만 지원됩니다",
             interactive=False
         )
     else:
@@ -1363,7 +1295,7 @@ if LLAMA_CPP_AVAILABLE:
         print(f"Failed to download model at startup: {e}")
-# Gradio Interface - 개선된 레이아웃
 with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
     .container {max-width: 1200px; margin: auto; padding: 20px;}
     .header-text {text-align: center; margin-bottom: 30px;}
@@ -1375,8 +1307,8 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
         # 헤더
         with gr.Row(elem_classes="header-text"):
             gr.Markdown("""
-            # 🎙️ AI Podcast Generator - Professional Edition
-            ### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation with in-depth analysis!
             """)
         with gr.Row(elem_classes="discord-badge"):
@@ -1388,8 +1320,6 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
             </p>
             """)
         # 상태 표시 섹션
         with gr.Row():
             with gr.Column(scale=1):
@@ -1402,11 +1332,11 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
                 """)
             with gr.Column(scale=1):
                 gr.Markdown("""
-                #### 📻 Podcast Features
-                - **Length**: 12-15 professional exchanges
-                - **Style**: Expert discussions with data & insights
-                - **Languages**: English & Korean (한국어)
-                - **Input**: URL, PDF, or Keywords
                 """)
         # 메인 입력 섹션
@@ -1441,7 +1371,7 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
                     # 키워드 입력
                     keyword_input = gr.Textbox(
                         label="🔍 Topic/Keyword",
-                        placeholder="Enter a topic (e.g., 'AI trends 2024', '인공지능 최신 동향')",
                         value="",
                         visible=False,
                         info="System will search and compile latest information",
@@ -1452,10 +1382,16 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
                 with gr.Column(scale=1):
                     # 언어 선택
                     language_selector = gr.Radio(
-                        choices=["English", "Korean"],
                         value="English",
-                        label="🌐 Language / 언어",
-                        info="Output language"
                     )
                     # 처리 모드
@@ -1493,7 +1429,7 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
                         lines=25,
                         max_lines=50,
                         interactive=True,
-                        placeholder="Professional podcast conversation will appear here...\n전문 팟캐스트 대화가 여기에 표시됩니다...",
                         info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
                     )
@@ -1524,20 +1460,24 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
                     gr.Markdown("""
                     #### 💡 Quick Tips:
                     - **URL**: Paste any article link
-                    - **PDF**: Upload documents directly
                     - **Keyword**: Enter topics for AI research
                     - Edit conversation before audio generation
-                    - Korean (한국어) fully supported
                     """)
         # 예제 섹션
-        with gr.Accordion("📚 Examples", open=False):
             gr.Examples(
                 examples=[
-                    ["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
                     ["quantum computing breakthroughs", "Keyword", "Local", "Edge-TTS", "English"],
-                    ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
                     ["인공지능 윤리와 규제", "Keyword", "Local", "Edge-TTS", "Korean"],
                 ],
                 inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
                 outputs=[conversation_output, status_output],
@@ -1554,7 +1494,7 @@ with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
     # 언어 변경 시 TTS 엔진 옵션 업데이트
     language_selector.change(
-        fn=update_tts_engine_for_korean,
         inputs=[language_selector],
         outputs=[tts_selector]
     )
@@ -1591,4 +1531,4 @@ if __name__ == "__main__":
         share=False,
         server_name="0.0.0.0",
         server_port=7860
-    )

 BRAVE_KEY = os.getenv("BSEARCH_API")
 BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
+# Languages that are handled by Edge-TTS only (English is deliberately not listed).
+# Elsewhere in this file, a language found in this list has its TTS engine
+# switched to "Edge-TTS", and the MeloTTS branch returns an error message for it.
+EDGE_TTS_ONLY_LANGUAGES = [
+    "Korean", "Japanese", "French", "German", "Spanish", "Italian",
+    "Portuguese", "Dutch", "Thai", "Vietnamese", "Arabic", "Hebrew",
+    "Indonesian", "Hindi", "Russian", "Chinese", "Norwegian", "Swedish",
+    "Finnish", "Danish", "Polish", "Turkish", "Greek", "Czech"
+]
+# Edge-TTS voice names per language: each language maps to a two-element list.
+# A caller in this file looks a language up with
+# EDGE_TTS_VOICES.get(language, EDGE_TTS_VOICES["English"]), so a language
+# missing from this table gets the English pair.
+EDGE_TTS_VOICES = {
+    "English": [
+        "en-US-AndrewMultilingualNeural",    # male voice 1
+        "en-US-BrianMultilingualNeural"      # male voice 2
+    ],
+    "Korean": [
+        "ko-KR-HyunsuNeural",  # male voice 1 (calm, trustworthy)
+        "ko-KR-InJoonNeural"   # male voice 2 (lively, friendly)
+    ],
+    "Japanese": [
+        "ja-JP-KeitaNeural",    # male voice 1
+        "ja-JP-NanamiNeural"    # female voice (backup)
+    ],
+    "French": [
+        "fr-FR-HenriNeural",    # male voice 1
+        "fr-FR-DeniseNeural"    # female voice (backup)
+    ],
+    "German": [
+        "de-DE-ConradNeural",   # male voice 1
+        "de-DE-KillianNeural"   # male voice 2
+    ],
+    "Spanish": [
+        "es-ES-AlvaroNeural",   # male voice 1
+        "es-ES-ElviraNeural"    # female voice (backup)
+    ],
+    "Italian": [
+        "it-IT-DiegoNeural",    # male voice 1
+        "it-IT-IsabellaNeural"  # female voice (backup)
+    ],
+    "Portuguese": [
+        "pt-BR-AntonioNeural",  # male voice 1
+        "pt-BR-FranciscaNeural" # female voice (backup)
+    ],
+    "Dutch": [
+        "nl-NL-MaartenNeural",  # male voice 1
+        "nl-NL-ColetteNeural"   # female voice (backup)
+    ],
+    "Thai": [
+        "th-TH-NiwatNeural",    # male voice 1
+        "th-TH-PremwadeeNeural" # female voice (backup)
+    ],
+    "Vietnamese": [
+        "vi-VN-NamMinhNeural",  # male voice 1
+        "vi-VN-HoaiMyNeural"    # female voice (backup)
+    ],
+    "Arabic": [
+        "ar-SA-HamedNeural",    # male voice 1
+        "ar-SA-ZariyahNeural"   # female voice (backup)
+    ],
+    "Hebrew": [
+        "he-IL-AvriNeural",     # male voice 1
+        "he-IL-HilaNeural"      # female voice (backup)
+    ],
+    "Indonesian": [
+        "id-ID-ArdiNeural",     # male voice 1
+        "id-ID-GadisNeural"     # female voice (backup)
+    ],
+    "Hindi": [
+        "hi-IN-MadhurNeural",   # male voice 1
+        "hi-IN-SwaraNeural"     # female voice (backup)
+    ],
+    "Russian": [
+        "ru-RU-DmitryNeural",   # male voice 1
+        "ru-RU-SvetlanaNeural"  # female voice (backup)
+    ],
+    "Chinese": [
+        "zh-CN-YunxiNeural",    # male voice 1
+        "zh-CN-XiaoxiaoNeural"  # female voice (backup)
+    ],
+    "Norwegian": [
+        "nb-NO-FinnNeural",     # male voice 1
+        "nb-NO-PernilleNeural"  # female voice (backup)
+    ],
+    "Swedish": [
+        "sv-SE-MattiasNeural",  # male voice 1
+        "sv-SE-SofieNeural"     # female voice (backup)
+    ],
+    "Finnish": [
+        "fi-FI-HarriNeural",    # male voice 1
+        "fi-FI-NooraNeural"     # female voice (backup)
+    ],
+    "Danish": [
+        "da-DK-JeppeNeural",    # male voice 1
+        "da-DK-ChristelNeural"  # female voice (backup)
+    ],
+    "Polish": [
+        "pl-PL-MarekNeural",    # male voice 1
+        "pl-PL-ZofiaNeural"     # female voice (backup)
+    ],
+    "Turkish": [
+        "tr-TR-AhmetNeural",    # male voice 1
+        "tr-TR-EmelNeural"      # female voice (backup)
+    ],
+    "Greek": [
+        "el-GR-NestorasNeural", # male voice 1
+        "el-GR-AthinaNeural"    # female voice (backup)
+    ],
+    "Czech": [
+        "cs-CZ-AntoninNeural",  # male voice 1
+        "cs-CZ-VlastaNeural"    # female voice (backup)
+    ]
+}
 @dataclass
 class ConversationConfig:
     max_words: int = 8000  # 4000에서 6000으로 증가 (1.5배)
     return intro + compiled
 class UnifiedAudioConverter:
     def __init__(self, config: ConversationConfig):
         self.config = config
         else:
             return MessagesFormatterType.LLAMA_3
     def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
         """Build prompt for conversation generation with enhanced professional podcast style"""
         # 텍스트 길이 제한
         if len(text) > max_text_length:
             text = text[:max_text_length] + "..."
+        # 언어별 화자 이름 설정
         if language == "Korean":
+            speaker1, speaker2 = "준수", "민호"
+        elif language == "Japanese":
+            speaker1, speaker2 = "Hiroshi", "Takeshi"
+        elif language == "French":
+            speaker1, speaker2 = "Pierre", "Marc"
+        elif language == "German":
+            speaker1, speaker2 = "Klaus", "Stefan"
+        elif language == "Spanish":
+            speaker1, speaker2 = "Carlos", "Miguel"
+        elif language == "Italian":
+            speaker1, speaker2 = "Marco", "Giuseppe"
+        elif language == "Portuguese":
+            speaker1, speaker2 = "João", "Pedro"
+        elif language == "Dutch":
+            speaker1, speaker2 = "Jan", "Pieter"
+        elif language == "Thai":
+            speaker1, speaker2 = "Somchai", "Prasert"
+        elif language == "Vietnamese":
+            speaker1, speaker2 = "Minh", "Duc"
+        elif language == "Arabic":
+            speaker1, speaker2 = "Ahmed", "Mohammed"
+        elif language == "Hebrew":
+            speaker1, speaker2 = "David", "Michael"
+        elif language == "Indonesian":
+            speaker1, speaker2 = "Budi", "Andi"
+        elif language == "Hindi":
+            speaker1, speaker2 = "Raj", "Amit"
+        elif language == "Russian":
+            speaker1, speaker2 = "Alexei", "Dmitri"
+        elif language == "Chinese":
+            speaker1, speaker2 = "Wei", "Jun"
+        else:  # English and others
+            speaker1, speaker2 = "Alex", "Jordan"
+        # 대화 템플릿 생성
+        template = "{\n    \"conversation\": [\n"
+        for i in range(12):  # 12번의 교환
+            template += f"        {{\"speaker\": \"{speaker1 if i % 2 == 0 else speaker2}\", \"text\": \"\"}}"
+            if i < 11:
+                template += ","
+            template += "\n"
+        template += "    ]\n}"
+        context_part = ""
+        if search_context:
+            if language == "Korean":
                 context_part = f"# 최신 관련 정보:\n{search_context}\n"
+            else:
+                context_part = f"# Latest Information:\n{search_context}\n"
+        if language == "Korean":
             base_prompt = (
                 f"# 원본 콘텐츠:\n{text}\n\n"
                 f"{context_part}"
                 f"## 핵심 지침:\n"
                 f"1. **대화 스타일**: 전문적이면서도 이해하기 쉬운 팟캐스트 대담\n"
                 f"2. **화자 역할**:\n"
+                f"   - {speaker1}: 진행자/호스트 (핵심을 짚는 질문, 청취자 관점에서 궁금한 점 질문)\n"
+                f"   - {speaker2}: 전문가 (깊이 있는 설명, 구체적 사례와 데이터 제시)\n"
                 f"3. **중요한 답변 규칙**:\n"
+                f"   - {speaker1}: 1-2문장의 명확한 질문\n"
+                f"   - {speaker2}: **반드시 2-4문장으로 충실히 답변** (개념 설명 + 구체적 설명 + 예시나 함의)\n"
+                f"4. **전문성 요소**: 통계나 연구 결과 인용, 실제 사례와 케이스 스터디, 전문 용어를 쉽게 풀어서 설명\n"
+                f"5. **필수 규칙**: 서로 존댓말 사용, 12회 대화 교환\n\n"
                 f"JSON 형식으로만 반환:\n{template}"
             )
         else:
             base_prompt = (
                 f"# Content:\n{text}\n\n"
                 f"{context_part}"
+                f"Create a professional and insightful podcast conversation in {language}.\n\n"
                 f"## Key Guidelines:\n"
                 f"1. **Style**: Professional yet accessible podcast discussion\n"
                 f"2. **Roles**:\n"
+                f"   - {speaker1}: Host (insightful questions, audience perspective)\n"
+                f"   - {speaker2}: Expert (in-depth explanations, concrete examples and data)\n"
                 f"3. **Critical Response Rules**:\n"
+                f"   - {speaker1}: 1-2 sentence clear questions\n"
+                f"   - {speaker2}: **Must answer in 2-4 sentences** (concept + detailed explanation + example/implication)\n"
+                f"4. **Professional Elements**: Cite statistics and research, real cases and case studies, explain technical terms clearly\n"
+                f"5. **Length**: 12 exchanges total\n\n"
                 f"Return JSON only:\n{template}"
             )
+        return base_prompt
     def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
         """Build messages for local LLM with enhanced professional podcast style"""
         if language == "Korean":
             system_message = (
                 "당신은 한국 최고의 전문 팟캐스트 작가입니다. "
+                "청취자들이 전문 지식을 쉽게 이해할 수 있는 고품질 대담을 만들어냅니다. "
+                "반드시 서로 존댓말을 사용하며, 전문적이면서도 친근한 톤을 유지합니다."
             )
         else:
             system_message = (
+                f"You are an expert podcast scriptwriter creating high-quality "
+                f"professional discussions in {language}. Make complex topics accessible "
+                f"while maintaining expertise and a professional yet approachable tone."
             )
         return [
             chat_template = self._get_messages_formatter_type(self.config.local_model_name)
             provider = LlamaCppPythonProvider(self.local_llm)
+            # 언어별 시스템 메시지
             if language == "Korean":
                 system_message = (
                     "당신은 한국의 유명 팟캐스트 전문 작가입니다. "
+                    "청취자들이 깊이 있는 전문 지식을 얻을 수 있는 고품질 대담을 만듭니다. "
+                    "반드시 서로 존댓말을 사용하며, 12회의 대화 교환으로 구성하세요. "
+                    "JSON 형식으로만 응답하세요."
                 )
             else:
                 system_message = (
+                    f"You are a professional podcast scriptwriter creating high-quality, "
+                    f"insightful discussions in {language}. Create exactly 12 conversation exchanges "
+                    f"with professional expertise. Respond only in JSON format."
                 )
             agent = LlamaCppAgent(
             )
             settings = provider.get_provider_default_settings()
+            settings.temperature = 0.75
             settings.top_k = 40
             settings.top_p = 0.95
+            settings.max_tokens = self.config.max_tokens
             settings.repeat_penalty = 1.1
             settings.stream = False
             if json_match:
                 conversation_data = json.loads(json_match.group())
                 return conversation_data
             else:
                 raise ValueError("No valid JSON found in local LLM response")
     @spaces.GPU(duration=120)
     def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
+        """Extract conversation using legacy local model"""
         try:
             self.initialize_legacy_local_mode()
+            # 언어별 시스템 메시지
             if language == "Korean":
                 system_message = (
                     "당신은 전문 팟캐스트 작가입니다. "
+                    "12회의 대화 교환으로 구성된 전문적인 대담을 만드세요."
                 )
             else:
                 system_message = (
+                    f"You are a professional podcast scriptwriter. "
+                    f"Create a professional dialogue in {language} with 12 exchanges."
                 )
             chat = [
             generate_kwargs = dict(
                 model_inputs,
                 streamer=streamer,
+                max_new_tokens=self.config.max_new_tokens,
                 do_sample=True,
                 temperature=0.75,
                 eos_token_id=terminators,
         except Exception as e:
             print(f"Legacy local model also failed: {e}")
+            return self._get_default_conversation(language)
+    def _get_default_conversation(self, language: str) -> Dict:
+        """Return the built-in fallback conversation for the given language.
+
+        Only Korean has its own template; every other language value
+        receives the English template.
+        """
+        if language == "Korean":
+            return self._get_default_korean_conversation()
+        else:
+            # No per-language templates beyond Korean: fall back to English.
+            return self._get_default_english_conversation()
     def _get_default_korean_conversation(self) -> Dict:
+        """기본 한국어 대화 템플릿"""
         return {
             "conversation": [
                 {"speaker": "준수", "text": "안녕하세요, 여러분! 오늘은 정말 중요하고 흥미로운 주제를 다뤄보려고 합니다. 민호 박사님, 먼저 이 주제가 왜 지금 이렇게 주목받고 있는지 설명해주시겠어요?"},
                 {"speaker": "준수", "text": "실용적인 조언 감사합니다. 마지막으로 이 분야의 미래 전망은 어떻게 보시나요?"},
                 {"speaker": "민호", "text": "향후 10년은 인류 역사상 가장 급격한 기술 발전을 경험하는 시기가 될 것입니다. 가트너의 하이프 사이클 분석에 따르면, 현재 우리는 이 기술의 초기 단계에 불과합니다. 2030년까지는 지금으로서는 상상하기 어려운 수준의 혁신이 일어날 것으로 예상됩니다. 중요한 것은 이런 변화를 두려워하기보다는 기회로 삼아 더 나은 미래를 만들어가는 것이라고 생각합니다."},
                 {"speaker": "준수", "text": "정말 통찰력 있는 말씀이네요. 오늘 너무나 유익한 시간이었습니다. 청취자 여러분도 오늘 논의된 내용을 바탕으로 미래를 준비하시길 바랍니다. 민호 박사님, 귀중한 시간 내주셔서 감사합니다!"},
+                {"speaker": "민호", "text": "감사합니다. 청취자 여러분들이 이 변화의 시대를 현명하게 헤쳐나가시길 바랍니다. 기술은 도구일 뿐이고, 그것을 어떻게 활용하는지는 우리에게 달려있다는 점을 기억해주세요."}
             ]
         }
     def _get_default_english_conversation(self) -> Dict:
+        """기본 영어 대화 템플릿"""
         return {
             "conversation": [
                 {"speaker": "Alex", "text": "Welcome everyone to our podcast! Today we're diving into a topic that's reshaping our world. Dr. Jordan, could you start by explaining why this subject has become so critical right now?"},
                 {"speaker": "Alex", "text": "400% acceleration is staggering! What does this mean for everyday people who might not be tech-savvy?"},
                 {"speaker": "Jordan", "text": "The impact will be profound yet accessible. Think about how smartphones revolutionized communication - this will be similar but across every aspect of life. McKinsey's latest report projects that by 2026, these technologies will create $4.4 trillion in annual value globally. For individuals, this translates to personalized healthcare that can predict illnesses years in advance, educational systems that adapt to each student's learning style, and financial tools that democratize wealth-building strategies previously available only to the ultra-wealthy."},
                 {"speaker": "Alex", "text": "Those applications sound transformative. Can you give us a concrete example of how this is already being implemented?"},
+                {"speaker": "Jordan", "text": "Absolutely. Let me share a compelling case from Johns Hopkins Hospital. They've deployed an AI system that analyzes patient data in real-time, reducing diagnostic errors by 85% and cutting average diagnosis time from days to hours. In one documented case, the system identified a rare genetic disorder in a child that had been misdiagnosed for three years. The accuracy comes from analyzing patterns across millions of cases - something impossible for even the most experienced doctors to do manually."},
                 {"speaker": "Alex", "text": "That's truly life-changing technology. But I imagine there are significant challenges and risks we need to consider?"},
+                {"speaker": "Jordan", "text": "You're absolutely right to raise this. The challenges are as significant as the opportunities. The World Economic Forum identifies three critical risks: algorithmic bias could perpetuate existing inequalities, cybersecurity threats become exponentially more dangerous, and there's the socioeconomic disruption with PwC estimating that 30% of jobs could be automated by 2030. However, history shows us that technological revolutions create new opportunities even as they displace old ones. The key is proactive adaptation and responsible development."},
                 {"speaker": "Alex", "text": "How should individuals and organizations prepare for these changes?"},
+                {"speaker": "Jordan", "text": "Preparation requires a multi-faceted approach. For individuals, I recommend focusing on skills that complement rather than compete with AI: critical thinking, emotional intelligence, and creative problem-solving. MIT's recent study shows that professionals who combine domain expertise with AI literacy see salary increases of 40% on average. Organizations need to invest in continuous learning programs - Amazon's $700 million worker retraining initiative is a good model. Most importantly, we need to cultivate an adaptive mindset."},
                 {"speaker": "Alex", "text": "That's practical advice. What about the ethical considerations? How do we ensure this technology benefits humanity as a whole?"},
+                {"speaker": "Jordan", "text": "Ethics must be at the forefront of development. The EU's AI Act and similar regulations worldwide are establishing important guardrails. We need transparent AI systems where decisions can be explained and audited. Companies like IBM and Google have established AI ethics boards, but we need industry-wide standards. Additionally, we must address the digital divide - UNESCO reports that 37% of the global population still lacks internet access. Without inclusive development, these technologies could exacerbate global inequality."},
                 {"speaker": "Alex", "text": "Looking ahead, what's your vision for how this technology will shape the next decade?"},
+                {"speaker": "Jordan", "text": "The next decade will be transformative beyond our current imagination. By 2035, I expect we'll see autonomous systems managing entire cities, personalized medicine extending human lifespan by 20-30 years, and educational AI that makes world-class education universally accessible. The convergence of AI with quantum computing, biotechnology, and nanotechnology will unlock possibilities we can barely conceive of today. However, the future isn't predetermined - it's shaped by the choices we make now about development priorities and ethical frameworks."},
                 {"speaker": "Alex", "text": "Dr. Jordan, this has been an incredibly enlightening discussion. Thank you for sharing your expertise and insights with us today."},
+                {"speaker": "Jordan", "text": "Thank you, Alex. For listeners wanting to dive deeper, I've compiled additional resources on my website. Remember, the future isn't something that happens to us - it's something we create together. I look forward to seeing how each of you contributes to shaping this exciting new era."}
             ]
         }
     def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
+        """Extract conversation using API"""
         if not self.llm_client:
             raise RuntimeError("API mode not initialized")
                 except Exception as e:
                     print(f"Search failed, continuing without context: {e}")
+            # 언어별 시스템 메시지
             if language == "Korean":
                 system_message = (
                     "당신은 한국의 최고 전문 팟캐스트 작가입니다. "
+                    "12회의 깊이 있는 대화 교환으로 구성된 고품질 대담을 만드세요. "
+                    "반드시 서로 존댓말을 사용하세요."
                 )
             else:
                 system_message = (
+                    f"You are a top professional podcast scriptwriter. "
+                    f"Create high-quality discussions in {language} with exactly 12 exchanges. "
+                    f"Include specific data, research findings, and real cases."
                 )
             chat_completion = self.llm_client.chat.completions.create(
         filenames = []
         try:
+            # 언어별 음성 설정
+            voices = EDGE_TTS_VOICES.get(language, EDGE_TTS_VOICES["English"])
             for i, turn in enumerate(conversation_json["conversation"]):
                 filename = output_dir / f"output_{i}.wav"
             # Create different voice characteristics for different speakers
             if language == "Korean":
                 voice_configs = [
+                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 준수입니다.", "gender": "male"},
+                    {"prompt_text": "안녕하세요, 저는 오늘 이 주제에 대해 설명드릴 민호입니다.", "gender": "male"}
                 ]
             else:
                 voice_configs = [
+                    {"prompt_text": "Hello everyone, I'm Alex, your host for today's podcast.", "gender": "male"},
+                    {"prompt_text": "Hi, I'm Jordan. I'm excited to share my insights with you.", "gender": "male"}
                 ]
             for i, turn in enumerate(conversation_json["conversation"]):
                 if not text.strip():
                     continue
                 voice_config = voice_configs[i % len(voice_configs)]
                 output_file = os.path.join(output_dir, f"spark_output_{i}.wav")
                 cmd = [
                     "python", "-m", "cli.inference",
                     "--text", text,
                 ]
                 try:
                     result = subprocess.run(
                         cmd,
                         capture_output=True,
                         text=True,
                         timeout=60,
+                        cwd="."
                     )
                     if result.returncode == 0:
                         audio_files.append(output_file)
                     else:
                         print(f"Spark TTS error for turn {i}: {result.stderr}")
+                        silence = np.zeros(int(22050 * 1.0))
                         sf.write(output_file, silence, 22050)
                         audio_files.append(output_file)
                 except subprocess.TimeoutExpired:
                     print(f"Spark TTS timeout for turn {i}")
                     silence = np.zeros(int(22050 * 1.0))
                     sf.write(output_file, silence, 22050)
                     audio_files.append(output_file)
                 except Exception as e:
                     print(f"Error running Spark TTS for turn {i}: {e}")
                     silence = np.zeros(int(22050 * 1.0))
                     sf.write(output_file, silence, 22050)
                     audio_files.append(output_file)
             else:
                 raise RuntimeError("No audio files generated")
             conversation_text = "\n".join(
                 f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
                 for i, turn in enumerate(conversation_json["conversation"])
             speaker = speakers[i % 2]
             speaker_id = self.melo_models["EN"].hps.data.spk2id[speaker]
             self.melo_models["EN"].tts_to_file(
                 text, speaker_id, bio, speed=1.0,
                 pbar=progress.tqdm if progress else None,
             audio_segment = AudioSegment.from_file(bio, format="wav")
             combined_audio += audio_segment
         final_audio_path = "melo_podcast.mp3"
         combined_audio.export(final_audio_path, format="mp3")
         conversation_text = "\n".join(
             f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
             for i, turn in enumerate(conversation_json["conversation"])
         else:  # Keyword
             if not article_input or not isinstance(article_input, str):
                 return "Please provide a keyword or topic.", None
             text = search_and_compile_content(article_input, language)
+            text = f"Keyword-based content:\n{text}"
         # Limit text to max words
         words = text.split()
         if len(words) > converter.config.max_words:
         # Extract conversation based on mode
         if mode == "Local":
             try:
                 conversation_json = converter.extract_conversation_local(text, language)
             except Exception as e:
                 print(f"Local mode failed: {e}, trying API fallback")
                 api_key = os.environ.get("TOGETHER_API_KEY")
                 if api_key:
                     converter.initialize_api_mode(api_key)
                     conversation_json = converter.extract_conversation_api(text, language)
                 else:
                     raise RuntimeError("Local mode failed and no API key available for fallback")
+        else:  # API mode
             api_key = os.environ.get("TOGETHER_API_KEY")
             if not api_key:
                 print("API key not found, falling back to local mode")
         return "Please provide conversation text.", None
     try:
         conversation_json = converter.parse_conversation_text(conversation_text)
         if not conversation_json["conversation"]:
             return "No valid conversation found in the text.", None
+        # Edge TTS 전용 언어는 자동으로 Edge-TTS 사용
+        if language in EDGE_TTS_ONLY_LANGUAGES and tts_engine != "Edge-TTS":
+            tts_engine = "Edge-TTS"
         # Generate audio based on TTS engine
         if tts_engine == "Edge-TTS":
         else:  # MeloTTS
             if not MELO_AVAILABLE:
                 return "MeloTTS not available. Please install required dependencies.", None
+            if language in EDGE_TTS_ONLY_LANGUAGES:
+                return f"MeloTTS does not support {language}. Please use Edge-TTS for this language.", None
             converter.initialize_melo_tts()
             output_file, _ = converter.text_to_speech_melo(conversation_json)
     return asyncio.run(regenerate_audio(conversation_text, tts_engine, language))
+def update_tts_engine_for_language(language):
+    """Update the TTS engine options for the selected language.
+
+    For a language listed in EDGE_TTS_ONLY_LANGUAGES, returns a
+    non-interactive gr.Radio whose only choice (and value) is "Edge-TTS",
+    with an info message written in that language; an English message is
+    used when no localized message exists for the language.
+    """
+    if language in EDGE_TTS_ONLY_LANGUAGES:
+        # Per-language notice shown as the Radio's info text.
+        language_info = {
+            "Korean": "한국어는 Edge-TTS만 지원됩니다",
+            "Japanese": "日本語はEdge-TTSのみサポートされています",
+            "French": "Le français n'est pris en charge que par Edge-TTS",
+            "German": "Deutsch wird nur von Edge-TTS unterstützt",
+            "Spanish": "El español solo es compatible con Edge-TTS",
+            "Italian": "L'italiano è supportato solo da Edge-TTS",
+            "Portuguese": "O português é suportado apenas pelo Edge-TTS",
+            "Dutch": "Nederlands wordt alleen ondersteund door Edge-TTS",
+            "Thai": "ภาษาไทยรองรับเฉพาะ Edge-TTS เท่านั้น",
+            "Vietnamese": "Tiếng Việt chỉ được hỗ trợ bởi Edge-TTS",
+            "Arabic": "العربية مدعومة فقط من Edge-TTS",
+            "Hebrew": "עברית נתמכת רק על ידי Edge-TTS",
+            "Indonesian": "Bahasa Indonesia hanya didukung oleh Edge-TTS",
+            "Hindi": "हिंदी केवल Edge-TTS द्वारा समर्थित है",
+            "Russian": "Русский поддерживается только Edge-TTS",
+            "Chinese": "中文仅支持Edge-TTS"
+        }
+        info_text = language_info.get(language, f"{language} is only supported by Edge-TTS")
         return gr.Radio(
             choices=["Edge-TTS"],
             value="Edge-TTS",
             label="TTS Engine",
+            info=info_text,
             interactive=False
         )
     else:
         print(f"Failed to download model at startup: {e}")
+# Gradio Interface - improved multi-language layout
 with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
     .container {max-width: 1200px; margin: auto; padding: 20px;}
     .header-text {text-align: center; margin-bottom: 30px;}
         # Header
         with gr.Row(elem_classes="header-text"):
             gr.Markdown("""
+            # 🎙️ AI Podcast Generator - Professional Multi-Language Edition
+            ### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation in 24+ languages!
             """)
         with gr.Row(elem_classes="discord-badge"):
             </p>
             """)
         # Status display section
         with gr.Row():
             with gr.Column(scale=1):
                 """)
             with gr.Column(scale=1):
                 gr.Markdown("""
+                #### 🌍 Multi-Language Support
+                - **24+ Languages**: Korean, Japanese, French, German, Spanish, Italian, etc.
+                - **Native Voices**: Optimized for each language
+                - **Professional Style**: Expert discussions with data & insights
+                - **Auto-TTS Selection**: Best engine per language
                 """)
         # Main input section
                     # Keyword input
                     keyword_input = gr.Textbox(
                         label="🔍 Topic/Keyword",
+                        placeholder="Enter a topic (e.g., 'AI trends 2024', '인공지능', 'IA tendances', 'KI Trends')",
                         value="",
                         visible=False,
                         info="System will search and compile latest information",
                 with gr.Column(scale=1):
                     # Language selection
                     language_selector = gr.Radio(
+                        choices=[
+                            "English", "Korean", "Japanese", "French", "German",
+                            "Spanish", "Italian", "Portuguese", "Dutch", "Thai",
+                            "Vietnamese", "Arabic", "Hebrew", "Indonesian", "Hindi",
+                            "Russian", "Chinese", "Norwegian", "Swedish", "Finnish",
+                            "Danish", "Polish", "Turkish", "Greek", "Czech"
+                        ],
                         value="English",
+                        label="🌐 Language / 언어 / 语言",
+                        info="Select podcast language"
                     )
                     # Processing mode
                         lines=25,
                         max_lines=50,
                         interactive=True,
+                        placeholder="Professional podcast conversation will appear here...\n전문 팟캐스트 대화가 여기에 표시됩니다...\nLa conversation professionnelle du podcast apparaîtra ici...",
                         info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
                     )
                     gr.Markdown("""
                     #### 💡 Quick Tips:
                     - **URL**: Paste any article link
+                    - **PDF**: Upload documents directly
                     - **Keyword**: Enter topics for AI research
+                    - **24+ Languages** fully supported
                     - Edit conversation before audio generation
+                    - Auto TTS engine selection per language
                     """)
         # Examples section
+        with gr.Accordion("📚 Multi-Language Examples", open=False):
             gr.Examples(
                 examples=[
+                    ["https://huggingface.co/blog/openfreeai/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
                     ["quantum computing breakthroughs", "Keyword", "Local", "Edge-TTS", "English"],
                     ["인공지능 윤리와 규제", "Keyword", "Local", "Edge-TTS", "Korean"],
+                    ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Japanese"],
+                    ["intelligence artificielle tendances", "Keyword", "Local", "Edge-TTS", "French"],
+                    ["künstliche intelligenz entwicklung", "Keyword", "Local", "Edge-TTS", "German"],
+                    ["inteligencia artificial avances", "Keyword", "Local", "Edge-TTS", "Spanish"],
                 ],
                 inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
                 outputs=[conversation_output, status_output],
     # Update the TTS engine options when the language changes
     language_selector.change(
+        fn=update_tts_engine_for_language,
         inputs=[language_selector],
         outputs=[tts_selector]
     )
         share=False,
         server_name="0.0.0.0",
         server_port=7860
+    )