openfree committed on
Commit
b16cf9f
·
verified ·
1 Parent(s): 9cdb5d5

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +165 -66
app-backup.py CHANGED
@@ -137,31 +137,55 @@ class UnifiedAudioConverter:
137
  except httpx.HTTPError as e:
138
  raise RuntimeError(f"Failed to fetch URL: {e}")
139
 
140
- def _build_prompt(self, text: str) -> str:
141
  """Build prompt for conversation generation"""
142
- template = """
143
- {
144
- "conversation": [
145
- {"speaker": "", "text": ""},
146
- {"speaker": "", "text": ""}
147
- ]
148
- }
149
- """
150
- return (
151
- f"{text}\n\nConvert the provided text into a short, informative and crisp "
152
- f"podcast conversation between two experts. The tone should be "
153
- f"professional and engaging. Please adhere to the following "
154
- f"format and return ONLY the JSON:\n{template}"
155
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
- def extract_conversation_api(self, text: str) -> Dict:
158
  """Extract conversation using API"""
159
  if not self.llm_client:
160
  raise RuntimeError("API mode not initialized")
161
 
162
  try:
 
 
 
 
 
 
163
  chat_completion = self.llm_client.chat.completions.create(
164
- messages=[{"role": "user", "content": self._build_prompt(text)}],
 
 
 
165
  model=self.config.model_name,
166
  )
167
 
@@ -175,15 +199,21 @@ class UnifiedAudioConverter:
175
  except Exception as e:
176
  raise RuntimeError(f"Failed to extract conversation: {e}")
177
 
178
- def extract_conversation_local(self, text: str, progress=None) -> Dict:
179
  """Extract conversation using local model"""
180
  if not self.local_model or not self.tokenizer:
181
  raise RuntimeError("Local mode not initialized")
182
 
183
- chat = [{
184
- "role": "user",
185
- "content": self._build_prompt(text)
186
- }]
 
 
 
 
 
 
187
 
188
  terminators = [
189
  self.tokenizer.eos_token_id,
@@ -221,13 +251,21 @@ class UnifiedAudioConverter:
221
  if json_match:
222
  return json.loads(json_match.group())
223
  else:
224
- # Return a default template if no valid JSON found
225
- return {
226
- "conversation": [
227
- {"speaker": "Host", "text": "Welcome to our podcast."},
228
- {"speaker": "Guest", "text": "Thank you for having me."}
229
- ]
230
- }
 
 
 
 
 
 
 
 
231
 
232
  def parse_conversation_text(self, conversation_text: str) -> Dict:
233
  """Parse conversation text back to JSON format"""
@@ -244,15 +282,27 @@ class UnifiedAudioConverter:
244
 
245
  return conversation_data
246
 
247
- async def text_to_speech_edge(self, conversation_json: Dict, voice_1: str, voice_2: str) -> Tuple[str, str]:
248
  """Convert text to speech using Edge TTS"""
249
  output_dir = Path(self._create_output_directory())
250
  filenames = []
251
 
252
  try:
 
 
 
 
 
 
 
 
 
 
 
 
253
  for i, turn in enumerate(conversation_json["conversation"]):
254
  filename = output_dir / f"output_{i}.wav"
255
- voice = voice_1 if i % 2 == 0 else voice_2
256
 
257
  tmp_path = await self._generate_audio_edge(turn["text"], voice)
258
  os.rename(tmp_path, filename)
@@ -286,7 +336,7 @@ class UnifiedAudioConverter:
286
 
287
  return tmp_path
288
 
289
- def text_to_speech_spark(self, conversation_json: Dict, progress=None) -> Tuple[str, str]:
290
  """Convert text to speech using Spark TTS CLI"""
291
  if not SPARK_AVAILABLE or not self.spark_model_dir:
292
  raise RuntimeError("Spark TTS not available")
@@ -296,10 +346,16 @@ class UnifiedAudioConverter:
296
  audio_files = []
297
 
298
  # Create different voice characteristics for different speakers
299
- voice_configs = [
300
- {"prompt_text": "Hello, welcome to our podcast. I'm your host today.", "gender": "female"},
301
- {"prompt_text": "Thank you for having me. I'm excited to be here.", "gender": "male"}
302
- ]
 
 
 
 
 
 
303
 
304
  for i, turn in enumerate(conversation_json["conversation"]):
305
  text = turn["text"]
@@ -445,7 +501,7 @@ class UnifiedAudioConverter:
445
  converter = UnifiedAudioConverter(ConversationConfig())
446
 
447
 
448
- async def synthesize(article_url: str, mode: str = "API", tts_engine: str = "Edge-TTS"):
449
  """Main synthesis function"""
450
  if not article_url:
451
  return "Please provide a valid URL.", None
@@ -465,10 +521,10 @@ async def synthesize(article_url: str, mode: str = "API", tts_engine: str = "Edg
465
  if not api_key:
466
  return "API key not found. Please set TOGETHER_API_KEY environment variable.", None
467
  converter.initialize_api_mode(api_key)
468
- conversation_json = converter.extract_conversation_api(text)
469
  else: # Local mode
470
  converter.initialize_local_mode()
471
- conversation_json = converter.extract_conversation_local(text)
472
 
473
  # Generate conversation text
474
  conversation_text = "\n".join(
@@ -482,7 +538,7 @@ async def synthesize(article_url: str, mode: str = "API", tts_engine: str = "Edg
482
  return f"Error: {str(e)}", None
483
 
484
 
485
- async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS"):
486
  """Regenerate audio from edited conversation text"""
487
  if not conversation_text.strip():
488
  return "Please provide conversation text.", None
@@ -494,21 +550,23 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS")
494
  if not conversation_json["conversation"]:
495
  return "No valid conversation found in the text.", None
496
 
 
 
 
 
497
  # Generate audio based on TTS engine
498
  if tts_engine == "Edge-TTS":
499
- output_file, _ = await converter.text_to_speech_edge(
500
- conversation_json,
501
- "en-US-AvaMultilingualNeural",
502
- "en-US-AndrewMultilingualNeural"
503
- )
504
  elif tts_engine == "Spark-TTS":
505
  if not SPARK_AVAILABLE:
506
  return "Spark TTS not available. Please install required dependencies and clone the Spark-TTS repository.", None
507
  converter.initialize_spark_tts()
508
- output_file, _ = converter.text_to_speech_spark(conversation_json)
509
  else: # MeloTTS
510
  if not MELO_AVAILABLE:
511
  return "MeloTTS not available. Please install required dependencies.", None
 
 
512
  converter.initialize_melo_tts()
513
  output_file, _ = converter.text_to_speech_melo(conversation_json)
514
 
@@ -518,14 +576,34 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS")
518
  return f"Error generating audio: {str(e)}", None
519
 
520
 
521
- def synthesize_sync(article_url: str, mode: str = "API", tts_engine: str = "Edge-TTS"):
522
  """Synchronous wrapper for async synthesis"""
523
- return asyncio.run(synthesize(article_url, mode, tts_engine))
524
 
525
 
526
- def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS"):
527
  """Synchronous wrapper for async audio regeneration"""
528
- return asyncio.run(regenerate_audio(conversation_text, tts_engine))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
 
530
 
531
  # Gradio Interface
@@ -541,6 +619,14 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
541
  value=""
542
  )
543
  with gr.Column(scale=1):
 
 
 
 
 
 
 
 
544
  mode_selector = gr.Radio(
545
  choices=["API", "Local"],
546
  value="API",
@@ -548,7 +634,7 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
548
  info="API: Faster, requires API key | Local: Slower, runs on device"
549
  )
550
 
551
- # TTS ์—”์ง„ ์„ ํƒ - ๊ธฐ๋ณธ 2๊ฐœ์™€ ์ถ”๊ฐ€ ์˜ต์…˜์œผ๋กœ ๊ตฌ๋ถ„
552
  with gr.Group():
553
  gr.Markdown("### TTS Engine Selection")
554
  tts_selector = gr.Radio(
@@ -565,36 +651,39 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
565
 
566
  **Additional Option:**
567
  - โšก **MeloTTS**: Local processing, GPU recommended
 
 
 
568
  """)
569
 
570
- convert_btn = gr.Button("๐ŸŽฏ Generate Conversation", variant="primary", size="lg")
571
 
572
  with gr.Row():
573
  with gr.Column():
574
  conversation_output = gr.Textbox(
575
- label="Generated Conversation (Editable)",
576
  lines=15,
577
  max_lines=30,
578
  interactive=True,
579
- placeholder="Generated conversation will appear here. You can edit it before generating audio.",
580
- info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
581
  )
582
 
583
  # ์˜ค๋””์˜ค ์ƒ์„ฑ ๋ฒ„ํŠผ ์ถ”๊ฐ€
584
  with gr.Row():
585
- generate_audio_btn = gr.Button("๐ŸŽ™๏ธ Generate Audio from Text", variant="secondary", size="lg")
586
- gr.Markdown("*Edit the conversation above, then click to generate audio*")
587
 
588
  with gr.Column():
589
  audio_output = gr.Audio(
590
- label="Podcast Audio",
591
  type="filepath",
592
  interactive=False
593
  )
594
 
595
  # ์ƒํƒœ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
596
  status_output = gr.Textbox(
597
- label="Status",
598
  interactive=False,
599
  visible=True
600
  )
@@ -602,14 +691,17 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
602
  # TTS ์—”์ง„๋ณ„ ์„ค๋ช… ๋ฐ ์„ค์น˜ ์•ˆ๋‚ด ์ถ”๊ฐ€
603
  with gr.Row():
604
  gr.Markdown("""
605
- ### TTS Engine Details:
606
 
607
  - **Edge-TTS**: Microsoft's cloud TTS service with high-quality natural voices. Requires internet connection.
 
608
  - **Spark-TTS**: SparkAudio's local AI model (0.5B parameters) with zero-shot voice cloning capability.
609
  - **Setup required**: Clone [Spark-TTS repository](https://github.com/SparkAudio/Spark-TTS) in current directory
610
  - Features: Bilingual support (Chinese/English), controllable speech generation
611
  - License: CC BY-NC-SA (Non-commercial use only)
 
612
  - **MeloTTS**: Local TTS with multiple voice options. GPU recommended for better performance.
 
613
 
614
  ### Spark-TTS Setup Instructions:
615
  ```bash
@@ -621,26 +713,33 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
621
 
622
  gr.Examples(
623
  examples=[
624
- ["https://huggingface.co/blog/openfree/cycle-navigator", "API", "Edge-TTS"],
625
- ["https://www.bbc.com/news/technology-67988517", "API", "Spark-TTS"],
626
- ["https://arxiv.org/abs/2301.00810", "API", "Edge-TTS"],
627
  ],
628
- inputs=[url_input, mode_selector, tts_selector],
629
  outputs=[conversation_output, status_output],
630
  fn=synthesize_sync,
631
  cache_examples=False,
632
  )
633
 
 
 
 
 
 
 
 
634
  # ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
635
  convert_btn.click(
636
  fn=synthesize_sync,
637
- inputs=[url_input, mode_selector, tts_selector],
638
  outputs=[conversation_output, status_output]
639
  )
640
 
641
  generate_audio_btn.click(
642
  fn=regenerate_audio_sync,
643
- inputs=[conversation_output, tts_selector],
644
  outputs=[status_output, audio_output]
645
  )
646
 
 
137
  except httpx.HTTPError as e:
138
  raise RuntimeError(f"Failed to fetch URL: {e}")
139
 
140
+ def _build_prompt(self, text: str, language: str = "English") -> str:
141
  """Build prompt for conversation generation"""
142
+ if language == "Korean":
143
+ template = """
144
+ {
145
+ "conversation": [
146
+ {"speaker": "", "text": ""},
147
+ {"speaker": "", "text": ""}
148
+ ]
149
+ }
150
+ """
151
+ return (
152
+ f"{text}\n\n์ œ๊ณต๋œ ํ…์ŠคํŠธ๋ฅผ ๋‘ ๋ช…์˜ ์ „๋ฌธ๊ฐ€ ๊ฐ„์˜ ์งง๊ณ  ์œ ์ตํ•˜๋ฉฐ ๋ช…ํ™•ํ•œ "
153
+ f"ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋กœ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”. ํ†ค์€ ์ „๋ฌธ์ ์ด๊ณ  ๋งค๋ ฅ์ ์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. "
154
+ f"๋‹ค์Œ ํ˜•์‹์„ ์ค€์ˆ˜ํ•˜๊ณ  JSON๋งŒ ๋ฐ˜ํ™˜ํ•ด์ฃผ์„ธ์š”:\n{template}"
155
+ )
156
+ else:
157
+ template = """
158
+ {
159
+ "conversation": [
160
+ {"speaker": "", "text": ""},
161
+ {"speaker": "", "text": ""}
162
+ ]
163
+ }
164
+ """
165
+ return (
166
+ f"{text}\n\nConvert the provided text into a short, informative and crisp "
167
+ f"podcast conversation between two experts. The tone should be "
168
+ f"professional and engaging. Please adhere to the following "
169
+ f"format and return ONLY the JSON:\n{template}"
170
+ )
171
 
172
+ def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
173
  """Extract conversation using API"""
174
  if not self.llm_client:
175
  raise RuntimeError("API mode not initialized")
176
 
177
  try:
178
+ # ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
179
+ if language == "Korean":
180
+ system_message = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ์ƒ์„ฑํ•˜๋Š” ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ์œ ์ตํ•œ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”."
181
+ else:
182
+ system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
183
+
184
  chat_completion = self.llm_client.chat.completions.create(
185
+ messages=[
186
+ {"role": "system", "content": system_message},
187
+ {"role": "user", "content": self._build_prompt(text, language)}
188
+ ],
189
  model=self.config.model_name,
190
  )
191
 
 
199
  except Exception as e:
200
  raise RuntimeError(f"Failed to extract conversation: {e}")
201
 
202
+ def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
203
  """Extract conversation using local model"""
204
  if not self.local_model or not self.tokenizer:
205
  raise RuntimeError("Local mode not initialized")
206
 
207
+ # ์–ธ์–ด๋ณ„ ์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€
208
+ if language == "Korean":
209
+ system_message = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ์ƒ์„ฑํ•˜๋Š” ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ์œ ์ตํ•œ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”."
210
+ else:
211
+ system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
212
+
213
+ chat = [
214
+ {"role": "system", "content": system_message},
215
+ {"role": "user", "content": self._build_prompt(text, language)}
216
+ ]
217
 
218
  terminators = [
219
  self.tokenizer.eos_token_id,
 
251
  if json_match:
252
  return json.loads(json_match.group())
253
  else:
254
+ # Return a default template based on language
255
+ if language == "Korean":
256
+ return {
257
+ "conversation": [
258
+ {"speaker": "์ง„ํ–‰์ž", "text": "์•ˆ๋…•ํ•˜์„ธ์š”, ํŒŸ์บ์ŠคํŠธ์— ์˜ค์‹  ๊ฒƒ์„ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค."},
259
+ {"speaker": "๊ฒŒ์ŠคํŠธ", "text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์ดˆ๋Œ€ํ•ด ์ฃผ์…”์„œ ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค."}
260
+ ]
261
+ }
262
+ else:
263
+ return {
264
+ "conversation": [
265
+ {"speaker": "Host", "text": "Welcome to our podcast."},
266
+ {"speaker": "Guest", "text": "Thank you for having me."}
267
+ ]
268
+ }
269
 
270
  def parse_conversation_text(self, conversation_text: str) -> Dict:
271
  """Parse conversation text back to JSON format"""
 
282
 
283
  return conversation_data
284
 
285
+ async def text_to_speech_edge(self, conversation_json: Dict, language: str = "English") -> Tuple[str, str]:
286
  """Convert text to speech using Edge TTS"""
287
  output_dir = Path(self._create_output_directory())
288
  filenames = []
289
 
290
  try:
291
+ # ์–ธ์–ด๋ณ„ ์Œ์„ฑ ์„ค์ •
292
+ if language == "Korean":
293
+ voices = [
294
+ "ko-KR-SunHiNeural", # ์—ฌ์„ฑ ์Œ์„ฑ (์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด)
295
+ "ko-KR-InJoonNeural" # ๋‚จ์„ฑ ์Œ์„ฑ (์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด)
296
+ ]
297
+ else:
298
+ voices = [
299
+ "en-US-AvaMultilingualNeural", # ์—ฌ์„ฑ ์Œ์„ฑ
300
+ "en-US-AndrewMultilingualNeural" # ๋‚จ์„ฑ ์Œ์„ฑ
301
+ ]
302
+
303
  for i, turn in enumerate(conversation_json["conversation"]):
304
  filename = output_dir / f"output_{i}.wav"
305
+ voice = voices[i % len(voices)]
306
 
307
  tmp_path = await self._generate_audio_edge(turn["text"], voice)
308
  os.rename(tmp_path, filename)
 
336
 
337
  return tmp_path
338
 
339
+ def text_to_speech_spark(self, conversation_json: Dict, language: str = "English", progress=None) -> Tuple[str, str]:
340
  """Convert text to speech using Spark TTS CLI"""
341
  if not SPARK_AVAILABLE or not self.spark_model_dir:
342
  raise RuntimeError("Spark TTS not available")
 
346
  audio_files = []
347
 
348
  # Create different voice characteristics for different speakers
349
+ if language == "Korean":
350
+ voice_configs = [
351
+ {"prompt_text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์˜ค๋Š˜ ํŒŸ์บ์ŠคํŠธ ์ง„ํ–‰์„ ๋งก์€ ์ง„ํ–‰์ž์ž…๋‹ˆ๋‹ค.", "gender": "female"},
352
+ {"prompt_text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์˜ค๋Š˜ ๊ฒŒ์ŠคํŠธ๋กœ ์ฐธ์—ฌํ•˜๊ฒŒ ๋˜์–ด ๊ธฐ์ฉ๋‹ˆ๋‹ค.", "gender": "male"}
353
+ ]
354
+ else:
355
+ voice_configs = [
356
+ {"prompt_text": "Hello, welcome to our podcast. I'm your host today.", "gender": "female"},
357
+ {"prompt_text": "Thank you for having me. I'm excited to be here.", "gender": "male"}
358
+ ]
359
 
360
  for i, turn in enumerate(conversation_json["conversation"]):
361
  text = turn["text"]
 
501
  converter = UnifiedAudioConverter(ConversationConfig())
502
 
503
 
504
+ async def synthesize(article_url: str, mode: str = "API", tts_engine: str = "Edge-TTS", language: str = "English"):
505
  """Main synthesis function"""
506
  if not article_url:
507
  return "Please provide a valid URL.", None
 
521
  if not api_key:
522
  return "API key not found. Please set TOGETHER_API_KEY environment variable.", None
523
  converter.initialize_api_mode(api_key)
524
+ conversation_json = converter.extract_conversation_api(text, language)
525
  else: # Local mode
526
  converter.initialize_local_mode()
527
+ conversation_json = converter.extract_conversation_local(text, language)
528
 
529
  # Generate conversation text
530
  conversation_text = "\n".join(
 
538
  return f"Error: {str(e)}", None
539
 
540
 
541
+ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
542
  """Regenerate audio from edited conversation text"""
543
  if not conversation_text.strip():
544
  return "Please provide conversation text.", None
 
550
  if not conversation_json["conversation"]:
551
  return "No valid conversation found in the text.", None
552
 
553
+ # ํ•œ๊ตญ์–ด์ธ ๊ฒฝ์šฐ Edge-TTS๋งŒ ์‚ฌ์šฉ (๋‹ค๋ฅธ TTS๋Š” ํ•œ๊ตญ์–ด ์ง€์›์ด ์ œํ•œ์ )
554
+ if language == "Korean" and tts_engine != "Edge-TTS":
555
+ return "ํ•œ๊ตญ์–ด๋Š” Edge-TTS๋งŒ ์ง€์›๋ฉ๋‹ˆ๋‹ค. TTS ์—”์ง„์ด ์ž๋™์œผ๋กœ Edge-TTS๋กœ ๋ณ€๊ฒฝ๋ฉ๋‹ˆ๋‹ค.", None
556
+
557
  # Generate audio based on TTS engine
558
  if tts_engine == "Edge-TTS":
559
+ output_file, _ = await converter.text_to_speech_edge(conversation_json, language)
 
 
 
 
560
  elif tts_engine == "Spark-TTS":
561
  if not SPARK_AVAILABLE:
562
  return "Spark TTS not available. Please install required dependencies and clone the Spark-TTS repository.", None
563
  converter.initialize_spark_tts()
564
+ output_file, _ = converter.text_to_speech_spark(conversation_json, language)
565
  else: # MeloTTS
566
  if not MELO_AVAILABLE:
567
  return "MeloTTS not available. Please install required dependencies.", None
568
+ if language == "Korean":
569
+ return "MeloTTS does not support Korean. Please use Edge-TTS for Korean.", None
570
  converter.initialize_melo_tts()
571
  output_file, _ = converter.text_to_speech_melo(conversation_json)
572
 
 
576
  return f"Error generating audio: {str(e)}", None
577
 
578
 
579
def synthesize_sync(article_url: str, mode: str = "API", tts_engine: str = "Edge-TTS", language: str = "English"):
    """Synchronous wrapper around the async `synthesize` coroutine.

    Forwards all four arguments unchanged, runs the coroutine to completion
    with `asyncio.run`, and returns whatever `synthesize` returns.

    Note: `asyncio.run` raises RuntimeError if an event loop is already
    running in the calling thread, so this must be called from sync code.
    """
    return asyncio.run(synthesize(article_url, mode, tts_engine, language))
582
 
583
 
584
def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
    """Synchronous wrapper around the async `regenerate_audio` coroutine.

    Forwards all three arguments unchanged, runs the coroutine to completion
    with `asyncio.run`, and returns whatever `regenerate_audio` returns.

    Note: `asyncio.run` raises RuntimeError if an event loop is already
    running in the calling thread, so this must be called from sync code.
    """
    return asyncio.run(regenerate_audio(conversation_text, tts_engine, language))
587
+
588
+
589
+ def update_tts_engine_for_korean(language):
590
+ """ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ TTS ์—”์ง„ ์˜ต์…˜ ์—…๋ฐ์ดํŠธ"""
591
+ if language == "Korean":
592
+ return gr.Radio(
593
+ choices=["Edge-TTS"],
594
+ value="Edge-TTS",
595
+ label="TTS Engine",
596
+ info="ํ•œ๊ตญ์–ด๋Š” Edge-TTS๋งŒ ์ง€์›๋ฉ๋‹ˆ๋‹ค",
597
+ interactive=False
598
+ )
599
+ else:
600
+ return gr.Radio(
601
+ choices=["Edge-TTS", "Spark-TTS", "MeloTTS"],
602
+ value="Edge-TTS",
603
+ label="TTS Engine",
604
+ info="Edge-TTS: Cloud-based, natural voices | Spark-TTS: Local AI model | MeloTTS: Local, requires GPU",
605
+ interactive=True
606
+ )
607
 
608
 
609
  # Gradio Interface
 
619
  value=""
620
  )
621
  with gr.Column(scale=1):
622
+ # ์–ธ์–ด ์„ ํƒ ์ถ”๊ฐ€
623
+ language_selector = gr.Radio(
624
+ choices=["English", "Korean"],
625
+ value="English",
626
+ label="Language / ์–ธ์–ด",
627
+ info="Select output language / ์ถœ๋ ฅ ์–ธ์–ด๋ฅผ ์„ ํƒํ•˜์„ธ์š”"
628
+ )
629
+
630
  mode_selector = gr.Radio(
631
  choices=["API", "Local"],
632
  value="API",
 
634
  info="API: Faster, requires API key | Local: Slower, runs on device"
635
  )
636
 
637
+ # TTS ์—”์ง„ ์„ ํƒ
638
  with gr.Group():
639
  gr.Markdown("### TTS Engine Selection")
640
  tts_selector = gr.Radio(
 
651
 
652
  **Additional Option:**
653
  - โšก **MeloTTS**: Local processing, GPU recommended
654
+
655
+ **ํ•œ๊ตญ์–ด ์ง€์›:**
656
+ - ๐Ÿ‡ฐ๐Ÿ‡ท ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ Edge-TTS๋งŒ ๏ฟฝ๏ฟฝ๏ฟฝ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
657
  """)
658
 
659
+ convert_btn = gr.Button("๐ŸŽฏ Generate Conversation / ๋Œ€ํ™” ์ƒ์„ฑ", variant="primary", size="lg")
660
 
661
  with gr.Row():
662
  with gr.Column():
663
  conversation_output = gr.Textbox(
664
+ label="Generated Conversation (Editable) / ์ƒ์„ฑ๋œ ๋Œ€ํ™” (ํŽธ์ง‘ ๊ฐ€๋Šฅ)",
665
  lines=15,
666
  max_lines=30,
667
  interactive=True,
668
+ placeholder="Generated conversation will appear here. You can edit it before generating audio.\n์ƒ์„ฑ๋œ ๋Œ€ํ™”๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค. ์˜ค๋””์˜ค ์ƒ์„ฑ ์ „์— ํŽธ์ง‘ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.",
669
+ info="Edit the conversation as needed. Format: 'Speaker Name: Text' / ํ•„์š”์— ๋”ฐ๋ผ ๋Œ€ํ™”๋ฅผ ํŽธ์ง‘ํ•˜์„ธ์š”. ํ˜•์‹: 'ํ™”์ž ์ด๋ฆ„: ํ…์ŠคํŠธ'"
670
  )
671
 
672
  # ์˜ค๋””์˜ค ์ƒ์„ฑ ๋ฒ„ํŠผ ์ถ”๊ฐ€
673
  with gr.Row():
674
+ generate_audio_btn = gr.Button("๐ŸŽ™๏ธ Generate Audio from Text / ํ…์ŠคํŠธ์—์„œ ์˜ค๋””์˜ค ์ƒ์„ฑ", variant="secondary", size="lg")
675
+ gr.Markdown("*Edit the conversation above, then click to generate audio / ์œ„์˜ ๋Œ€ํ™”๋ฅผ ํŽธ์ง‘ํ•œ ํ›„ ํด๋ฆญํ•˜์—ฌ ์˜ค๋””์˜ค๋ฅผ ์ƒ์„ฑํ•˜์„ธ์š”*")
676
 
677
  with gr.Column():
678
  audio_output = gr.Audio(
679
+ label="Podcast Audio / ํŒŸ์บ์ŠคํŠธ ์˜ค๋””์˜ค",
680
  type="filepath",
681
  interactive=False
682
  )
683
 
684
  # ์ƒํƒœ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
685
  status_output = gr.Textbox(
686
+ label="Status / ์ƒํƒœ",
687
  interactive=False,
688
  visible=True
689
  )
 
691
  # TTS ์—”์ง„๋ณ„ ์„ค๋ช… ๋ฐ ์„ค์น˜ ์•ˆ๋‚ด ์ถ”๊ฐ€
692
  with gr.Row():
693
  gr.Markdown("""
694
+ ### TTS Engine Details / TTS ์—”์ง„ ์ƒ์„ธ์ •๋ณด:
695
 
696
  - **Edge-TTS**: Microsoft's cloud TTS service with high-quality natural voices. Requires internet connection.
697
+ - ๐Ÿ‡ฐ๐Ÿ‡ท **ํ•œ๊ตญ์–ด ์ง€์›**: ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ์Œ์„ฑ (์—ฌ์„ฑ: SunHi, ๋‚จ์„ฑ: InJoon)
698
  - **Spark-TTS**: SparkAudio's local AI model (0.5B parameters) with zero-shot voice cloning capability.
699
  - **Setup required**: Clone [Spark-TTS repository](https://github.com/SparkAudio/Spark-TTS) in current directory
700
  - Features: Bilingual support (Chinese/English), controllable speech generation
701
  - License: CC BY-NC-SA (Non-commercial use only)
702
+ - โš ๏ธ **ํ•œ๊ตญ์–ด ๋ฏธ์ง€์›**
703
  - **MeloTTS**: Local TTS with multiple voice options. GPU recommended for better performance.
704
+ - โš ๏ธ **ํ•œ๊ตญ์–ด ๋ฏธ์ง€์›**
705
 
706
  ### Spark-TTS Setup Instructions:
707
  ```bash
 
713
 
714
  gr.Examples(
715
  examples=[
716
+ ["https://huggingface.co/blog/openfree/cycle-navigator", "API", "Edge-TTS", "English"],
717
+ ["https://www.bbc.com/news/technology-67988517", "API", "Spark-TTS", "English"],
718
+ ["https://arxiv.org/abs/2301.00810", "API", "Edge-TTS", "Korean"],
719
  ],
720
+ inputs=[url_input, mode_selector, tts_selector, language_selector],
721
  outputs=[conversation_output, status_output],
722
  fn=synthesize_sync,
723
  cache_examples=False,
724
  )
725
 
726
+ # ์–ธ์–ด ๋ณ€๊ฒฝ ์‹œ TTS ์—”์ง„ ์˜ต์…˜ ์—…๋ฐ์ดํŠธ
727
+ language_selector.change(
728
+ fn=update_tts_engine_for_korean,
729
+ inputs=[language_selector],
730
+ outputs=[tts_selector]
731
+ )
732
+
733
  # ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
734
  convert_btn.click(
735
  fn=synthesize_sync,
736
+ inputs=[url_input, mode_selector, tts_selector, language_selector],
737
  outputs=[conversation_output, status_output]
738
  )
739
 
740
  generate_audio_btn.click(
741
  fn=regenerate_audio_sync,
742
+ inputs=[conversation_output, tts_selector, language_selector],
743
  outputs=[status_output, audio_output]
744
  )
745