Spaces: Running on Zero
Update app.py

app.py CHANGED
@@ -1,14 +1,78 @@
-import random
-import numpy as np
-import torch
-from chatterbox.src.chatterbox.tts import ChatterboxTTS
-import gradio as gr
 import spaces
 import re

 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🚀 Running on device: {DEVICE}")

 def set_seed(seed: int):
     """Sets the random seed for reproducibility across torch, numpy, and random."""
     torch.manual_seed(seed)
@@ -18,6 +82,7 @@ def set_seed(seed: int):
 random.seed(seed)
 np.random.seed(seed)

 def split_text_into_chunks(text: str, max_chars: int = 250) -> list[str]:
     """
     Splits the text at sentence boundaries, keeping each chunk within max_chars.
@@ -65,215 +130,953 @@ def split_text_into_chunks(text: str, max_chars: int = 250) -> list[str]:

     return chunks

[Removed (old lines 68-279): the remainder of the original single-voice app. Recoverable fragments include a generation call ending in `return (model.sr, wav.squeeze(0).numpy())`; a long-text path that generated each chunk via `model.generate(chunk, audio_prompt_path=audio_prompt_path_input, exaggeration=exaggeration_input, temperature=temperature_input, cfg_weight=cfgw_input)` inside a try/except that printed "Error while generating chunk {i + 1}: {e}" and continued; silence-separated concatenation returning `(model.sr, concatenated_audio)`; a Gradio UI with a reference-audio input defaulting to the female_shadowheart4.flac sample prompt and an Exaggeration slider (0.25 to 2, step .05, value .5); `update_char_count` handlers wired to `text` and `chunk_size`; and startup code that removed model loading at app start (loading only inside the GPU function), printing "App started. The model will be loaded on first generation."]

app.py after the change:

import spaces
import gradio as gr
import os
import asyncio
import torch
import io
import json
import re
import httpx
import tempfile
import wave
import base64
import numpy as np
import soundfile as sf
import subprocess
import shutil
import requests
import logging
import random
from datetime import datetime, timedelta
from typing import List, Tuple, Dict, Optional
from pathlib import Path
from threading import Thread
from dotenv import load_dotenv

# PDF processing imports
from langchain_community.document_loaders import PyPDFLoader

# OpenAI imports
from openai import OpenAI

# Transformers imports (for legacy local mode)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    BitsAndBytesConfig,
)

# Llama CPP imports (for new local mode)
try:
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles
    from huggingface_hub import hf_hub_download
    LLAMA_CPP_AVAILABLE = True
except ImportError:
    LLAMA_CPP_AVAILABLE = False

# Chatterbox TTS imports
try:
    from chatterbox.src.chatterbox.tts import ChatterboxTTS
    CHATTERBOX_AVAILABLE = True
except ImportError:
    CHATTERBOX_AVAILABLE = False

# Import config and prompts
from config_prompts import (
    ConversationConfig,
    PromptBuilder,
    DefaultConversations,
)

load_dotenv()

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Running on device: {DEVICE}")

# Brave Search API configuration
BRAVE_KEY = os.getenv("BSEARCH_API")
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"


def set_seed(seed: int):
    """Sets the random seed for reproducibility across torch, numpy, and random."""
    torch.manual_seed(seed)
    # ... (unchanged lines collapsed in the commit view) ...
    random.seed(seed)
    np.random.seed(seed)


def split_text_into_chunks(text: str, max_chars: int = 250) -> list[str]:
    """
    Splits the text at sentence boundaries, keeping each chunk within max_chars.
    """
    # ... (unchanged function body collapsed in the commit view) ...
    return chunks

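# Illustrative behavior sketch for split_text_into_chunks (hypothetical values;
# the body is collapsed above, so the exact grouping may differ):
#     >>> split_text_into_chunks("One. Two. Three.", max_chars=10)
#     ['One. Two.', 'Three.']   # sentences packed greedily under max_chars
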
def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
    """Search for up-to-date information via the Brave Search API."""
    if not BRAVE_KEY:
        return []
    params = {"q": query, "count": str(count)}
    if freshness_days:
        dt_from = (datetime.utcnow() - timedelta(days=freshness_days)).strftime("%Y-%m-%d")
        params["freshness"] = dt_from
    try:
        r = requests.get(
            BRAVE_ENDPOINT,
            headers={"Accept": "application/json", "X-Subscription-Token": BRAVE_KEY},
            params=params,
            timeout=15
        )
        raw = r.json().get("web", {}).get("results") or []
        return [{
            "title": item.get("title", ""),
            "url": item.get("url", item.get("link", "")),
            "snippet": item.get("description", item.get("text", "")),
            "host": re.sub(r"https?://(www\.)?", "", item.get("url", "")).split("/")[0]
        } for item in raw[:count]]
    except Exception as e:
        logging.error(f"Brave search error: {e}")
        return []


def format_search_results(query: str, for_keyword: bool = False) -> str:
    """Format search results and return them as a string."""
    # Keyword searches use more results
    count = 5 if for_keyword else 3
    rows = brave_search(query, count, freshness_days=7 if not for_keyword else None)
    if not rows:
        return ""

    results = []
    # Keyword searches include more detail
    max_results = 4 if for_keyword else 2
    for r in rows[:max_results]:
        if for_keyword:
            # Longer snippets for keyword searches
            snippet = r['snippet'][:200] + "..." if len(r['snippet']) > 200 else r['snippet']
            results.append(f"**{r['title']}**\n{snippet}\nSource: {r['host']}")
        else:
            # Shorter snippets for general searches
            snippet = r['snippet'][:100] + "..." if len(r['snippet']) > 100 else r['snippet']
            results.append(f"- {r['title']}: {snippet}")

    return "\n\n".join(results) + "\n"

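# Illustrative usage of the two search helpers (not executed; requires the
# BSEARCH_API environment variable, otherwise both return empty values):
#     >>> brave_search("chatterbox tts", count=3)
#     [{'title': ..., 'url': ..., 'snippet': ..., 'host': ...}, ...]
#     >>> format_search_results("quantum computing", for_keyword=True)
#     '**...**\n...\nSource: ...\n'
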
def extract_keywords_for_search(text: str, language: str = "English") -> List[str]:
    """Extract keywords to search for from the text."""
    # Use only the start of the text (avoids processing too much)
    text_sample = text[:500]

    # For English, take the single longest capitalized word
    words = text_sample.split()
    keywords = [word.strip('.,!?;:') for word in words
                if len(word) > 4 and word[0].isupper()]
    if keywords:
        return [max(keywords, key=len)]  # single longest word
    return []

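# Example of the keyword heuristic above (illustrative input):
#     >>> extract_keywords_for_search("Researchers at DeepMind published new results.")
#     ['Researchers']   # the longest capitalized word longer than 4 characters
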
def search_and_compile_content(keyword: str, language: str = "English") -> str:
    """Search on the keyword and compile enough content."""
    if not BRAVE_KEY:
        # No API key: generate fallback content
        return f"""
Comprehensive information about '{keyword}':

{keyword} is a significant topic in modern society.
This subject impacts our lives in various ways and has been
gaining increasing attention recently.

Key aspects:
1. Technological advancement and innovation
2. Social impact and changes
3. Future prospects and possibilities
4. Practical applications
5. Global trends and developments

Experts predict that {keyword} will become even more important,
and it's crucial to develop a deep understanding of this topic.
"""

    # English search queries
    queries = [
        f"{keyword} latest news 2024",
        f"{keyword} explained comprehensive",
        f"{keyword} trends forecast",
        f"{keyword} advantages disadvantages",
        f"{keyword} how to use",
        f"{keyword} expert opinions"
    ]

    all_content = []
    total_content_length = 0

    for query in queries:
        results = brave_search(query, count=5)  # fetch more results
        for r in results[:3]:  # top 3 per query
            content = f"**{r['title']}**\n{r['snippet']}\nSource: {r['host']}\n"
            all_content.append(content)
            total_content_length += len(r['snippet'])

    # If the content is too thin, pad it out
    if total_content_length < 1000:  # ensure at least 1000 characters
        additional_content = f"""
Additional insights:
Recent developments in {keyword} show rapid advancement in this field.
Many experts are actively researching this topic, and its practical
applications continue to expand.

Key points to note:
- Accelerating technological innovation
- Improving user experience
- Enhanced accessibility
- Increased cost efficiency
- Growing global market

These factors are making the future of {keyword} increasingly promising.
"""
        all_content.append(additional_content)

    # Return the compiled content
    compiled = "\n\n".join(all_content)

    # Keyword-based introduction
    intro = f"### Comprehensive information and latest trends about '{keyword}':\n\n"

    return intro + compiled

class UnifiedAudioConverter:
    def __init__(self, config: ConversationConfig):
        self.config = config
        self.llm_client = None
        self.legacy_local_model = None
        self.legacy_tokenizer = None
        # New local LLM state
        self.local_llm = None
        self.local_llm_model = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Prompt builder
        self.prompt_builder = PromptBuilder()

    def initialize_api_mode(self, api_key: str):
        """Initialize API mode with Together API"""
        self.llm_client = OpenAI(api_key=api_key, base_url="https://api.together.xyz/v1")

    @spaces.GPU(duration=120)
    def initialize_local_mode(self):
        """Initialize new local mode with Llama CPP"""
        if not LLAMA_CPP_AVAILABLE:
            raise RuntimeError("Llama CPP dependencies not available. Please install llama-cpp-python and llama-cpp-agent.")

        if self.local_llm is None or self.local_llm_model != self.config.local_model_name:
            try:
                # Download the model
                model_path = hf_hub_download(
                    repo_id=self.config.local_model_repo,
                    filename=self.config.local_model_name,
                    local_dir="./models"
                )

                model_path_local = os.path.join("./models", self.config.local_model_name)

                if not os.path.exists(model_path_local):
                    raise RuntimeError(f"Model file not found at {model_path_local}")

                # Initialize the Llama model
                self.local_llm = Llama(
                    model_path=model_path_local,
                    flash_attn=True,
                    n_gpu_layers=81 if torch.cuda.is_available() else 0,
                    n_batch=1024,
                    n_ctx=16384,
                )
                self.local_llm_model = self.config.local_model_name
                print(f"Local LLM initialized: {model_path_local}")

            except Exception as e:
                print(f"Failed to initialize local LLM: {e}")
                raise RuntimeError(f"Failed to initialize local LLM: {e}")

    @spaces.GPU(duration=60)
    def initialize_legacy_local_mode(self):
        """Initialize legacy local mode with Hugging Face model (fallback)"""
        if self.legacy_local_model is None:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16
            )
            self.legacy_local_model = AutoModelForCausalLM.from_pretrained(
                self.config.legacy_local_model_name,
                quantization_config=quantization_config
            )
            self.legacy_tokenizer = AutoTokenizer.from_pretrained(
                self.config.legacy_local_model_name,
                revision='8ab73a6800796d84448bc936db9bac5ad9f984ae'
            )

    def fetch_text(self, url: str) -> str:
        """Fetch text content from URL"""
        if not url:
            raise ValueError("URL cannot be empty")

        if not url.startswith("http://") and not url.startswith("https://"):
            raise ValueError("URL must start with 'http://' or 'https://'")

        full_url = f"{self.config.prefix_url}{url}"
        try:
            response = httpx.get(full_url, timeout=60.0)
            response.raise_for_status()
            return response.text
        except httpx.HTTPError as e:
            raise RuntimeError(f"Failed to fetch URL: {e}")

    def extract_text_from_pdf(self, pdf_file) -> str:
        """Extract text content from PDF file"""
        try:
            # Gradio returns a file path, not a file object
            if isinstance(pdf_file, str):
                pdf_path = pdf_file
            else:
                # If it's a file object (shouldn't happen with Gradio)
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(pdf_file.read())
                    pdf_path = tmp_file.name

            # Load the PDF and extract text
            loader = PyPDFLoader(pdf_path)
            pages = loader.load()

            # Combine the text from all pages
            text = "\n".join([page.page_content for page in pages])

            # Delete the temporary file, if one was created
            if not isinstance(pdf_file, str) and os.path.exists(pdf_path):
                os.unlink(pdf_path)

            return text
        except Exception as e:
            raise RuntimeError(f"Failed to extract text from PDF: {e}")

    def _get_messages_formatter_type(self, model_name):
        """Get appropriate message formatter for the model"""
        if "Mistral" in model_name or "BitSix" in model_name:
            return MessagesFormatterType.CHATML
        else:
            return MessagesFormatterType.LLAMA_3

    @spaces.GPU(duration=120)
    def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
        """Extract conversation using new local LLM with enhanced professional style"""
        try:
            # Build search context
            search_context = ""
            if BRAVE_KEY and not text.startswith("Keyword-based content:"):
                try:
                    keywords = extract_keywords_for_search(text, language)
                    if keywords:
                        search_query = f"{keywords[0]} latest news"
                        search_context = format_search_results(search_query)
                        print(f"Search context added for: {search_query}")
                except Exception as e:
                    print(f"Search failed, continuing without context: {e}")

            # Try the new local LLM first
            self.initialize_local_mode()

            chat_template = self._get_messages_formatter_type(self.config.local_model_name)
            provider = LlamaCppPythonProvider(self.local_llm)

            # English-only system message
            system_message = (
                f"You are a professional podcast scriptwriter creating high-quality, "
                f"insightful discussions in English. Create exactly 12 conversation exchanges "
                f"with professional expertise. All dialogue must be in English. "
                f"Respond only in JSON format."
            )

            agent = LlamaCppAgent(
                provider,
                system_prompt=system_message,
                predefined_messages_formatter_type=chat_template,
                debug_output=False
            )

            settings = provider.get_provider_default_settings()
            settings.temperature = 0.75
            settings.top_k = 40
            settings.top_p = 0.95
            settings.max_tokens = self.config.max_tokens
            settings.repeat_penalty = 1.1
            settings.stream = False

            messages = BasicChatHistory()

            prompt = self.prompt_builder.build_prompt(text, language, search_context)
            response = agent.get_chat_response(
                prompt,
                llm_sampling_settings=settings,
                chat_history=messages,
                returns_streaming_generator=False,
                print_output=False
            )

            # Parse the JSON payload out of the response
            pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
            json_match = re.search(pattern, response)

            if json_match:
                conversation_data = json.loads(json_match.group())
                return conversation_data
            else:
                raise ValueError("No valid JSON found in local LLM response")

        except Exception as e:
            print(f"Local LLM failed: {e}, falling back to legacy local method")
            return self.extract_conversation_legacy_local(text, language, progress, search_context)

    @spaces.GPU(duration=120)
    def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
        """Extract conversation using legacy local model"""
        try:
            self.initialize_legacy_local_mode()

            # English-only messages
            messages = self.prompt_builder.build_messages_for_local(text, language, search_context)

            terminators = [
                self.legacy_tokenizer.eos_token_id,
                self.legacy_tokenizer.convert_tokens_to_ids("<|eot_id|>")
            ]

            chat_messages = self.legacy_tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            model_inputs = self.legacy_tokenizer([chat_messages], return_tensors="pt").to(self.device)

            streamer = TextIteratorStreamer(
                self.legacy_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
            )

            generate_kwargs = dict(
                model_inputs,
                streamer=streamer,
                max_new_tokens=self.config.max_new_tokens,
                do_sample=True,
                temperature=0.75,
                eos_token_id=terminators,
            )

            t = Thread(target=self.legacy_local_model.generate, kwargs=generate_kwargs)
            t.start()

            partial_text = ""
            for new_text in streamer:
                partial_text += new_text

            pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
            json_match = re.search(pattern, partial_text)

            if json_match:
                return json.loads(json_match.group())
            else:
                raise ValueError("No valid JSON found in legacy local response")

        except Exception as e:
            print(f"Legacy local model also failed: {e}")
            return DefaultConversations.get_conversation("English")

    def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
        """Extract conversation using API"""
        if not self.llm_client:
            raise RuntimeError("API mode not initialized")

        try:
            # Build search context
            search_context = ""
            if BRAVE_KEY and not text.startswith("Keyword-based content:"):
                try:
                    keywords = extract_keywords_for_search(text, language)
                    if keywords:
                        search_query = f"{keywords[0]} latest news"
                        search_context = format_search_results(search_query)
                        print(f"Search context added for: {search_query}")
                except Exception as e:
                    print(f"Search failed, continuing without context: {e}")

            # Build the messages
            messages = self.prompt_builder.build_messages_for_local(text, language, search_context)

            chat_completion = self.llm_client.chat.completions.create(
                messages=messages,
                model=self.config.api_model_name,
                temperature=0.75,
            )

            pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
            json_match = re.search(pattern, chat_completion.choices[0].message.content)

            if not json_match:
                raise ValueError("No valid JSON found in response")

            return json.loads(json_match.group())
        except Exception as e:
            raise RuntimeError(f"Failed to extract conversation: {e}")

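    # Note on the JSON extraction used in the three methods above: the
    # brace-matching pattern r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}" tolerates prose
    # around the payload and one level of nested objects, which covers the
    # expected {"conversation": [{"speaker": ..., "text": ...}, ...]} shape.
    # Illustration:
    #     >>> re.search(pattern, 'Sure! {"conversation": [{"speaker": "A", "text": "Hi"}]}').group()
    #     '{"conversation": [{"speaker": "A", "text": "Hi"}]}'
    # Deeper nesting would not match; the calling code treats that as
    # "no valid JSON" and falls back.
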
    def parse_conversation_text(self, conversation_text: str) -> Dict:
        """Parse conversation text back to JSON format"""
        lines = conversation_text.strip().split('\n')
        conversation_data = {"conversation": []}

        for line in lines:
            if ':' in line:
                speaker, text = line.split(':', 1)
                conversation_data["conversation"].append({
                    "speaker": speaker.strip(),
                    "text": text.strip()
                })

        return conversation_data

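    # Illustration of the round trip with the "Speaker: text" rendering in
    # synthesize() below (hypothetical dialogue):
    #     >>> converter.parse_conversation_text("Alex: Hello.\nJordan: Hi there.")
    #     {'conversation': [{'speaker': 'Alex', 'text': 'Hello.'},
    #                       {'speaker': 'Jordan', 'text': 'Hi there.'}]}
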
    @spaces.GPU(duration=120)
    def generate_tts_audio_gpu(
        self,
        conversation_json: Dict,
        audio_prompt_path_input: str,
        exaggeration_input: float = 0.5,
        temperature_input: float = 0.8,
        seed_num_input: int = 0,
        cfgw_input: float = 0.5,
        chunk_size_input: int = 250
    ) -> tuple[int, np.ndarray]:
        """
        Convert the conversation to speech with Chatterbox TTS.
        """
        if not CHATTERBOX_AVAILABLE:
            raise RuntimeError("Chatterbox TTS not available")

        # Load the model inside the GPU function
        model = ChatterboxTTS.from_pretrained(DEVICE)

        if seed_num_input != 0:
            set_seed(int(seed_num_input))

        audio_segments = []

        for i, turn in enumerate(conversation_json["conversation"]):
            text = turn["text"]
            if not text.strip():
                continue

            print(f"Generating: Speaker {i+1} - '{text[:50]}...'")

            try:
                # Short text: generate in a single pass
                if len(text) <= 300:
                    wav = model.generate(
                        text,
                        audio_prompt_path=audio_prompt_path_input,
                        exaggeration=exaggeration_input,
                        temperature=temperature_input,
                        cfg_weight=cfgw_input,
                    )
                    wav_chunk = wav.squeeze(0).numpy()
                    audio_segments.append(wav_chunk)
                else:
                    # Long text: split into chunks
                    chunks = split_text_into_chunks(text, max_chars=chunk_size_input)

                    chunk_audio_segments = []
                    for chunk in chunks:
                        wav = model.generate(
                            chunk,
                            audio_prompt_path=audio_prompt_path_input,
                            exaggeration=exaggeration_input,
                            temperature=temperature_input,
                            cfg_weight=cfgw_input,
                        )
                        wav_chunk = wav.squeeze(0).numpy()
                        chunk_audio_segments.append(wav_chunk)

                    # Join the chunks
                    if chunk_audio_segments:
                        silence_duration = int(0.1 * model.sr)  # 0.1 s of silence
                        silence = np.zeros(silence_duration)

                        turn_audio = []
                        for j, segment in enumerate(chunk_audio_segments):
                            turn_audio.append(segment)
                            if j < len(chunk_audio_segments) - 1:
                                turn_audio.append(silence)

                        concatenated_turn = np.concatenate(turn_audio)
                        audio_segments.append(concatenated_turn)

            except Exception as e:
                print(f"Error while generating Speaker {i+1}: {e}")
                continue

        if not audio_segments:
            raise RuntimeError("Audio generation failed.")

        # Join the audio segments from all speakers
        speaker_silence_duration = int(0.5 * model.sr)  # 0.5 s of silence between speakers
        speaker_silence = np.zeros(speaker_silence_duration)

        final_audio = []
        for i, segment in enumerate(audio_segments):
            final_audio.append(segment)
            if i < len(audio_segments) - 1:
                final_audio.append(speaker_silence)

        concatenated_audio = np.concatenate(final_audio)

        print(f"Audio generation complete. Total length: {len(concatenated_audio) / model.sr:.2f}s")
        return (model.sr, concatenated_audio)

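    # Illustrative call (not executed; needs GPU time and the Chatterbox
    # weights; "voice.flac" is a hypothetical reference sample):
    #     >>> sr, audio = converter.generate_tts_audio_gpu(
    #     ...     {"conversation": [{"speaker": "Host", "text": "Welcome!"}]},
    #     ...     "voice.flac")
    #     >>> audio            # 1-D numpy array of samples at rate sr
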
    def _create_output_directory(self) -> str:
        """Create a unique output directory"""
        random_bytes = os.urandom(8)
        folder_name = base64.urlsafe_b64encode(random_bytes).decode("utf-8")
        os.makedirs(folder_name, exist_ok=True)
        return folder_name


# Global converter instance
converter = UnifiedAudioConverter(ConversationConfig())


async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"):
    """Main synthesis function - handles URL, PDF, and Keyword inputs"""
    try:
        # Extract text based on input type
        if input_type == "URL":
            if not article_input or not isinstance(article_input, str):
                return "Please provide a valid URL.", None
            text = converter.fetch_text(article_input)
        elif input_type == "PDF":
            if not article_input:
                return "Please upload a PDF file.", None
            text = converter.extract_text_from_pdf(article_input)
        else:  # Keyword
            if not article_input or not isinstance(article_input, str):
                return "Please provide a keyword or topic.", None
            text = search_and_compile_content(article_input, "English")
            text = f"Keyword-based content:\n{text}"

        # Limit text to max words
        words = text.split()
        if len(words) > converter.config.max_words:
            text = " ".join(words[:converter.config.max_words])

        # Extract conversation based on mode
        if mode == "Local":
            try:
                conversation_json = converter.extract_conversation_local(text, "English")
            except Exception as e:
                print(f"Local mode failed: {e}, trying API fallback")
                api_key = os.environ.get("TOGETHER_API_KEY")
                if api_key:
                    converter.initialize_api_mode(api_key)
                    conversation_json = converter.extract_conversation_api(text, "English")
                else:
                    raise RuntimeError("Local mode failed and no API key available for fallback")
        else:  # API mode
            api_key = os.environ.get("TOGETHER_API_KEY")
            if not api_key:
                print("API key not found, falling back to local mode")
                conversation_json = converter.extract_conversation_local(text, "English")
            else:
                try:
                    converter.initialize_api_mode(api_key)
                    conversation_json = converter.extract_conversation_api(text, "English")
                except Exception as e:
                    print(f"API mode failed: {e}, falling back to local mode")
                    conversation_json = converter.extract_conversation_local(text, "English")

        # Generate conversation text
        conversation_text = "\n".join(
            f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
            for i, turn in enumerate(conversation_json["conversation"])
        )

        return conversation_text, None

    except Exception as e:
        return f"Error: {str(e)}", None


async def regenerate_audio(
    conversation_text: str,
    ref_audio_path: str,
    exaggeration: float = 0.5,
    temperature: float = 0.8,
    seed_num: int = 0,
    cfg_weight: float = 0.5,
    chunk_size: int = 250
):
    """Regenerate audio from edited conversation text using Chatterbox TTS"""
    if not conversation_text.strip():
        return "Please provide conversation text.", None

    try:
        conversation_json = converter.parse_conversation_text(conversation_text)

        if not conversation_json["conversation"]:
            return "No valid conversation found in the text.", None

        # Generate audio using Chatterbox TTS
        sr, audio = converter.generate_tts_audio_gpu(
            conversation_json,
            ref_audio_path,
            exaggeration,
            temperature,
            seed_num,
            cfg_weight,
            chunk_size
        )

        # Save audio to file
        output_dir = converter._create_output_directory()
        output_file = os.path.join(output_dir, "podcast_audio.wav")
        sf.write(output_file, audio, sr)

        return "Audio generated successfully!", output_file

    except Exception as e:
        return f"Error generating audio: {str(e)}", None


def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local"):
    """Synchronous wrapper for async synthesis"""
    return asyncio.run(synthesize(article_input, input_type, mode))


def regenerate_audio_sync(conversation_text: str, ref_audio_path: str, exaggeration: float, temperature: float, seed_num: int, cfg_weight: float, chunk_size: int):
    """Synchronous wrapper for async audio regeneration"""
    return asyncio.run(regenerate_audio(conversation_text, ref_audio_path, exaggeration, temperature, seed_num, cfg_weight, chunk_size))

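# Illustrative headless use of the two-step flow the UI drives (not executed;
# the URL and reference file are hypothetical):
#     >>> script, _ = synthesize_sync("https://example.com/article", "URL", "API")
#     >>> status, wav_path = regenerate_audio_sync(script, "voice.flac", 0.5, 0.8, 0, 0.5, 250)
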
def toggle_input_visibility(input_type):
    """Toggle visibility of URL input, file upload, and keyword input based on input type"""
    if input_type == "URL":
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
    elif input_type == "PDF":
        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
    else:  # Keyword
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)


def update_char_count(text, chunk_size):
    """Update the text length information"""
    char_len = len(text)
    if char_len <= 300:
        return f"{char_len} characters (single generation)"
    else:
        chunks = split_text_into_chunks(text, max_chars=chunk_size)
        chunk_count = len(chunks)
        estimated_time = chunk_count * 3  # roughly 3 seconds per chunk
        return f"{char_len} characters, {chunk_count} chunks (estimated time: ~{estimated_time}s)"

# Model initialization (at app startup)
if LLAMA_CPP_AVAILABLE:
    try:
        model_path = hf_hub_download(
            repo_id=converter.config.local_model_repo,
            filename=converter.config.local_model_name,
            local_dir="./models"
        )
        print(f"Model downloaded to: {model_path}")
    except Exception as e:
        print(f"Failed to download model at startup: {e}")


# Gradio Interface
with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
    .container {max-width: 1200px; margin: auto; padding: 20px;}
    .header-text {text-align: center; margin-bottom: 30px;}
    .input-group {background: #f7f7f7; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
    .output-group {background: #f0f0f0; padding: 20px; border-radius: 10px;}
    .status-box {background: #e8f4f8; padding: 15px; border-radius: 8px; margin-top: 10px;}
""") as demo:
    with gr.Column(elem_classes="container"):
        # Header
        with gr.Row(elem_classes="header-text"):
            gr.Markdown("""
            # 🎙️ LIVE Podcast Generator with Chatterbox TTS
            ### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation!
            """)

        with gr.Row(elem_classes="discord-badge"):
            gr.HTML("""
            <p style="text-align: center;">
                <a href="https://discord.gg/openfreeai" target="_blank" style="display: inline-block; margin-right: 10px;">
                    <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="badge">
                </a>
                <a href="https://open.spotify.com/show/36GtIP7iqJxCwp7FfXmTYK?si=KsIsUJq7SJiiudPTaMsXAA" target="_blank" style="display: inline-block;">
                    <img src="https://img.shields.io/static/v1?label=Spotify&message=Podcast&color=%230000ff&labelColor=%23000080&logo=Spotify&logoColor=white&style=for-the-badge" alt="badge">
                </a>
                <a href="https://huggingface.co/spaces/openfree/AI-Podcast" target="_blank" style="display: inline-block;">
                    <img src="https://img.shields.io/static/v1?label=Huggingface&message=AI%20Podcast&color=%230000ff&labelColor=%23ffa500&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
                </a>
            </p>
            """)

        # Status display section
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(f"""
                #### 🤖 System Status
                - **LLM**: {converter.config.local_model_name.split('.')[0]}
                - **Fallback**: {converter.config.api_model_name.split('/')[-1]}
                - **Llama CPP**: {"✅ Ready" if LLAMA_CPP_AVAILABLE else "❌ Not Available"}
                - **Chatterbox TTS**: {"✅ Ready" if CHATTERBOX_AVAILABLE else "❌ Not Available"}
                - **Search**: {"✅ Brave API" if BRAVE_KEY else "❌ No API"}
                """)
            with gr.Column(scale=1):
                gr.Markdown("""
                #### 🎙️ Chatterbox TTS Features
                - **High Quality**: Neural voice synthesis
                - **Voice Cloning**: Upload your reference audio
                - **Unlimited Length**: Automatic text chunking
                - **Professional Style**: Expert podcast discussions
                """)

        # Main input section
        with gr.Group(elem_classes="input-group"):
            with gr.Row():
                # Left: input options
                with gr.Column(scale=2):
                    # Input type selection
                    input_type_selector = gr.Radio(
                        choices=["URL", "PDF", "Keyword"],
                        value="URL",
                        label="📥 Input Type",
                        info="Choose your content source"
                    )

                    # URL input
                    url_input = gr.Textbox(
                        label="🔗 Article URL",
                        placeholder="Enter the article URL here...",
                        value="",
                        visible=True,
                        lines=2
                    )

                    # PDF upload
                    pdf_input = gr.File(
                        label="📄 Upload PDF",
                        file_types=[".pdf"],
                        visible=False
                    )

                    # Keyword input
                    keyword_input = gr.Textbox(
                        label="🔍 Topic/Keyword",
                        placeholder="Enter a topic (e.g., 'AI trends 2024', 'quantum computing')",
                        value="",
                        visible=False,
                        info="System will search and compile latest information",
                        lines=2
                    )

                # Right: settings
                with gr.Column(scale=1):
                    # Processing mode
                    mode_selector = gr.Radio(
                        choices=["Local", "API"],
                        value="Local",
                        label="⚙️ Processing Mode",
                        info="Local: On-device | API: Cloud"
                    )

        # Generate button
        with gr.Row():
            convert_btn = gr.Button(
                "🎯 Generate Professional Conversation",
                variant="primary",
                size="lg",
                scale=1
            )

        # TTS settings section
        with gr.Group(elem_classes="input-group"):
            gr.Markdown("### 🎙️ Chatterbox TTS Settings")

            with gr.Row():
                with gr.Column(scale=2):
                    ref_audio = gr.Audio(
                        sources=["upload", "microphone"],
                        type="filepath",
                        label="Reference Audio File (Upload your voice)",
                        value="https://storage.googleapis.com/chatterbox-demo-samples/prompts/female_shadowheart4.flac",
                        info="Upload your voice sample for voice cloning"
                    )

                with gr.Column(scale=1):
                    exaggeration = gr.Slider(
                        0.25, 2, step=.05,
                        label="Exaggeration (Neutral = 0.5)",
                        value=.5
                    )
                    cfg_weight = gr.Slider(
                        0.2, 1, step=.05,
                        label="CFG/Pace",
                        value=0.5
                    )
                    chunk_size = gr.Slider(
                        100, 300, step=50,
                        label="Chunk Size (characters)",
                        value=250,
                        info="Text chunking for long conversations"
                    )

            with gr.Accordion("Advanced Options", open=False):
                seed_num = gr.Number(value=0, label="Random seed (0 for random)")
                temperature = gr.Slider(0.05, 5, step=.05, label="Temperature", value=.8)

        # Output section
        with gr.Group(elem_classes="output-group"):
            with gr.Row():
                # Left: conversation text
                with gr.Column(scale=3):
                    conversation_output = gr.Textbox(
                        label="💬 Generated Professional Conversation (Editable)",
                        lines=25,
                        max_lines=50,
                        interactive=True,
                        placeholder="Professional podcast conversation will appear here...",
                        info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
                    )

                    # Text length display
                    char_count = gr.Textbox(
                        label="Text Information",
                        value="0 characters",
                        interactive=False
                    )

                    # Audio generation button
                    with gr.Row():
                        generate_audio_btn = gr.Button(
                            "🎙️ Generate Audio with Chatterbox TTS",
                            variant="secondary",
                            size="lg"
                        )

                # Right: audio output and status
                with gr.Column(scale=2):
                    audio_output = gr.Audio(
                        label="🎧 Professional Podcast Audio",
                        type="filepath",
                        interactive=False
                    )

                    status_output = gr.Textbox(
                        label="📊 Status",
                        interactive=False,
                        lines=3,
                        elem_classes="status-box"
                    )

                    # Help
                    gr.Markdown("""
                    #### 💡 Quick Tips:
                    - **URL**: Paste any article link
                    - **PDF**: Upload documents directly
                    - **Keyword**: Enter topics for AI research
                    - **Voice Cloning**: Upload reference audio
                    - Edit conversation before audio generation
                    - Longer text automatically chunked
                    """)

        # Examples section
        with gr.Accordion("📚 Examples", open=False):
            gr.Examples(
                examples=[
                    ["https://huggingface.co/blog/openfreeai/cycle-navigator", "URL", "Local"],
                    ["quantum computing breakthroughs", "Keyword", "Local"],
                    ["https://huggingface.co/papers/2505.14810", "URL", "Local"],
                    ["artificial intelligence ethics", "Keyword", "Local"],
                ],
                inputs=[url_input, input_type_selector, mode_selector],
                outputs=[conversation_output, status_output],
                fn=synthesize_sync,
                cache_examples=False,
            )

        # Input type change handler
        input_type_selector.change(
            fn=toggle_input_visibility,
            inputs=[input_type_selector],
            outputs=[url_input, pdf_input, keyword_input]
        )

        # Update the character count when the text changes
        conversation_output.change(
            fn=update_char_count,
            inputs=[conversation_output, chunk_size],
            outputs=[char_count]
        )

        chunk_size.change(
            fn=update_char_count,
            inputs=[conversation_output, chunk_size],
            outputs=[char_count]
        )

        # Event wiring
        def get_article_input(input_type, url_input, pdf_input, keyword_input):
            """Get the appropriate input based on input type"""
            if input_type == "URL":
                return url_input
            elif input_type == "PDF":
                return pdf_input
            else:  # Keyword
                return keyword_input

        convert_btn.click(
            fn=lambda input_type, url_input, pdf_input, keyword_input, mode: synthesize_sync(
                get_article_input(input_type, url_input, pdf_input, keyword_input), input_type, mode
            ),
            inputs=[input_type_selector, url_input, pdf_input, keyword_input, mode_selector],
            outputs=[conversation_output, status_output]
        )

        generate_audio_btn.click(
            fn=regenerate_audio_sync,
            inputs=[conversation_output, ref_audio, exaggeration, temperature, seed_num, cfg_weight, chunk_size],
            outputs=[status_output, audio_output]
        )


# Launch the app
if __name__ == "__main__":
    demo.queue(api_open=True, default_concurrency_limit=10).launch(
        show_api=True,
        share=False,
        server_name="0.0.0.0",
        server_port=7860
    )