Update app.py

app.py (CHANGED)
@@ -9,41 +9,43 @@ from typing import Optional
- app = FastAPI(title="LyonPoy AI Chat - CPU Optimized")
-     "max_tokens":
-     "priority": 1
-     "max_tokens":
-     "max_tokens": 0,
-     "max_tokens": 0,

@@ -64,51 +66,57 @@ MODELS = {
-     "max_tokens":
-     "max_tokens":
-     "max_tokens":
-     "max_tokens":
-     "max_tokens":
-     message: str
- async def load_models():
-     app.state.tokenizers = {}
-     torch.set_num_threads(2)

@@ -118,116 +126,143 @@ async def load_models():
-     print("🚀 LyonPoy AI Chat - CPU Optimized Ready!")
-     <title>LyonPoy AI Chat -
-         background:
-         width:
-         .chat-header {
-             background:
-         .cpu-badge {
-             background: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 10px;
-             font-size: 10px; display: inline-block; margin-top: 3px;
-             padding: 6px 10px; border-radius: 15px; font-size:
-         .chat-messages {
-             flex: 1; padding:
-         .message {
-         .message.user {
-             background: #00b4db; color: white; align-self: flex-end;
-             border-bottom-right-radius: 4px;
-         }
-         .message.bot {
-             background: white; color: #333; align-self: flex-start;
-             border-bottom-left-radius: 4px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);
-         }
-         .message-time { font-size: 10px; opacity: 0.6; margin-top: 3px; }
-         .response-time {
-             font-size: 9px; color: #666; margin-top: 2px;
-             display: flex; align-items: center; gap: 3px;
-             padding: 15px; background: white; border-top: 1px solid #e5e7eb;
-             display: flex; gap: 8px; align-items: center;
-             padding:
-     <div class="
-         <h1
-         <div class="cpu-badge">CPU Optimized</div>

@@ -244,145 +279,143 @@ async def get_frontend():
-         <div class="
-     <div class="typing-indicator" id="typingIndicator">
-     const
-     // Production API Base
-     function scrollToBottom() {
-         chatMessages.scrollTop = chatMessages.scrollHeight;
-     }
-     function getSpeedClass(time) {
-         if (time < 2000) return 'fast';
-         if (time < 5000) return 'medium';
-         return 'slow';
-     }
-     function addMessage(content, isUser = false,
-         messageDiv.className =
-         const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
-             ${responseTime}ms
-             </div>`;
-         scrollToBottom();
-     }
-     function hideTyping() {
-         typingIndicator.style.display = 'none';
-         const
-         addMessage(message, true);
-         chatInput.value = '';
-         showTyping();
-             const response = await fetch('/chat', {
-                     message:
-                     model: modelSelect.value
-             hideTyping();
-             modelStatus.textContent = 'Ready';
-                 addMessage('⚠️ Model
-             modelStatus.textContent = 'Error';
-             addMessage('❌ Koneksi bermasalah, coba lagi.', false, responseTime);
-     // Event listeners
-         if (e.key === 'Enter'
-         modelStatus.textContent = `Model: ${modelName}`;
-         addMessage(`Switched to: ${modelName}`);
-     // Auto-focus on load
-     // Character counter
-     chatInput.addEventListener('input', () => {
-         const remaining = 100 - chatInput.value.length;
-         if (remaining < 20) {
-             chatInput.style.borderColor = remaining < 10 ? '#ef4444' : '#f59e0b';
-         } else {
-             chatInput.style.borderColor = '#d1d5db';
-         }

@@ -398,192 +431,264 @@ async def chat(request: ChatRequest):
-             model_id = "distil-gpt-2"
-         # CPU-specific optimizations
-                 "device": -1,
-                 "torch_dtype": torch.float32,
-             # Cleanup memory
-                 top_p=0.
-                 pad_token_id=pipe.tokenizer.eos_token_id,
-                 early_stopping=True
-             if result.startswith(input_text):
-                 result = result[len(input_text):].strip()
-                 result = result[:97] + "..."
-             "response":
-             "processing_time": f"{
-         # Fallback response
-             "Coba lagi dengan kata yang lebih
-             "Hmm,
-             "⚡ Model sedang
-         import random
-             "response": fallback,
- # Optimized inference endpoint untuk
-     """CPU-Optimized inference endpoint"""
-             "gpt-2-tinny": "gpt-2-tinny",
-         # Quick processing
-         chat_request = ChatRequest(message=message, model=internal_model)
-         result = await chat(chat_request)
-             "result": result
-             "status": "
-             "model_used": result
-             "result": "
-         "loaded_models":
-         "optimization": "CPU-Tuned"
- async def
-                 "id": k,
-                 "task": v["task"],
-                 "max_tokens": v["max_tokens"],
-                 "priority": v["priority"],
-         workers=1,
-         timeout_keep_alive=30,
-         access_log=False
 import asyncio
 import time
 import gc
+import random  # Added for the fallback responses

 # Initialize FastAPI
+app = FastAPI(title="LyonPoy AI Chat - CPU Optimized (Prompt Mode)")

 # Set seed for consistency
 set_seed(42)

 # CPU-Optimized 11 models configuration
+# Adjust max_tokens to leave more room for generation after the prompt
 MODELS = {
     "distil-gpt-2": {
         "name": "DistilGPT-2 ⚡",
         "model_path": "Lyon28/Distil_GPT-2",
         "task": "text-generation",
+        "max_tokens": 60,  # Increased
+        "priority": 1
     },
     "gpt-2-tinny": {
         "name": "GPT-2 Tinny ⚡",
         "model_path": "Lyon28/GPT-2-Tinny",
         "task": "text-generation",
+        "max_tokens": 50,  # Increased
         "priority": 1
     },
     "bert-tinny": {
         "name": "BERT Tinny",
         "model_path": "Lyon28/Bert-Tinny",
         "task": "text-classification",
+        "max_tokens": 0,  # Not relevant for classification
         "priority": 1
     },
     "distilbert-base-uncased": {
         "name": "DistilBERT",
         "model_path": "Lyon28/Distilbert-Base-Uncased",
         "task": "text-classification",
+        "max_tokens": 0,  # Not relevant for classification
         "priority": 1
     },
     "albert-base-v2": {

         "name": "T5 Small",
         "model_path": "Lyon28/T5-Small",
         "task": "text2text-generation",
+        "max_tokens": 70,  # Increased
         "priority": 2
     },
     "gpt-2": {
         "name": "GPT-2 Standard",
         "model_path": "Lyon28/GPT-2",
         "task": "text-generation",
+        "max_tokens": 70,  # Increased
         "priority": 2
     },
     "tinny-llama": {
         "name": "Tinny Llama",
         "model_path": "Lyon28/Tinny-Llama",
         "task": "text-generation",
+        "max_tokens": 80,  # Increased
         "priority": 3
     },
     "pythia": {
         "name": "Pythia",
         "model_path": "Lyon28/Pythia",
         "task": "text-generation",
+        "max_tokens": 80,  # Increased
         "priority": 3
     },
     "gpt-neo": {
         "name": "GPT-Neo",
         "model_path": "Lyon28/GPT-Neo",
         "task": "text-generation",
+        "max_tokens": 90,  # Increased
         "priority": 3
     }
 }

 class ChatRequest(BaseModel):
+    message: str  # Will contain the full structured prompt
     model: Optional[str] = "distil-gpt-2"
+    # Extra fields for the structured prompt in case they are needed in Pydantic,
+    # but for now we parse everything from 'message'
+    situasi: Optional[str] = ""
+    latar: Optional[str] = ""
+    user_message: str  # The actual user message
+

 # CPU-Optimized startup
 @app.on_event("startup")
+async def load_models_on_startup():  # Renamed so the function name is unique
     app.state.pipelines = {}
+    app.state.tokenizers = {}  # Not used explicitly yet, but good to have if needed

     # Set CPU optimizations
+    torch.set_num_threads(2)
     os.environ['OMP_NUM_THREADS'] = '2'
     os.environ['MKL_NUM_THREADS'] = '2'
     os.environ['NUMEXPR_NUM_THREADS'] = '2'
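The ChatRequest model above is what both the embedded frontend and the /inference endpoint further down have to satisfy; note that user_message is required alongside the assembled message prompt. A minimal body that would validate is sketched below, with purely illustrative values:

    # Hypothetical example payload for POST /chat; every value here is made up for illustration.
    example_chat_request = {
        "message": "Situasi: Santai\nLatar: Tepi sungai\n{{User}}: Halo!\n{{Char}}:",
        "model": "distil-gpt-2",
        "situasi": "Santai",
        "latar": "Tepi sungai",
        "user_message": "Halo!",
    }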
     os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
     os.makedirs(os.environ['HF_HOME'], exist_ok=True)

+    print("🚀 LyonPoy AI Chat - CPU Optimized (Prompt Mode) Ready!")

 # Lightweight frontend
 @app.get("/", response_class=HTMLResponse)
 async def get_frontend():
+    # Styling is modeled on styles.css and the layout on chat.html
+    # This is a HEAVILY simplified, embedded version
     html_content = '''
 <!DOCTYPE html>
 <html lang="id">
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>LyonPoy AI Chat - Prompt Mode</title>
     <style>
+        :root {
+            --primary-color: #075E54;   /* styles.css */
+            --bg-primary: #ffffff;      /* styles.css */
+            --bg-secondary: #f8f9fa;    /* styles.css */
+            --bg-accent: #DCF8C6;       /* styles.css */
+            --text-primary: #212529;    /* styles.css */
+            --text-white: #ffffff;      /* styles.css */
+            --border-color: #dee2e6;    /* styles.css */
+            --border-radius: 10px;      /* styles.css */
+            --spacing-sm: 0.5rem;
+            --spacing-md: 1rem;
+            --shadow: 0 2px 5px rgba(0, 0, 0, 0.15);  /* styles.css */
+            --font-size-base: 1rem;
+            --font-size-sm: 0.875rem;
+            --font-size-xs: 0.75rem;
+        }
         * { margin: 0; padding: 0; box-sizing: border-box; }
         body {
             font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            background-color: var(--bg-secondary);  /* styles.css --bg-primary */
+            color: var(--text-primary);             /* styles.css */
             height: 100vh; display: flex; justify-content: center; align-items: center;
+            padding: var(--spacing-md);
         }
+        .app-container {  /* similar to #app in styles.css */
+            width: 100%;
+            max-width: 600px;  /* wider, to fit the extra inputs */
+            height: 95vh;
+            background: var(--bg-primary);        /* styles.css */
+            border-radius: var(--border-radius);  /* styles.css */
+            box-shadow: var(--shadow);            /* styles.css */
+            display: flex; flex-direction: column; overflow: hidden;
         }
+        .chat-header {  /* similar to .header.chat-header in styles.css */
+            background: var(--primary-color);  /* styles.css --secondary-color (for the chat header) */
+            color: var(--text-white);          /* styles.css */
+            padding: var(--spacing-md);
+            text-align: center;
         }
+        .chat-header h1 { font-size: 1.2rem; font-weight: 600; margin-bottom: var(--spacing-sm); }
         .model-selector {
             background: rgba(255,255,255,0.2); border: none; color: white;
+            padding: 6px 10px; border-radius: 15px; font-size: 0.8rem; cursor: pointer;
+            width: 100%;
         }
+        .chat-messages {  /* similar to .chat-messages in styles.css */
+            flex: 1; padding: var(--spacing-md); overflow-y: auto; background: var(--bg-secondary);  /* styles.css */
             display: flex; flex-direction: column; gap: 12px;
         }
+        .message-group {  /* similar to .message-group in styles.css */
+            display: flex;
+            max-width: 75%;  /* styles.css --message-max-width */
         }
+        .message-group.outgoing { align-self: flex-end; flex-direction: row-reverse; }
+        .message-group.incoming { align-self: flex-start; }
+
+        .message {  /* similar to .message in styles.css */
+            padding: var(--spacing-sm) var(--spacing-md);
+            border-radius: var(--border-radius);  /* styles.css --message-border-radius */
+            font-size: var(--font-size-sm);       /* styles.css --font-size-base (for messages) */
+            line-height: 1.4; word-wrap: break-word;
+            position: relative;
         }
+        .message-group.outgoing .message {
+            background: var(--bg-accent);  /* styles.css */
+            color: var(--text-primary);
+            margin-left: var(--spacing-md);
         }
+        .message-group.incoming .message {
+            background: var(--bg-primary);    /* styles.css */
+            color: var(--text-primary);
+            box-shadow: var(--shadow-sm);     /* styles.css --shadow-sm */
+            margin-right: var(--spacing-md);  /* in case there is an avatar */
         }
+        .message-info {  /* similar to .message-info in styles.css */
+            display: flex; justify-content: flex-end; align-items: center;
+            margin-top: var(--spacing-xs);
+            font-size: var(--font-size-xs);  /* styles.css */
+            color: #6c757d;                  /* styles.css --text-muted */
         }
+        .message-time { margin-right: var(--spacing-xs); }
+        .response-time-info { font-size: 9px; color: #666; margin-top: 2px; }
+
+        .input-area {  /* container for all the inputs */
+            padding: var(--spacing-md);
+            background: var(--bg-primary);              /* styles.css */
+            border-top: 1px solid var(--border-color);  /* styles.css */
         }
+        .prompt-inputs { display: flex; gap: var(--spacing-sm); margin-bottom: var(--spacing-sm); }
+        .prompt-inputs input { flex: 1; }
+
+        .chat-input-container {  /* similar to .chat-input-container in styles.css */
+            display: flex; gap: var(--spacing-sm); align-items: center;
         }
+        .chat-input {  /* similar to the textarea in .chat-input-field in styles.css */
+            flex: 1; padding: var(--spacing-sm) var(--spacing-md);
+            border: 1px solid var(--border-color);  /* styles.css */
+            border-radius: 20px;                    /* styles.css --border-radius-xl */
+            font-size: var(--font-size-sm); outline: none;
         }
+        .chat-input:focus { border-color: var(--primary-color); }
+        .send-button {  /* similar to .send-btn in styles.css */
+            background: var(--primary-color); color: var(--text-white); border: none;
+            border-radius: 50%; width: 40px; height: 40px; cursor: pointer;
+            display: flex; align-items: center; justify-content: center; font-size: 1.2rem;
         }
+        .send-button:hover { filter: brightness(1.2); }
+        .send-button:disabled { background: #d1d5db; cursor: not-allowed; }
+
+        .typing-indicator-text {
+            font-style: italic; color: #6c757d; font-size: var(--font-size-sm);
+            padding: var(--spacing-sm) var(--spacing-md);
+            text-align: center;
         }
+        .model-status { font-size: 10px; color: rgba(255,255,255,0.8); margin-top: 3px; text-align: center; }
+        label { font-size: 0.9em; margin-bottom: 0.2em; display: block; }
     </style>
 </head>
 <body>
+    <div class="app-container">
         <div class="chat-header">
+            <h1>AI Character Prompt Mode</h1>
             <select class="model-selector" id="modelSelect">
                 <option value="distil-gpt-2">DistilGPT-2 (Fastest)</option>
                 <option value="gpt-2-tinny">GPT-2 Tinny (Fast)</option>
             <div class="model-status" id="modelStatus">Ready to chat!</div>
         </div>
         <div class="chat-messages" id="chatMessages">
+            <div class="message-group incoming">
+                <div class="message">
+                    Hello! Atur Situasi, Latar, dan pesanmu di bawah. Lalu kirim!
+                    <div class="message-info"><span class="message-time">${new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' })}</span></div>
+                </div>
             </div>
         </div>
+        <div class="typing-indicator-text" id="typingIndicator" style="display: none;">AI sedang berpikir...</div>
+        <div class="input-area">
+            <div class="prompt-inputs">
+                <div>
+                    <label for="situasiInput">Situasi:</label>
+                    <input type="text" class="chat-input" id="situasiInput" placeholder="Mis: Santai">
+                </div>
+                <div>
+                    <label for="latarInput">Latar:</label>
+                    <input type="text" class="chat-input" id="latarInput" placeholder="Mis: Tepi sungai">
+                </div>
+            </div>
+            <div class="chat-input-container">
+                <input type="text" class="chat-input" id="userMessageInput" placeholder="Ketik pesan sebagai {{User}}..." maxlength="150">
+                <button class="send-button" id="sendButton">➤</button>
+            </div>
         </div>
     </div>
 <script>
     const chatMessages = document.getElementById('chatMessages');
+    const situasiInput = document.getElementById('situasiInput');
+    const latarInput = document.getElementById('latarInput');
+    const userMessageInput = document.getElementById('userMessageInput');
     const sendButton = document.getElementById('sendButton');
     const modelSelect = document.getElementById('modelSelect');
     const typingIndicator = document.getElementById('typingIndicator');
     const modelStatus = document.getElementById('modelStatus');

     const API_BASE = window.location.origin;

+    function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }

+    function addMessage(content, isUser = false, responseTimeMs = null, fullPromptForUser = null) {
+        const messageGroupDiv = document.createElement('div');
+        messageGroupDiv.className = `message-group ${isUser ? 'outgoing' : 'incoming'}`;
+
         const messageDiv = document.createElement('div');
+        messageDiv.className = 'message';

+        const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
+        let timeInfoHtml = `<div class="message-info"><span class="message-time">${time}</span></div>`;
+
+        if (responseTimeMs !== null && !isUser) {
+            timeInfoHtml += `<div class="response-time-info">${responseTimeMs}ms</div>`;
         }

+        // For the user's bubble we could show the full prompt or only the user message;
+        // for now show only the user message to keep it clean, while the full prompt is still sent to the backend
+        const displayContent = isUser ? userMessageInput.value.trim() : content;
+        messageDiv.innerHTML = displayContent.replace(/\\n/g, '<br>') + timeInfoHtml;

+        messageGroupDiv.appendChild(messageDiv);
+        chatMessages.appendChild(messageGroupDiv);
+        scrollToBottom();
     }

     async function sendMessage() {
+        const situasi = situasiInput.value.trim();
+        const latar = latarInput.value.trim();
+        const userMsg = userMessageInput.value.trim();
+
+        if (!userMsg) {
+            alert("Pesan pengguna tidak boleh kosong!");
+            return;
+        }

+        const fullPrompt = `Situasi: ${situasi}\\nLatar: ${latar}\\n{{User}}: ${userMsg}\\n{{Char}}:`;
+
+        addMessage(userMsg, true, null, fullPrompt);
+
+        userMessageInput.value = '';  // clear only the user-message input
+        userMessageInput.disabled = true;
         sendButton.disabled = true;
+        typingIndicator.style.display = 'block';
         modelStatus.textContent = 'Processing...';

         const startTime = Date.now();

         try {
+            const response = await fetch(API_BASE + '/chat', {
                 method: 'POST',
                 headers: { 'Content-Type': 'application/json' },
                 body: JSON.stringify({
+                    message: fullPrompt,  // send the full prompt
+                    model: modelSelect.value,
+                    // extra info in case the backend wants to process it separately
+                    situasi: situasi,
+                    latar: latar,
+                    user_message: userMsg
                 })
             });

             const data = await response.json();
             const responseTime = Date.now() - startTime;

             if (data.status === 'success') {
                 addMessage(data.response, false, responseTime);
             } else {
+                addMessage(data.response || '⚠️ Model gagal merespon, coba lagi.', false, responseTime);
             }
         } catch (error) {
             const responseTime = Date.now() - startTime;
+            addMessage('❌ Koneksi bermasalah atau error server.', false, responseTime);
             console.error('Error:', error);
         }

+        typingIndicator.style.display = 'none';
+        modelStatus.textContent = 'Ready';
+        userMessageInput.disabled = false;
         sendButton.disabled = false;
+        userMessageInput.focus();
     }

     sendButton.addEventListener('click', sendMessage);
+    userMessageInput.addEventListener('keypress', (e) => {
+        if (e.key === 'Enter' && !e.shiftKey) {
+            e.preventDefault();  // prevent a newline in the input
+            sendMessage();
+        }
     });

     modelSelect.addEventListener('change', () => {
         const selectedOption = modelSelect.options[modelSelect.selectedIndex];
+        modelStatus.textContent = `Model: ${selectedOption.text}`;
     });

     window.addEventListener('load', () => {
+        userMessageInput.focus();
+        const initialModelName = modelSelect.options[modelSelect.selectedIndex].text;
+        modelStatus.textContent = `${initialModelName} Ready`;
     });
 </script>
 </body>
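The /chat handler below budgets output length from the prompt size: it tokenizes the structured prompt, adds the model's max_tokens as the allowance for new tokens, and caps the total at the model's max_position_embeddings (falling back to 512). A rough sketch of that arithmetic, with made-up numbers for illustration only:

    # Illustrative numbers only, assuming a 38-token prompt and the distil-gpt-2 entry above.
    prompt_length_tokens = 38
    max_new_generated_tokens = 60                                              # MODELS["distil-gpt-2"]["max_tokens"]
    max_len_for_generation = prompt_length_tokens + max_new_generated_tokens   # 98
    absolute_max_len = 1024                                                    # e.g. GPT-2-family max_position_embeddings
    max_len_for_generation = min(max_len_for_generation, absolute_max_len)     # still 98
    # If the prompt alone already used up the budget, the handler below returns
    # "status": "error_prompt_too_long" instead of generating.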
     try:
         model_id = request.model.lower()
         if model_id not in MODELS:
+            model_id = "distil-gpt-2"

         model_config = MODELS[model_id]

+        # The request message is now an already-structured prompt,
+        # e.g.: "Situasi: Santai\nLatar:Tepi sungai\n{{User}}:sayang,danau nya indah ya, (memeluk {{char}} dari samping)\n{{Char}}:"
+        structured_prompt = request.message
+
         if model_id not in app.state.pipelines:
             print(f"⚡ CPU Loading {model_config['name']}...")

             pipeline_kwargs = {
                 "task": model_config["task"],
                 "model": model_config["model_path"],
+                "device": -1,
+                "torch_dtype": torch.float32,
                 "model_kwargs": {
                     "torchscript": False,
                     "low_cpu_mem_usage": True
                 }
             }
+            if model_config["task"] != "text-classification":  # tokenizer only for generative models
+                app.state.tokenizers[model_id] = AutoTokenizer.from_pretrained(model_config["model_path"])
+
             app.state.pipelines[model_id] = pipeline(**pipeline_kwargs)
             gc.collect()

         pipe = app.state.pipelines[model_id]

+        generated_text = "Output tidak didukung untuk task ini."
+
         if model_config["task"] == "text-generation":
+            # Count the prompt length in tokens
+            current_tokenizer = app.state.tokenizers.get(model_id)
+            if not current_tokenizer:  # fallback if the tokenizer is missing from state (it should be there)
+                current_tokenizer = AutoTokenizer.from_pretrained(model_config["model_path"])
+
+            prompt_tokens = current_tokenizer.encode(structured_prompt, return_tensors="pt")
+            prompt_length_tokens = prompt_tokens.shape[1]
+
+            # max_length is the total (prompt + generated); max_tokens is for the generated part only.
+            # Make sure max_length does not exceed the model's capacity (usually 512 or 1024 for small models)
+            # and is not too short either.
+            # Some models may have a smaller max_position_embeddings.
+            # Cap max_length at something safe like 256 or 512 if it gets too large.
+            # model_config["max_tokens"] is the maximum number of *new* tokens we want.
+
+            # Use max_new_tokens directly if the pipeline supports it; otherwise set max_length.
+            # For the generic pipeline, max_length is the primary knob.
+            # max_length must be larger than the prompt; the new-token budget comes from the model config.
+            max_new_generated_tokens = model_config["max_tokens"]
+            max_len_for_generation = prompt_length_tokens + max_new_generated_tokens
+
+            # Cap the total max_length so it is not too large for small models.
+            # For example GPT-2 (and DistilGPT-2) have a 1024-token context;
+            # smaller models may have a lower limit.
+            # Use a safe upper bound, e.g. 512 for this demo;
+            # adjust it if your specific model has a different limit.
+            absolute_max_len = 512
+            if hasattr(pipe.model.config, 'max_position_embeddings'):
+                absolute_max_len = pipe.model.config.max_position_embeddings
+
+            max_len_for_generation = min(max_len_for_generation, absolute_max_len)
+
+            # Make sure max_length is at least the prompt plus a few new tokens
+            if max_len_for_generation <= prompt_length_tokens + 5:  # at least 5 new tokens
+                max_len_for_generation = prompt_length_tokens + 5
+
+            # Make sure we do not request more new tokens than absolute_max_len allows
+            actual_max_new_tokens = max_len_for_generation - prompt_length_tokens
+            if actual_max_new_tokens <= 0:  # the prompt is already too long
+                return {
+                    "response": "Hmm, prompt terlalu panjang untuk model ini. Coba perpendek situasi/latar/pesan.",
+                    "model": model_config["name"],
+                    "status": "error_prompt_too_long",
+                    "processing_time": f"{round((time.time() - start_time) * 1000)}ms"
+                }
+
+            outputs = pipe(
+                structured_prompt,
+                max_length=max_len_for_generation,  # total length (prompt + new tokens)
+                # max_new_tokens=actual_max_new_tokens,  # preferred when the pipeline supports it explicitly
+                temperature=0.75,  # slightly more creative
                 do_sample=True,
+                top_p=0.9,  # widen sampling a bit
+                pad_token_id=pipe.tokenizer.eos_token_id if hasattr(pipe.tokenizer, 'eos_token_id') else 50256,  # 50256 for GPT-2
                 num_return_sequences=1,
+                early_stopping=True,
+                truncation=True  # important if the prompt is too long for the model
+            )
+            generated_text = outputs[0]['generated_text']

+            # Cleanup: keep only the text after the "{{Char}}:" marker
+            char_marker = "{{Char}}:"
+            if char_marker in generated_text:
+                generated_text = generated_text.split(char_marker, 1)[-1].strip()
+            elif generated_text.startswith(structured_prompt):  # fallback if the marker is missing
+                generated_text = generated_text[len(structured_prompt):].strip()
+
+            # Strip it if the model repeats the user's part of the prompt
+            if request.user_message and generated_text.startswith(request.user_message):
+                generated_text = generated_text[len(request.user_message):].strip()
+
+            # Limit to a couple of sentences or a fixed length for speed & relevance
+            # (this could be made more flexible)
+            sentences = generated_text.split('.')
+            if len(sentences) > 2:  # keep the first two sentences if present
+                generated_text = sentences[0].strip() + ('.' if sentences[0] else '') + \
+                                 (sentences[1].strip() + '.' if len(sentences) > 1 and sentences[1] else '')
+            elif len(generated_text) > 150:  # rough character cap
+                generated_text = generated_text[:147] + '...'

         elif model_config["task"] == "text-classification":
+            # For classification, use the actual user message rather than the structured prompt
+            user_msg_for_classification = request.user_message if request.user_message else structured_prompt
+            output = pipe(user_msg_for_classification[:256], truncation=True, max_length=256)[0]  # limit the input
             confidence = f"{output['score']:.2f}"
+            generated_text = f"Klasifikasi pesan '{user_msg_for_classification[:30]}...': {output['label']} (Skor: {confidence})"

         elif model_config["task"] == "text2text-generation":
+            # T5-style models may need a slightly different input format,
+            # but for this demo we send the prompt as-is.
+            # A task prefix such as "translate English to German: " may be needed for T5;
+            # for chat we can leave it alone or use user_message.
+            user_msg_for_t2t = request.user_message if request.user_message else structured_prompt
+            outputs = pipe(
+                user_msg_for_t2t[:256],  # limit the input for T5
+                max_length=model_config["max_tokens"],  # this is the max_length of the T5 output
+                temperature=0.65,
+                early_stopping=True,
+                truncation=True
+            )
+            generated_text = outputs[0]['generated_text']

+        if not generated_text or len(generated_text.strip()) < 1:
+            generated_text = "Hmm, saya tidak yakin bagaimana merespon. Coba lagi dengan prompt berbeda?"
+        elif len(generated_text) > 250:  # final output cap
+            generated_text = generated_text[:247] + "..."

+        processing_time_ms = round((time.time() - start_time) * 1000)

         return {
+            "response": generated_text,
             "model": model_config["name"],
             "status": "success",
+            "processing_time": f"{processing_time_ms}ms"
         }

     except Exception as e:
         print(f"❌ CPU Error: {e}")
+        import traceback
+        traceback.print_exc()  # print the full traceback for debugging
+        processing_time_ms = round((time.time() - start_time) * 1000)

         fallback_responses = [
+            "Maaf, ada sedikit gangguan. Coba lagi dengan kata yang lebih simpel?",
+            "Hmm, sepertinya saya butuh istirahat sejenak. Mungkin pertanyaan lain?",
+            "⚡ Model sedang dioptimalkan, tunggu sebentar dan coba lagi...",
+            "🚀 Mungkin coba model lain yang lebih cepat atau prompt yang berbeda?"
         ]

         fallback = random.choice(fallback_responses)

         return {
+            "response": f"{fallback} (Error: {str(e)[:100]})",  # include a bit of error info
             "status": "error",
+            "model": MODELS.get(model_id, {"name": "Unknown"})["name"] if 'model_id' in locals() else "Unknown",
+            "processing_time": f"{processing_time_ms}ms"
         }

+# Optimized inference endpoint (NOT updated in detail for the new prompt mode,
+# since the main focus is on /chat and its frontend. If /inference also needs prompt mode,
+# it has to construct a similar ChatRequest.)
 @app.post("/inference")
 async def inference(request: dict):
+    """CPU-Optimized inference endpoint - MAY NEED ADJUSTMENT FOR PROMPT MODE"""
     try:
+        # For prompt mode, 'message' has to be the full structured prompt,
+        # or this endpoint has to be changed to accept 'situasi', 'latar', 'user_message'
+        message = request.get("message", "")
+        model_id_from_request = request.get("model", "distil-gpt-2")  # should be an internal model_id
+
+        # If a model path was given, try to map it to an internal model_id
+        if "/" in model_id_from_request:
+            model_key_from_path = model_id_from_request.split("/")[-1].lower()
+            model_mapping = {"distil_gpt-2": "distil-gpt-2", "gpt-2-tinny": "gpt-2-tinny"}  # ... (add all the remaining mappings) ...
+            internal_model = model_mapping.get(model_key_from_path, "distil-gpt-2")
+        else:  # assume it is already an internal model_id
+            internal_model = model_id_from_request
+
+        # If /inference needs to support prompt mode, the data sent to ChatRequest must be adjusted.
+        # For this example we assume 'message' is only the user_message for /inference,
+        # with situasi/latar left at their defaults or unused.
+        # This is a simplification and may need to change as required.
+        chat_req_data = {
+            "message": f"{{User}}: {message}\n{{Char}}:",  # simplest possible prompt form
+            "model": internal_model,
+            "user_message": message  # keep the original user message
         }
+
+        chat_request_obj = ChatRequest(**chat_req_data)
+        result = await chat(chat_request_obj)

         return {
+            "result": result.get("response"),
+            "status": result.get("status"),
+            "model_used": result.get("model"),
             "processing_time": result.get("processing_time", "0ms")
         }

     except Exception as e:
         print(f"❌ Inference Error: {e}")
         return {
+            "result": "Terjadi kesalahan pada endpoint inference. Coba lagi...",
             "status": "error"
         }

 # Lightweight health check
 @app.get("/health")
 async def health():
+    loaded_models_count = len(app.state.pipelines) if hasattr(app.state, 'pipelines') else 0
     return {
         "status": "healthy",
         "platform": "CPU",
+        "loaded_models": loaded_models_count,
         "total_models": len(MODELS),
+        "optimization": "CPU-Tuned (Prompt Mode)"
     }

 # Model info endpoint
 @app.get("/models")
+async def get_models_info():  # renamed function
     return {
         "models": [
             {
+                "id": k, "name": v["name"], "task": v["task"],
+                "max_tokens_generate": v["max_tokens"], "priority": v["priority"],
                 "cpu_optimized": True
             }
             for k, v in MODELS.items()
         ],
         "platform": "CPU",
+        "recommended_for_prompting": ["distil-gpt-2", "gpt-2-tinny", "tinny-llama", "gpt-neo", "pythia", "gpt-2"]
     }

 # Run with CPU optimizations
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
+    # Use reload=True during development so code changes are picked up immediately;
+    # turn reload off for production.
+    # uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1, reload=True)
     uvicorn.run(
         app,
         host="0.0.0.0",
         port=port,
+        workers=1,
+        timeout_keep_alive=30,  # the FastAPI/uvicorn default of 5 s can be too short while models load
+        access_log=False
     )
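Assuming the server is running locally on the default port 7860 and the requests package is installed, the endpoints can be exercised by hand with a sketch like the one below; the URLs, values, and file name are illustrative and not part of the app:

    # try_endpoints.py - hypothetical client sketch, not part of app.py (pip install requests)
    import requests

    BASE = "http://localhost:7860"  # the PORT env var overrides 7860 in app.py

    # Structured prompt mode, mirroring what the embedded frontend sends to /chat
    payload = {
        "message": "Situasi: Santai\nLatar: Tepi sungai\n{{User}}: Halo!\n{{Char}}:",
        "model": "distil-gpt-2",
        "situasi": "Santai",
        "latar": "Tepi sungai",
        "user_message": "Halo!",
    }
    print(requests.post(f"{BASE}/chat", json=payload).json())

    # /inference wraps a plain message into a minimal prompt by itself
    print(requests.post(f"{BASE}/inference", json={"message": "Halo!", "model": "distil-gpt-2"}).json())

    # Health check and model listing
    print(requests.get(f"{BASE}/health").json())
    print(requests.get(f"{BASE}/models").json())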