Lyon28 committed on
Commit
2c15096
·
verified ·
1 Parent(s): 0db9e1d

Update app.py

Files changed (1)
  1. app.py +371 -266
app.py CHANGED
@@ -9,41 +9,43 @@ from typing import Optional
9
  import asyncio
10
  import time
11
  import gc
 
12
 
13
  # Initialize FastAPI
14
- app = FastAPI(title="LyonPoy AI Chat - CPU Optimized")
15
 
16
  # Set seed for consistency
17
  set_seed(42)
18
 
19
  # CPU-Optimized 11 models configuration
 
20
  MODELS = {
21
  "distil-gpt-2": {
22
  "name": "DistilGPT-2 ⚑",
23
  "model_path": "Lyon28/Distil_GPT-2",
24
  "task": "text-generation",
25
- "max_tokens": 20,
26
- "priority": 1 # Tercepat
27
  },
28
  "gpt-2-tinny": {
29
  "name": "GPT-2 Tinny ⚑",
30
  "model_path": "Lyon28/GPT-2-Tinny",
31
  "task": "text-generation",
32
- "max_tokens": 18,
33
  "priority": 1
34
  },
35
  "bert-tinny": {
36
  "name": "BERT Tinny πŸ“Š",
37
  "model_path": "Lyon28/Bert-Tinny",
38
  "task": "text-classification",
39
- "max_tokens": 0,
40
  "priority": 1
41
  },
42
  "distilbert-base-uncased": {
43
  "name": "DistilBERT πŸ“Š",
44
  "model_path": "Lyon28/Distilbert-Base-Uncased",
45
  "task": "text-classification",
46
- "max_tokens": 0,
47
  "priority": 1
48
  },
49
  "albert-base-v2": {
@@ -64,51 +66,57 @@ MODELS = {
64
  "name": "T5 Small πŸ”„",
65
  "model_path": "Lyon28/T5-Small",
66
  "task": "text2text-generation",
67
- "max_tokens": 25,
68
  "priority": 2
69
  },
70
  "gpt-2": {
71
  "name": "GPT-2 Standard",
72
  "model_path": "Lyon28/GPT-2",
73
  "task": "text-generation",
74
- "max_tokens": 22,
75
  "priority": 2
76
  },
77
  "tinny-llama": {
78
  "name": "Tinny Llama",
79
  "model_path": "Lyon28/Tinny-Llama",
80
  "task": "text-generation",
81
- "max_tokens": 25,
82
  "priority": 3
83
  },
84
  "pythia": {
85
  "name": "Pythia",
86
  "model_path": "Lyon28/Pythia",
87
  "task": "text-generation",
88
- "max_tokens": 25,
89
  "priority": 3
90
  },
91
  "gpt-neo": {
92
  "name": "GPT-Neo",
93
  "model_path": "Lyon28/GPT-Neo",
94
  "task": "text-generation",
95
- "max_tokens": 30,
96
  "priority": 3
97
  }
98
  }
99
 
100
  class ChatRequest(BaseModel):
101
- message: str
102
  model: Optional[str] = "distil-gpt-2"
103
 
104
  # CPU-Optimized startup
105
  @app.on_event("startup")
106
- async def load_models():
107
  app.state.pipelines = {}
108
- app.state.tokenizers = {}
109
 
110
  # Set CPU optimizations
111
- torch.set_num_threads(2) # Limit threads for Hugging Face
112
  os.environ['OMP_NUM_THREADS'] = '2'
113
  os.environ['MKL_NUM_THREADS'] = '2'
114
  os.environ['NUMEXPR_NUM_THREADS'] = '2'
@@ -118,116 +126,143 @@ async def load_models():
118
  os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
119
  os.makedirs(os.environ['HF_HOME'], exist_ok=True)
120
 
121
- print("πŸš€ LyonPoy AI Chat - CPU Optimized Ready!")
122
 
123
  # Lightweight frontend
124
  @app.get("/", response_class=HTMLResponse)
125
  async def get_frontend():
 
 
126
  html_content = '''
127
  <!DOCTYPE html>
128
  <html lang="id">
129
  <head>
130
  <meta charset="UTF-8">
131
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
132
- <title>LyonPoy AI Chat - CPU Fast</title>
133
  <style>
134
  * { margin: 0; padding: 0; box-sizing: border-box; }
135
  body {
136
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
137
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 
138
  height: 100vh; display: flex; justify-content: center; align-items: center;
 
139
  }
140
- .chat-container {
141
- width: 400px; height: 600px; background: #fff; border-radius: 15px;
142
- box-shadow: 0 15px 35px rgba(0,0,0,0.1); display: flex; flex-direction: column; overflow: hidden;
143
  }
144
- .chat-header {
145
- background: linear-gradient(135deg, #00b4db, #0083b0); color: white;
146
- padding: 15px; text-align: center;
147
- }
148
- .chat-header h1 { font-size: 16px; font-weight: 600; margin-bottom: 5px; }
149
- .cpu-badge {
150
- background: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 10px;
151
- font-size: 10px; display: inline-block; margin-top: 3px;
152
  }
 
153
  .model-selector {
154
  background: rgba(255,255,255,0.2); border: none; color: white;
155
- padding: 6px 10px; border-radius: 15px; font-size: 11px; cursor: pointer;
156
- margin-top: 8px; width: 100%;
157
  }
158
- .chat-messages {
159
- flex: 1; padding: 15px; overflow-y: auto; background: #f8f9fa;
160
  display: flex; flex-direction: column; gap: 12px;
161
  }
162
- .message {
163
- max-width: 85%; padding: 10px 14px; border-radius: 12px;
164
- font-size: 13px; line-height: 1.3; word-wrap: break-word;
165
- }
166
- .message.user {
167
- background: #00b4db; color: white; align-self: flex-end;
168
- border-bottom-right-radius: 4px;
169
- }
170
- .message.bot {
171
- background: white; color: #333; align-self: flex-start;
172
- border-bottom-left-radius: 4px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);
173
- }
174
- .message-time { font-size: 10px; opacity: 0.6; margin-top: 3px; }
175
- .response-time {
176
- font-size: 9px; color: #666; margin-top: 2px;
177
- display: flex; align-items: center; gap: 3px;
178
  }
179
- .speed-indicator {
180
- width: 6px; height: 6px; border-radius: 50%; display: inline-block;
181
  }
182
- .fast { background: #22c55e; }
183
- .medium { background: #f59e0b; }
184
- .slow { background: #ef4444; }
185
- .chat-input-container {
186
- padding: 15px; background: white; border-top: 1px solid #e5e7eb;
187
- display: flex; gap: 8px; align-items: center;
188
  }
189
- .chat-input {
190
- flex: 1; padding: 10px 14px; border: 1px solid #d1d5db;
191
- border-radius: 20px; font-size: 13px; outline: none;
 
 
192
  }
193
- .chat-input:focus { border-color: #00b4db; }
194
- .send-button {
195
- background: #00b4db; color: white; border: none; border-radius: 50%;
196
- width: 40px; height: 40px; cursor: pointer; display: flex;
197
- align-items: center; justify-content: center; font-size: 16px;
198
  }
199
- .send-button:hover { background: #0083b0; }
200
- .send-button:disabled { background: #d1d5db; cursor: not-allowed; }
201
- .welcome-message {
202
- text-align: center; color: #6b7280; font-size: 12px;
203
- padding: 15px; border-radius: 8px; background: rgba(255,255,255,0.8);
 
 
204
  }
205
- .typing-indicator {
206
- display: none; align-items: center; gap: 4px; padding: 10px 14px;
207
- background: white; border-radius: 12px; align-self: flex-start;
 
 
208
  }
209
- .typing-dot {
210
- width: 6px; height: 6px; background: #9ca3af; border-radius: 50%;
211
- animation: typing 1.2s infinite;
 
 
212
  }
213
- .typing-dot:nth-child(2) { animation-delay: 0.15s; }
214
- .typing-dot:nth-child(3) { animation-delay: 0.3s; }
215
- @keyframes typing { 0%, 60%, 100% { opacity: 0.3; } 30% { opacity: 1; } }
216
- .model-status {
217
- font-size: 10px; color: rgba(255,255,255,0.8); margin-top: 3px;
218
  }
219
- @media (max-width: 480px) {
220
- .chat-container { width: 100vw; height: 100vh; border-radius: 0; }
221
- .chat-header { padding: 12px; }
222
- .chat-messages { padding: 12px; }
223
  }
 
 
224
  </style>
225
  </head>
226
  <body>
227
- <div class="chat-container">
228
  <div class="chat-header">
229
- <h1>⚡ LyonPoy AI Chat</h1>
230
- <div class="cpu-badge">CPU Optimized</div>
231
  <select class="model-selector" id="modelSelect">
232
  <option value="distil-gpt-2">πŸš€ DistilGPT-2 (Fastest)</option>
233
  <option value="gpt-2-tinny">πŸš€ GPT-2 Tinny (Fast)</option>
@@ -244,145 +279,143 @@ async def get_frontend():
244
  <div class="model-status" id="modelStatus">Ready to chat!</div>
245
  </div>
246
  <div class="chat-messages" id="chatMessages">
247
- <div class="welcome-message">
248
- 🚀 <strong>CPU-Optimized AI Chat</strong><br>
249
- Models dioptimalkan untuk kecepatan di CPU<br>
250
- Pilih model dan mulai chat!
 
251
  </div>
252
  </div>
253
- <div class="typing-indicator" id="typingIndicator">
254
- <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
255
- <span style="font-size: 11px; color: #6b7280; margin-left: 5px;">AI sedang berpikir...</span>
256
- </div>
257
- <div class="chat-input-container">
258
- <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan singkat (max 100 karakter)..." maxlength="100">
259
- <button class="send-button" id="sendButton">➀</button>
260
  </div>
261
  </div>
262
  <script>
263
  const chatMessages = document.getElementById('chatMessages');
264
- const chatInput = document.getElementById('chatInput');
 
 
265
  const sendButton = document.getElementById('sendButton');
266
  const modelSelect = document.getElementById('modelSelect');
267
  const typingIndicator = document.getElementById('typingIndicator');
268
  const modelStatus = document.getElementById('modelStatus');
269
 
270
- // Production API Base
271
  const API_BASE = window.location.origin;
272
 
273
- function scrollToBottom() {
274
- chatMessages.scrollTop = chatMessages.scrollHeight;
275
- }
276
-
277
- function getSpeedClass(time) {
278
- if (time < 2000) return 'fast';
279
- if (time < 5000) return 'medium';
280
- return 'slow';
281
- }
282
 
283
- function addMessage(content, isUser = false, responseTime = null) {
284
  const messageDiv = document.createElement('div');
285
- messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
286
- const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
287
 
288
- let timeInfo = `<div class="message-time">${time}</div>`;
289
- if (responseTime && !isUser) {
290
- const speedClass = getSpeedClass(responseTime);
291
- timeInfo += `<div class="response-time">
292
- <span class="speed-indicator ${speedClass}"></span>
293
- ${responseTime}ms
294
- </div>`;
295
  }
296
 
297
- messageDiv.innerHTML = `${content}${timeInfo}`;
298
- chatMessages.appendChild(messageDiv);
299
- scrollToBottom();
300
- }
301
 
302
- function showTyping() {
303
- typingIndicator.style.display = 'flex';
304
- scrollToBottom();
305
- }
306
-
307
- function hideTyping() {
308
- typingIndicator.style.display = 'none';
309
  }
310
 
311
  async function sendMessage() {
312
- const message = chatInput.value.trim();
313
- if (!message) return;
314
 
315
- chatInput.disabled = true;
316
  sendButton.disabled = true;
 
317
  modelStatus.textContent = 'Processing...';
318
 
319
- addMessage(message, true);
320
- chatInput.value = '';
321
- showTyping();
322
-
323
  const startTime = Date.now();
324
 
325
  try {
326
- const response = await fetch('/chat', {
327
  method: 'POST',
328
  headers: { 'Content-Type': 'application/json' },
329
  body: JSON.stringify({
330
- message: message,
331
- model: modelSelect.value
332
  })
333
  });
334
 
335
  const data = await response.json();
336
  const responseTime = Date.now() - startTime;
337
 
338
- hideTyping();
339
- modelStatus.textContent = 'Ready';
340
-
341
  if (data.status === 'success') {
342
  addMessage(data.response, false, responseTime);
343
  } else {
344
- addMessage('⚠️ Model sedang loading, tunggu sebentar...', false, responseTime);
345
  }
346
  } catch (error) {
347
  const responseTime = Date.now() - startTime;
348
- hideTyping();
349
- modelStatus.textContent = 'Error';
350
- addMessage('❌ Koneksi bermasalah, coba lagi.', false, responseTime);
351
  console.error('Error:', error);
352
  }
353
 
354
- chatInput.disabled = false;
 
 
355
  sendButton.disabled = false;
356
- chatInput.focus();
357
  }
358
 
359
- // Event listeners
360
  sendButton.addEventListener('click', sendMessage);
361
- chatInput.addEventListener('keypress', (e) => {
362
- if (e.key === 'Enter') sendMessage();
363
  });
364
 
365
  modelSelect.addEventListener('change', () => {
366
  const selectedOption = modelSelect.options[modelSelect.selectedIndex];
367
- const modelName = selectedOption.text;
368
- modelStatus.textContent = `Model: ${modelName}`;
369
- addMessage(`🔄 Switched to: ${modelName}`);
370
  });
371
 
372
- // Auto-focus on load
373
  window.addEventListener('load', () => {
374
- chatInput.focus();
375
- modelStatus.textContent = 'DistilGPT-2 Ready (Fastest)';
376
- });
377
-
378
- // Character counter
379
- chatInput.addEventListener('input', () => {
380
- const remaining = 100 - chatInput.value.length;
381
- if (remaining < 20) {
382
- chatInput.style.borderColor = remaining < 10 ? '#ef4444' : '#f59e0b';
383
- } else {
384
- chatInput.style.borderColor = '#d1d5db';
385
- }
386
  });
387
  </script>
388
  </body>
@@ -398,192 +431,264 @@ async def chat(request: ChatRequest):
398
  try:
399
  model_id = request.model.lower()
400
  if model_id not in MODELS:
401
- model_id = "distil-gpt-2" # Default ke model tercepat
402
 
403
  model_config = MODELS[model_id]
404
 
405
- # Lazy loading with CPU optimizations
406
  if model_id not in app.state.pipelines:
407
  print(f"⚑ CPU Loading {model_config['name']}...")
408
 
409
- # CPU-specific optimizations
410
  pipeline_kwargs = {
411
  "task": model_config["task"],
412
  "model": model_config["model_path"],
413
- "device": -1, # Force CPU
414
- "torch_dtype": torch.float32, # CPU works best with float32
415
  "model_kwargs": {
416
  "torchscript": False,
417
  "low_cpu_mem_usage": True
418
  }
419
  }
420
-
 
 
421
  app.state.pipelines[model_id] = pipeline(**pipeline_kwargs)
422
-
423
- # Cleanup memory
424
  gc.collect()
425
 
426
  pipe = app.state.pipelines[model_id]
427
 
428
- # Ultra-fast processing with minimal parameters
429
- input_text = request.message[:80] # Limit input for CPU
430
-
431
  if model_config["task"] == "text-generation":
432
- # Minimal parameters for CPU speed
433
- result = pipe(
434
- input_text,
435
- max_length=min(len(input_text.split()) + model_config["max_tokens"], 60),
437
  do_sample=True,
438
- top_p=0.85,
439
- pad_token_id=pipe.tokenizer.eos_token_id,
440
  num_return_sequences=1,
441
- early_stopping=True
442
- )[0]['generated_text']
443
-
444
- # Quick cleanup
445
- if result.startswith(input_text):
446
- result = result[len(input_text):].strip()
447
 
448
- # Limit to 1 sentence for speed
449
- if '.' in result:
450
- result = result.split('.')[0] + '.'
451
- elif len(result) > 80:
452
- result = result[:77] + '...'
453
 
454
  elif model_config["task"] == "text-classification":
455
- output = pipe(input_text, truncation=True, max_length=128)[0]
 
 
456
  confidence = f"{output['score']:.2f}"
457
- result = f"πŸ“Š {output['label']} ({confidence})"
458
 
459
  elif model_config["task"] == "text2text-generation":
460
- result = pipe(
461
- input_text,
462
- max_length=model_config["max_tokens"],
463
- temperature=0.6,
464
- early_stopping=True
465
- )[0]['generated_text']
 
466
 
467
- # Final cleanup
468
- if not result or len(result.strip()) < 3:
469
- result = "πŸ€” Hmm, coba kata lain?"
470
- elif len(result) > 100:
471
- result = result[:97] + "..."
472
 
473
- processing_time = round((time.time() - start_time) * 1000)
474
 
475
  return {
476
- "response": result,
477
  "model": model_config["name"],
478
  "status": "success",
479
- "processing_time": f"{processing_time}ms"
480
  }
481
 
482
  except Exception as e:
483
  print(f"❌ CPU Error: {e}")
484
- processing_time = round((time.time() - start_time) * 1000)
 
 
485
 
486
- # Fallback response
487
  fallback_responses = [
488
- "πŸ”„ Coba lagi dengan kata yang lebih simple?",
489
- "πŸ’­ Hmm, mungkin pertanyaan lain?",
490
- "⚑ Model sedang optimal, tunggu sebentar...",
491
- "πŸš€ Coba model lain yang lebih cepat?"
492
  ]
493
 
494
- import random
495
  fallback = random.choice(fallback_responses)
496
 
497
  return {
498
- "response": fallback,
499
  "status": "error",
500
- "processing_time": f"{processing_time}ms"
 
501
  }
502
 
503
- # Optimized inference endpoint for production
504
  @app.post("/inference")
505
  async def inference(request: dict):
506
- """CPU-Optimized inference endpoint"""
507
  try:
508
- message = request.get("message", "")[:80] # Limit input
509
- model_path = request.get("model", "Lyon28/Distil_GPT-2")
510
-
511
- # Fast model mapping
512
- model_key = model_path.split("/")[-1].lower()
513
- model_mapping = {
514
- "distil_gpt-2": "distil-gpt-2",
515
- "distil-gpt-2": "distil-gpt-2",
516
- "gpt-2-tinny": "gpt-2-tinny",
517
- "bert-tinny": "bert-tinny",
518
- "distilbert-base-uncased": "distilbert-base-uncased",
519
- "albert-base-v2": "albert-base-v2",
520
- "electra-small": "electra-small",
521
- "t5-small": "t5-small",
522
- "gpt-2": "gpt-2",
523
- "tinny-llama": "tinny-llama",
524
- "pythia": "pythia",
525
- "gpt-neo": "gpt-neo"
526
  }
527
-
528
- internal_model = model_mapping.get(model_key, "distil-gpt-2")
529
-
530
- # Quick processing
531
- chat_request = ChatRequest(message=message, model=internal_model)
532
- result = await chat(chat_request)
533
 
534
  return {
535
- "result": result["response"],
536
- "status": "success",
537
- "model_used": result["model"],
538
  "processing_time": result.get("processing_time", "0ms")
539
  }
540
 
541
  except Exception as e:
542
  print(f"❌ Inference Error: {e}")
543
  return {
544
- "result": "πŸ”„ Sedang optimasi, coba lagi...",
545
  "status": "error"
546
  }
547
 
548
  # Lightweight health check
549
  @app.get("/health")
550
  async def health():
551
- loaded_models = len(app.state.pipelines) if hasattr(app.state, 'pipelines') else 0
552
  return {
553
  "status": "healthy",
554
  "platform": "CPU",
555
- "loaded_models": loaded_models,
556
  "total_models": len(MODELS),
557
- "optimization": "CPU-Tuned"
558
  }
559
 
560
  # Model info endpoint
561
  @app.get("/models")
562
- async def get_models():
563
  return {
564
  "models": [
565
  {
566
- "id": k,
567
- "name": v["name"],
568
- "task": v["task"],
569
- "max_tokens": v["max_tokens"],
570
- "priority": v["priority"],
571
  "cpu_optimized": True
572
  }
573
  for k, v in MODELS.items()
574
  ],
575
  "platform": "CPU",
576
- "recommended": ["distil-gpt-2", "gpt-2-tinny", "bert-tinny"]
577
  }
578
 
579
  # Run with CPU optimizations
580
  if __name__ == "__main__":
581
  port = int(os.environ.get("PORT", 7860))
582
  uvicorn.run(
583
  app,
584
  host="0.0.0.0",
585
  port=port,
586
- workers=1, # Single worker for CPU
587
- timeout_keep_alive=30,
588
- access_log=False # Disable access log for performance
589
  )
 
9
  import asyncio
10
  import time
11
  import gc
12
+ import random # Added for the fallback responses
13
 
14
  # Initialize FastAPI
15
+ app = FastAPI(title="LyonPoy AI Chat - CPU Optimized (Prompt Mode)")
16
 
17
  # Set seed for consistency
18
  set_seed(42)
19
 
20
  # CPU-Optimized 11 models configuration
21
+ # Adjust max_tokens to leave more room for generation after the prompt
22
  MODELS = {
23
  "distil-gpt-2": {
24
  "name": "DistilGPT-2 ⚑",
25
  "model_path": "Lyon28/Distil_GPT-2",
26
  "task": "text-generation",
27
+ "max_tokens": 60, # Ditingkatkan
28
+ "priority": 1
29
  },
30
  "gpt-2-tinny": {
31
  "name": "GPT-2 Tinny ⚑",
32
  "model_path": "Lyon28/GPT-2-Tinny",
33
  "task": "text-generation",
34
+ "max_tokens": 50, # Ditingkatkan
35
  "priority": 1
36
  },
37
  "bert-tinny": {
38
  "name": "BERT Tinny πŸ“Š",
39
  "model_path": "Lyon28/Bert-Tinny",
40
  "task": "text-classification",
41
+ "max_tokens": 0, # Tidak relevan untuk klasifikasi
42
  "priority": 1
43
  },
44
  "distilbert-base-uncased": {
45
  "name": "DistilBERT πŸ“Š",
46
  "model_path": "Lyon28/Distilbert-Base-Uncased",
47
  "task": "text-classification",
48
+ "max_tokens": 0, # Tidak relevan untuk klasifikasi
49
  "priority": 1
50
  },
51
  "albert-base-v2": {
 
66
  "name": "T5 Small πŸ”„",
67
  "model_path": "Lyon28/T5-Small",
68
  "task": "text2text-generation",
69
+ "max_tokens": 70, # Ditingkatkan
70
  "priority": 2
71
  },
72
  "gpt-2": {
73
  "name": "GPT-2 Standard",
74
  "model_path": "Lyon28/GPT-2",
75
  "task": "text-generation",
76
+ "max_tokens": 70, # Ditingkatkan
77
  "priority": 2
78
  },
79
  "tinny-llama": {
80
  "name": "Tinny Llama",
81
  "model_path": "Lyon28/Tinny-Llama",
82
  "task": "text-generation",
83
+ "max_tokens": 80, # Ditingkatkan
84
  "priority": 3
85
  },
86
  "pythia": {
87
  "name": "Pythia",
88
  "model_path": "Lyon28/Pythia",
89
  "task": "text-generation",
90
+ "max_tokens": 80, # Ditingkatkan
91
  "priority": 3
92
  },
93
  "gpt-neo": {
94
  "name": "GPT-Neo",
95
  "model_path": "Lyon28/GPT-Neo",
96
  "task": "text-generation",
97
+ "max_tokens": 90, # Ditingkatkan
98
  "priority": 3
99
  }
100
  }
101
 
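For the text-generation entries, max_tokens is treated as a budget of *new* tokens appended after the prompt; the /chat handler below derives a total max_length from it. A minimal sketch of that arithmetic (the 40-token prompt is an assumed example value, not from this commit):

    config = MODELS["distil-gpt-2"]            # {"max_tokens": 60, ...}
    prompt_length_tokens = 40                  # e.g. measured with the model's tokenizer
    max_len_for_generation = prompt_length_tokens + config["max_tokens"]  # 100 total
    # later capped at the model's max_position_embeddings (1024 for the GPT-2 family)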
102
  class ChatRequest(BaseModel):
103
+ message: str # Will contain the full structured prompt
104
  model: Optional[str] = "distil-gpt-2"
105
+ # Extra fields for the structured prompt, in case they're needed at the Pydantic level;
106
+ # for now we parse everything from 'message'
107
+ situasi: Optional[str] = ""
108
+ latar: Optional[str] = ""
109
+ user_message: str # The actual user message
110
+
111
 
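A /chat request matching this model might look as follows (illustrative values; 'message' carries the full structured prompt while the remaining fields mirror its parts):

    payload = {
        "message": "Situasi: Santai\nLatar: Tepi sungai\n{{User}}: halo\n{{Char}}:",
        "model": "distil-gpt-2",
        "situasi": "Santai",
        "latar": "Tepi sungai",
        "user_message": "halo",
    }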
112
  # CPU-Optimized startup
113
  @app.on_event("startup")
114
+ async def load_models_on_startup(): # Renamed to keep the function name unique
115
  app.state.pipelines = {}
116
+ app.state.tokenizers = {} # Not strictly used yet, but handy to have when needed
117
 
118
  # Set CPU optimizations
119
+ torch.set_num_threads(2)
120
  os.environ['OMP_NUM_THREADS'] = '2'
121
  os.environ['MKL_NUM_THREADS'] = '2'
122
  os.environ['NUMEXPR_NUM_THREADS'] = '2'
 
126
  os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
127
  os.makedirs(os.environ['HF_HOME'], exist_ok=True)
128
 
129
+ print("πŸš€ LyonPoy AI Chat - CPU Optimized (Prompt Mode) Ready!")
130
 
131
  # Lightweight frontend
132
  @app.get("/", response_class=HTMLResponse)
133
  async def get_frontend():
134
+ # Styling loosely follows styles.css; layout follows chat.html
135
+ # This is a HEAVILY simplified, embedded version
136
  html_content = '''
137
  <!DOCTYPE html>
138
  <html lang="id">
139
  <head>
140
  <meta charset="UTF-8">
141
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
142
+ <title>LyonPoy AI Chat - Prompt Mode</title>
143
  <style>
144
+ :root {
145
+ --primary-color: #075E54; /* styles.css */
146
+ --bg-primary: #ffffff; /* styles.css */
147
+ --bg-secondary: #f8f9fa; /* styles.css */
148
+ --bg-accent: #DCF8C6; /* styles.css */
149
+ --text-primary: #212529; /* styles.css */
150
+ --text-white: #ffffff; /* styles.css */
151
+ --border-color: #dee2e6; /* styles.css */
152
+ --border-radius: 10px; /* styles.css */
153
+ --spacing-xs: 0.25rem; /* referenced by .message-info below */
+ --spacing-sm: 0.5rem;
154
+ --spacing-md: 1rem;
155
+ --shadow: 0 2px 5px rgba(0, 0, 0, 0.15); /* styles.css */
+ --shadow-sm: 0 1px 3px rgba(0, 0, 0, 0.1); /* referenced by .message-group.incoming .message */
156
+ --font-size-base: 1rem;
157
+ --font-size-sm: 0.875rem;
158
+ --font-size-xs: 0.75rem;
159
+ }
160
  * { margin: 0; padding: 0; box-sizing: border-box; }
161
  body {
162
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
163
+ background-color: var(--bg-secondary); /* styles.css --bg-primary */
164
+ color: var(--text-primary); /* styles.css */
165
  height: 100vh; display: flex; justify-content: center; align-items: center;
166
+ padding: var(--spacing-md);
167
  }
168
+ .app-container { /* Similar to #app in styles.css */
169
+ width: 100%;
170
+ max-width: 600px; /* Wider to fit the extra inputs */
171
+ height: 95vh;
172
+ background: var(--bg-primary); /* styles.css */
173
+ border-radius: var(--border-radius); /* styles.css */
174
+ box-shadow: var(--shadow); /* styles.css */
175
+ display: flex; flex-direction: column; overflow: hidden;
176
  }
177
+ .chat-header { /* Similar to .header.chat-header in styles.css */
178
+ background: var(--primary-color); /* styles.css --secondary-color (for the chat header) */
179
+ color: var(--text-white); /* styles.css */
180
+ padding: var(--spacing-md);
181
+ text-align: center;
 
  }
183
+ .chat-header h1 { font-size: 1.2rem; font-weight: 600; margin-bottom: var(--spacing-sm); }
184
  .model-selector {
185
  background: rgba(255,255,255,0.2); border: none; color: white;
186
+ padding: 6px 10px; border-radius: 15px; font-size: 0.8rem; cursor: pointer;
187
+ width: 100%;
188
  }
189
+ .chat-messages { /* Similar to .chat-messages in styles.css */
190
+ flex: 1; padding: var(--spacing-md); overflow-y: auto; background: var(--bg-secondary); /* styles.css */
191
  display: flex; flex-direction: column; gap: 12px;
192
  }
193
+ .message-group { /* Similar to .message-group in styles.css */
194
+ display: flex;
195
+ max-width: 75%; /* styles.css --message-max-width */
196
  }
197
+ .message-group.outgoing { align-self: flex-end; flex-direction: row-reverse; }
198
+ .message-group.incoming { align-self: flex-start; }
199
+
200
+ .message { /* Similar to .message in styles.css */
201
+ padding: var(--spacing-sm) var(--spacing-md);
202
+ border-radius: var(--border-radius); /* styles.css --message-border-radius */
203
+ font-size: var(--font-size-sm); /* styles.css --font-size-base (for messages) */
204
+ line-height: 1.4; word-wrap: break-word;
205
+ position: relative;
206
  }
207
+ .message-group.outgoing .message {
208
+ background: var(--bg-accent); /* styles.css */
209
+ color: var(--text-primary);
210
+ margin-left: var(--spacing-md);
 
 
211
  }
212
+ .message-group.incoming .message {
213
+ background: var(--bg-primary); /* styles.css */
214
+ color: var(--text-primary);
215
+ box-shadow: var(--shadow-sm); /* styles.css --shadow-sm */
216
+ margin-right: var(--spacing-md); /* In case an avatar is added */
217
  }
218
+ .message-info { /* Similar to .message-info in styles.css */
219
+ display: flex; justify-content: flex-end; align-items: center;
220
+ margin-top: var(--spacing-xs);
221
+ font-size: var(--font-size-xs); /* styles.css */
222
+ color: #6c757d; /* styles.css --text-muted */
223
  }
224
+ .message-time { margin-right: var(--spacing-xs); }
225
+ .response-time-info { font-size: 9px; color: #666; margin-top: 2px; }
226
+
227
+ .input-area { /* Container for all the inputs */
228
+ padding: var(--spacing-md);
229
+ background: var(--bg-primary); /* styles.css */
230
+ border-top: 1px solid var(--border-color); /* styles.css */
231
  }
232
+ .prompt-inputs { display: flex; gap: var(--spacing-sm); margin-bottom: var(--spacing-sm); }
233
+ .prompt-inputs input { flex: 1; }
234
+
235
+ .chat-input-container { /* Similar to .chat-input-container in styles.css */
236
+ display: flex; gap: var(--spacing-sm); align-items: center;
237
  }
238
+ .chat-input { /* Similar to the textarea in .chat-input-field from styles.css */
239
+ flex: 1; padding: var(--spacing-sm) var(--spacing-md);
240
+ border: 1px solid var(--border-color); /* styles.css */
241
+ border-radius: 20px; /* styles.css --border-radius-xl */
242
+ font-size: var(--font-size-sm); outline: none;
243
  }
244
+ .chat-input:focus { border-color: var(--primary-color); }
245
+ .send-button { /* Similar to .send-btn in styles.css */
246
+ background: var(--primary-color); color: var(--text-white); border: none;
247
+ border-radius: 50%; width: 40px; height: 40px; cursor: pointer;
248
+ display: flex; align-items: center; justify-content: center; font-size: 1.2rem;
249
  }
250
+ .send-button:hover { filter: brightness(1.2); }
251
+ .send-button:disabled { background: #d1d5db; cursor: not-allowed; }
252
+
253
+ .typing-indicator-text {
254
+ font-style: italic; color: #6c757d; font-size: var(--font-size-sm);
255
+ padding: var(--spacing-sm) var(--spacing-md);
256
+ text-align: center;
257
  }
258
+ .model-status { font-size: 10px; color: rgba(255,255,255,0.8); margin-top: 3px; text-align: center; }
259
+ label { font-size: 0.9em; margin-bottom: 0.2em; display:block; }
260
  </style>
261
  </head>
262
  <body>
263
+ <div class="app-container">
264
  <div class="chat-header">
265
+ <h1>AI Character Prompt Mode</h1>
 
266
  <select class="model-selector" id="modelSelect">
267
  <option value="distil-gpt-2">πŸš€ DistilGPT-2 (Fastest)</option>
268
  <option value="gpt-2-tinny">πŸš€ GPT-2 Tinny (Fast)</option>
 
279
  <div class="model-status" id="modelStatus">Ready to chat!</div>
280
  </div>
281
  <div class="chat-messages" id="chatMessages">
282
+ <div class="message-group incoming">
283
+ <div class="message">
284
+ Hello! Atur Situasi, Latar, dan pesanmu di bawah. Lalu kirim!
285
+ <div class="message-info"><span class="message-time">${new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' })}</span></div>
286
+ </div>
287
  </div>
288
  </div>
289
+ <div class="typing-indicator-text" id="typingIndicator" style="display: none;">AI sedang berpikir...</div>
290
+ <div class="input-area">
291
+ <div class="prompt-inputs">
292
+ <div>
293
+ <label for="situasiInput">Situasi:</label>
294
+ <input type="text" class="chat-input" id="situasiInput" placeholder="Mis: Santai">
295
+ </div>
296
+ <div>
297
+ <label for="latarInput">Latar:</label>
298
+ <input type="text" class="chat-input" id="latarInput" placeholder="Mis: Tepi sungai">
299
+ </div>
300
+ </div>
301
+ <div class="chat-input-container">
302
+ <input type="text" class="chat-input" id="userMessageInput" placeholder="Ketik pesan sebagai {{User}}..." maxlength="150">
303
+ <button class="send-button" id="sendButton">➀</button>
304
+ </div>
305
  </div>
306
  </div>
307
  <script>
308
  const chatMessages = document.getElementById('chatMessages');
309
+ const situasiInput = document.getElementById('situasiInput');
310
+ const latarInput = document.getElementById('latarInput');
311
+ const userMessageInput = document.getElementById('userMessageInput');
312
  const sendButton = document.getElementById('sendButton');
313
  const modelSelect = document.getElementById('modelSelect');
314
  const typingIndicator = document.getElementById('typingIndicator');
315
  const modelStatus = document.getElementById('modelStatus');
316
 
 
317
  const API_BASE = window.location.origin;
318
 
319
+ function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }
320
 
321
+ function addMessage(content, isUser = false, responseTimeMs = null, fullPromptForUser = null) {
322
+ const messageGroupDiv = document.createElement('div');
323
+ messageGroupDiv.className = `message-group ${isUser ? 'outgoing' : 'incoming'}`;
324
+
325
  const messageDiv = document.createElement('div');
326
+ messageDiv.className = 'message';
 
327
 
328
+ const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
329
+ let timeInfoHtml = `<div class="message-info"><span class="message-time">${time}</span></div>`;
330
+
331
+ if (responseTimeMs !== null && !isUser) {
332
+ timeInfoHtml += `<div class="response-time-info">${responseTimeMs}ms</div>`;
 
 
333
  }
334
 
335
+ // For user messages we could show the full prompt or just the user text.
336
+ // For now, show only the user text for cleanliness; the full prompt is still sent to the backend
337
+ const displayContent = isUser ? userMessageInput.value.trim() : content;
338
+ messageDiv.innerHTML = displayContent.replace(/\\n/g, '<br>') + timeInfoHtml;
339
 
340
+ messageGroupDiv.appendChild(messageDiv);
341
+ chatMessages.appendChild(messageGroupDiv);
342
+ scrollToBottom();
343
  }
344
 
345
  async function sendMessage() {
346
+ const situasi = situasiInput.value.trim();
347
+ const latar = latarInput.value.trim();
348
+ const userMsg = userMessageInput.value.trim();
349
+
350
+ if (!userMsg) {
351
+ alert("Pesan pengguna tidak boleh kosong!");
352
+ return;
353
+ }
354
 
355
+ const fullPrompt = `Situasi: ${situasi}\\nLatar: ${latar}\\n{{User}}: ${userMsg}\\n{{Char}}:`;
356
+
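+ // e.g. situasi="Santai", latar="Tepi sungai", userMsg="halo" yields:
+ // "Situasi: Santai\nLatar: Tepi sungai\n{{User}}: halo\n{{Char}}:"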
357
+ addMessage(userMsg, true, null, fullPrompt);
358
+
359
+ userMessageInput.value = ''; // Clear only the user-message input
360
+ userMessageInput.disabled = true;
361
  sendButton.disabled = true;
362
+ typingIndicator.style.display = 'block';
363
  modelStatus.textContent = 'Processing...';
364
 
365
  const startTime = Date.now();
366
 
367
  try {
368
+ const response = await fetch(API_BASE + '/chat', {
369
  method: 'POST',
370
  headers: { 'Content-Type': 'application/json' },
371
  body: JSON.stringify({
372
+ message: fullPrompt, // Send the full prompt
373
+ model: modelSelect.value,
374
+ // Extra info in case the backend wants to process these separately
375
+ situasi: situasi,
376
+ latar: latar,
377
+ user_message: userMsg
378
  })
379
  });
380
 
381
  const data = await response.json();
382
  const responseTime = Date.now() - startTime;
383
 
384
  if (data.status === 'success') {
385
  addMessage(data.response, false, responseTime);
386
  } else {
387
+ addMessage(data.response || '⚠️ Model gagal merespon, coba lagi.', false, responseTime);
388
  }
389
  } catch (error) {
390
  const responseTime = Date.now() - startTime;
391
+ addMessage('❌ Koneksi bermasalah atau error server.', false, responseTime);
 
 
392
  console.error('Error:', error);
393
  }
394
 
395
+ typingIndicator.style.display = 'none';
396
+ modelStatus.textContent = 'Ready';
397
+ userMessageInput.disabled = false;
398
  sendButton.disabled = false;
399
+ userMessageInput.focus();
400
  }
401
 
 
402
  sendButton.addEventListener('click', sendMessage);
403
+ userMessageInput.addEventListener('keypress', (e) => {
404
+ if (e.key === 'Enter' && !e.shiftKey) {
405
+ e.preventDefault(); // Prevent inserting a newline in the input
406
+ sendMessage();
407
+ }
408
  });
409
 
410
  modelSelect.addEventListener('change', () => {
411
  const selectedOption = modelSelect.options[modelSelect.selectedIndex];
412
+ modelStatus.textContent = `Model: ${selectedOption.text}`;
 
 
413
  });
414
 
 
415
  window.addEventListener('load', () => {
416
+ userMessageInput.focus();
417
+ const initialModelName = modelSelect.options[modelSelect.selectedIndex].text;
418
+ modelStatus.textContent = `${initialModelName} Ready`;
419
  });
420
  </script>
421
  </body>
 
431
  try:
432
  model_id = request.model.lower()
433
  if model_id not in MODELS:
434
+ model_id = "distil-gpt-2"
435
 
436
  model_config = MODELS[model_id]
437
 
438
+ # The message in the request is now a pre-structured prompt,
439
+ # e.g.: "Situasi: Santai\nLatar:Tepi sungai\n{{User}}:sayang,danau nya indah ya, (memeluk {{char}} dari samping)\n{{Char}}:"
440
+ structured_prompt = request.message
441
+
442
  if model_id not in app.state.pipelines:
443
  print(f"⚑ CPU Loading {model_config['name']}...")
444
 
 
445
  pipeline_kwargs = {
446
  "task": model_config["task"],
447
  "model": model_config["model_path"],
448
+ "device": -1,
449
+ "torch_dtype": torch.float32,
450
  "model_kwargs": {
451
  "torchscript": False,
452
  "low_cpu_mem_usage": True
453
  }
454
  }
455
+ if model_config["task"] != "text-classification": # Tokenizer hanya untuk generator
456
+ app.state.tokenizers[model_id] = AutoTokenizer.from_pretrained(model_config["model_path"])
457
+
458
  app.state.pipelines[model_id] = pipeline(**pipeline_kwargs)
 
 
459
  gc.collect()
460
 
461
  pipe = app.state.pipelines[model_id]
462
 
463
+ generated_text = "Output tidak didukung untuk task ini."
464
+
 
465
  if model_config["task"] == "text-generation":
466
+ # Count the prompt length in tokens
467
+ current_tokenizer = app.state.tokenizers.get(model_id)
468
+ if not current_tokenizer: # Fallback if the tokenizer is missing from state (it should be there)
469
+ current_tokenizer = AutoTokenizer.from_pretrained(model_config["model_path"])
470
+
471
+ prompt_tokens = current_tokenizer.encode(structured_prompt, return_tensors="pt")
472
+ prompt_length_tokens = prompt_tokens.shape[1]
473
+
474
+ # max_length is the total (prompt + generated); max_tokens covers the generated part only.
475
+ # Make sure max_length does not exceed the model's capacity (typically 512 or 1024 for small models)
476
+ # and is not too short either.
477
+ # Some models may have a smaller max_position_embeddings.
478
+ # Cap max_length to something safe like 256 or 512 if it gets too large.
479
+ # model_config["max_tokens"] is the number of max *new* tokens we want.
480
+
481
+ # Use max_new_tokens directly if the pipeline supports it, otherwise set max_length.
482
+ # For the generic pipeline, max_length is what matters.
483
+ # max_length must be larger than the prompt.
484
+ # Max new tokens comes from the model config.
485
+ max_new_generated_tokens = model_config["max_tokens"]
486
+ max_len_for_generation = prompt_length_tokens + max_new_generated_tokens
487
+
488
+ # Cap the total max_length so it is not too large for small models.
489
+ # GPT-2 has a 1024-token context, for example, and so does DistilGPT-2.
490
+ # Smaller models may have a lower limit.
491
+ # Set a safe upper bound, say 512, for this demo.
492
+ # Adjust if your specific model has a different limit.
493
+ absolute_max_len = 512
494
+ if hasattr(pipe.model.config, 'max_position_embeddings'):
495
+ absolute_max_len = pipe.model.config.max_position_embeddings
496
+
497
+ max_len_for_generation = min(max_len_for_generation, absolute_max_len)
498
+
499
+ # Ensure max_length covers the prompt plus at least a few new tokens
500
+ if max_len_for_generation <= prompt_length_tokens + 5: # at least 5 new tokens
501
+ max_len_for_generation = prompt_length_tokens + 5
502
+
503
+
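+ # Worked example (illustrative): a 40-token prompt with max_tokens=60 gives
+ # max_len_for_generation = 100; min(100, 1024) = 100, i.e. up to 60 new tokens.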
504
+ # Make sure we do not request more new tokens than absolute_max_len allows
505
+ actual_max_new_tokens = max_len_for_generation - prompt_length_tokens
506
+ if actual_max_new_tokens <= 0: # If the prompt is already too long
507
+ return {
508
+ "response": "Hmm, prompt terlalu panjang untuk model ini. Coba perpendek situasi/latar/pesan.",
509
+ "model": model_config["name"],
510
+ "status": "error_prompt_too_long",
511
+ "processing_time": f"{round((time.time() - start_time) * 1000)}ms"
512
+ }
513
+
514
+ outputs = pipe(
515
+ structured_prompt,
516
+ max_length=max_len_for_generation, # Total length (prompt + new tokens)
517
+ # max_new_tokens=actual_max_new_tokens, # Preferred when the pipeline supports it explicitly
518
+ temperature=0.75, # Slightly more creative
519
  do_sample=True,
520
+ top_p=0.9, # Broaden sampling slightly
521
+ pad_token_id=pipe.tokenizer.eos_token_id if hasattr(pipe.tokenizer, 'eos_token_id') else 50256, # 50256 for GPT-2
522
  num_return_sequences=1,
523
+ early_stopping=True,
524
+ truncation=True # Important when the prompt is too long for the model
525
+ )
526
+ generated_text = outputs[0]['generated_text']
 
 
527
 
528
+ # Cleanup: extract only the text after the "{{Char}}:" marker
529
+ char_marker = "{{Char}}:"
530
+ if char_marker in generated_text:
531
+ generated_text = generated_text.split(char_marker, 1)[-1].strip()
532
+ elif generated_text.startswith(structured_prompt): # fallback when the marker is missing
533
+ generated_text = generated_text[len(structured_prompt):].strip()
534
+
535
+ # Strip the user's message if the model echoes it
536
+ if request.user_message and generated_text.startswith(request.user_message):
537
+ generated_text = generated_text[len(request.user_message):].strip()
538
+
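+ # Worked example of the cleanup above (illustrative): if the model returns
+ # "Situasi: Santai\n...\n{{Char}}: Iya, indah sekali." then splitting on
+ # "{{Char}}:" keeps only "Iya, indah sekali."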
539
+ # Limit to a couple of sentences or a fixed length for speed & relevance.
541
+ # This could be made more flexible
541
+ sentences = generated_text.split('.')
542
+ if len(sentences) > 2: # Take the first two sentences when available
543
+ generated_text = sentences[0].strip() + ('.' if sentences[0] else '') + \
544
+ (sentences[1].strip() + '.' if len(sentences) > 1 and sentences[1] else '')
545
+ elif len(generated_text) > 150: # Rough character cap
546
+ generated_text = generated_text[:147] + '...'
547
 
548
  elif model_config["task"] == "text-classification":
549
+ # For classification, use the actual user message rather than the structured prompt
550
+ user_msg_for_classification = request.user_message if request.user_message else structured_prompt
551
+ output = pipe(user_msg_for_classification[:256], truncation=True, max_length=256)[0] # Cap the input
552
  confidence = f"{output['score']:.2f}"
553
+ generated_text = f"πŸ“Š Klasifikasi pesan '{user_msg_for_classification[:30]}...': {output['label']} (Skor: {confidence})"
554
 
555
  elif model_config["task"] == "text2text-generation":
556
+ # T5 and similar models may need slightly different input formatting,
557
+ # but for this demo we send the prompt as-is.
558
+ # You may need to add a task prefix such as "translate English to German: " for T5.
559
+ # For chat, we can leave it as-is or use user_message.
560
+ user_msg_for_t2t = request.user_message if request.user_message else structured_prompt
561
+ outputs = pipe(
562
+ user_msg_for_t2t[:256], # Cap the input for T5
563
+ max_length=model_config["max_tokens"], # Ini adalah max_length untuk output T5
564
+ temperature=0.65,
565
+ early_stopping=True,
566
+ truncation=True
567
+ )
568
+ generated_text = outputs[0]['generated_text']
569
 
570
+ if not generated_text or len(generated_text.strip()) < 1:
571
+ generated_text = "πŸ€” Hmm, saya tidak yakin bagaimana merespon. Coba lagi dengan prompt berbeda?"
572
+ elif len(generated_text) > 250: # Final output cap
573
+ generated_text = generated_text[:247] + "..."
 
574
 
575
+ processing_time_ms = round((time.time() - start_time) * 1000)
576
 
577
  return {
578
+ "response": generated_text,
579
  "model": model_config["name"],
580
  "status": "success",
581
+ "processing_time": f"{processing_time_ms}ms"
582
  }
583
 
584
  except Exception as e:
585
  print(f"❌ CPU Error: {e}")
586
+ import traceback
587
+ traceback.print_exc() # Print full traceback for debugging
588
+ processing_time_ms = round((time.time() - start_time) * 1000)
589
 
 
590
  fallback_responses = [
591
+ "πŸ”„ Maaf, ada sedikit gangguan. Coba lagi dengan kata yang lebih simpel?",
592
+ "πŸ’­ Hmm, sepertinya saya butuh istirahat sejenak. Mungkin pertanyaan lain?",
593
+ "⚑ Model sedang dioptimalkan, tunggu sebentar dan coba lagi...",
594
+ "πŸš€ Mungkin coba model lain yang lebih cepat atau prompt yang berbeda?"
595
  ]
596
 
 
597
  fallback = random.choice(fallback_responses)
598
 
599
  return {
600
+ "response": f"{fallback} (Error: {str(e)[:100]})", # Beri sedikit info error
601
  "status": "error",
602
+ "model": MODELS.get(model_id, {"name": "Unknown"})["name"] if 'model_id' in locals() else "Unknown",
603
+ "processing_time": f"{processing_time_ms}ms"
604
  }
605
 
606
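For reference, the new /chat contract can be exercised with a tiny client. A minimal sketch, assuming the app is running locally on port 7860 (host, port, and example values are assumptions, not part of this commit):

    import requests  # sketch only

    resp = requests.post("http://localhost:7860/chat", json={
        "message": "Situasi: Santai\nLatar: Tepi sungai\n{{User}}: halo\n{{Char}}:",
        "model": "distil-gpt-2",
        "situasi": "Santai",
        "latar": "Tepi sungai",
        "user_message": "halo",  # required by ChatRequest
    })
    print(resp.json())  # {"response": ..., "model": ..., "status": "success", "processing_time": "...ms"}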
+ # Optimized inference endpoint (NOT updated in detail for the new prompt mode,
607
+ # since the main focus is /chat and its frontend. If /inference also needs prompt mode,
608
+ # it must construct a similar ChatRequest.)
609
  @app.post("/inference")
610
  async def inference(request: dict):
611
+ """CPU-Optimized inference endpoint - MUNGKIN PERLU PENYESUAIAN UNTUK PROMPT MODE"""
612
  try:
613
+ # For prompt mode, 'message' should be the full structured prompt,
614
+ # or this endpoint should be changed to accept 'situasi', 'latar', 'user_message'
615
+ message = request.get("message", "")
616
+ model_id_from_request = request.get("model", "distil-gpt-2") # Should be an internal model_id
617
+
618
+ # If a model path was given, try mapping it to an internal model_id
619
+ if "/" in model_id_from_request:
620
+ model_key_from_path = model_id_from_request.split("/")[-1].lower()
621
+ model_mapping = { "distil_gpt-2": "distil-gpt-2", "gpt-2-tinny": "gpt-2-tinny" } # ... (add all remaining mappings) ...
622
+ internal_model = model_mapping.get(model_key_from_path, "distil-gpt-2")
623
+ else: # Assume it is already an internal model_id
624
+ internal_model = model_id_from_request
625
+
626
+ # If /inference must support prompt mode, the data sent to ChatRequest needs adjusting.
627
+ # For this example, we assume 'message' is just the user_message for /inference,
628
+ # with situasi/latar left at defaults or unused.
629
+ # This is a simplification and may need changing to fit your needs.
630
+ chat_req_data = {
631
+ "message": f"{{User}}: {message}\n{{Char}}:", # Bentuk prompt paling sederhana
632
+ "model": internal_model,
633
+ "user_message": message # Simpan pesan user asli
634
  }
635
+
636
+ chat_request_obj = ChatRequest(**chat_req_data)
637
+ result = await chat(chat_request_obj)
638
 
639
  return {
640
+ "result": result.get("response"),
641
+ "status": result.get("status"),
642
+ "model_used": result.get("model"),
643
  "processing_time": result.get("processing_time", "0ms")
644
  }
645
 
646
  except Exception as e:
647
  print(f"❌ Inference Error: {e}")
648
  return {
649
+ "result": "πŸ”„ Terjadi kesalahan pada endpoint inference. Coba lagi...",
650
  "status": "error"
651
  }
652
 
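Under the simplification above, /inference still takes a flat JSON body and wraps it into the simplest prompt form itself. A minimal sketch under the same local-server assumption:

    import requests  # sketch only

    resp = requests.post("http://localhost:7860/inference", json={
        "message": "halo",        # treated as the user message
        "model": "distil-gpt-2",  # internal id (a repo path also works via the mapping)
    })
    print(resp.json())  # {"result": ..., "status": ..., "model_used": ..., "processing_time": ...}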
653
  # Lightweight health check
654
  @app.get("/health")
655
  async def health():
656
+ loaded_models_count = len(app.state.pipelines) if hasattr(app.state, 'pipelines') else 0
657
  return {
658
  "status": "healthy",
659
  "platform": "CPU",
660
+ "loaded_models": loaded_models_count,
661
  "total_models": len(MODELS),
662
+ "optimization": "CPU-Tuned (Prompt Mode)"
663
  }
664
 
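The health check takes no request body; its expected shape, given the handler above (counts are illustrative, and loaded_models stays 0 until the first model is lazily loaded):

    # curl http://localhost:7860/health
    # -> {"status": "healthy", "platform": "CPU", "loaded_models": 0,
    #     "total_models": 11, "optimization": "CPU-Tuned (Prompt Mode)"}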
665
  # Model info endpoint
666
  @app.get("/models")
667
+ async def get_models_info(): # Renamed function
668
  return {
669
  "models": [
670
  {
671
+ "id": k, "name": v["name"], "task": v["task"],
672
+ "max_tokens_generate": v["max_tokens"], "priority": v["priority"],
 
  "cpu_optimized": True
674
  }
675
  for k, v in MODELS.items()
676
  ],
677
  "platform": "CPU",
678
+ "recommended_for_prompting": ["distil-gpt-2", "gpt-2-tinny", "tinny-llama", "gpt-neo", "pythia", "gpt-2"]
679
  }
680
 
681
  # Run with CPU optimizations
682
  if __name__ == "__main__":
683
  port = int(os.environ.get("PORT", 7860))
684
+ # Use reload=True during development so code changes are picked up immediately.
685
+ # Disable reload in production
686
+ # uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1, reload=True)
687
  uvicorn.run(
688
  app,
689
  host="0.0.0.0",
690
  port=port,
691
+ workers=1,
692
+ timeout_keep_alive=30, # Uvicorn's default is 5 seconds, likely too short while models load
693
+ access_log=False
694
  )