Lyon28 committed
Commit 138b76f · verified · 1 Parent(s): 24088e0

Update app.py

Files changed (1):
  1. app.py +213 -304
app.py CHANGED
@@ -1,220 +1,88 @@
  import os
  import uvicorn
- import asyncio
- from concurrent.futures import ThreadPoolExecutor
- from fastapi import FastAPI, HTTPException, BackgroundTasks
  from fastapi.responses import HTMLResponse
  from pydantic import BaseModel
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
  import torch
- from typing import Optional, Dict
- import time
- import logging
-
- # Setup logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)

  # Initialize FastAPI
- app = FastAPI(title="LyonPoy AI Chat - Optimized")

- # Optimized model configuration - prioritize smaller, faster models
  MODELS = {
-     "distil-gpt-2": {
-         "name": "DistilGPT-2",
-         "model_path": "Lyon28/Distil_GPT-2",
-         "task": "text-generation",
-         "priority": 1  # Highest priority - smallest model
-     },
-     "gpt-2-tinny": {
-         "name": "GPT-2 Tinny",
-         "model_path": "Lyon28/GPT-2-Tinny",
-         "task": "text-generation",
-         "priority": 2
-     },
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
-         "task": "text-generation",
-         "priority": 3
      },
-     "gpt-2": {
-         "name": "GPT-2",
-         "model_path": "Lyon28/GPT-2",
-         "task": "text-generation",
-         "priority": 4
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
-         "task": "text-classification",
-         "priority": 5
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
-         "task": "text-classification",
-         "priority": 6
-     },
-     "distilbert-base-uncased": {
-         "name": "DistilBERT",
-         "model_path": "Lyon28/Distilbert-Base-Uncased",
-         "task": "text-classification",
-         "priority": 7
-     },
-     "electra-small": {
-         "name": "ELECTRA Small",
-         "model_path": "Lyon28/Electra-Small",
-         "task": "text-classification",
-         "priority": 8
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
-         "task": "text2text-generation",
-         "priority": 9
      },
-     "pythia": {
-         "name": "Pythia",
-         "model_path": "Lyon28/Pythia",
-         "task": "text-generation",
-         "priority": 10
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
-         "task": "text-generation",
-         "priority": 11  # Largest model - lowest priority
      }
  }

  class ChatRequest(BaseModel):
      message: str
-     model: Optional[str] = "distil-gpt-2"  # Default to fastest model
-
- # Global state
- app.state.pipelines = {}
- app.state.loading_models = set()
- app.state.executor = ThreadPoolExecutor(max_workers=2)
-
- # Optimized model loading
- async def load_model_async(model_id: str):
-     """Load model in background thread"""
-     if model_id in app.state.loading_models:
-         return False
-
-     app.state.loading_models.add(model_id)
-
-     try:
-         model_config = MODELS[model_id]
-         logger.info(f"🔄 Loading {model_config['name']}...")
-
-         # Load in thread to avoid blocking
-         loop = asyncio.get_event_loop()
-
-         def load_model():
-             device = 0 if torch.cuda.is_available() else -1
-             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
-             return pipeline(
-                 task=model_config["task"],
-                 model=model_config["model_path"],
-                 device=device,
-                 torch_dtype=dtype,
-                 use_fast=True,
-                 trust_remote_code=True,
-                 low_cpu_mem_usage=True,
-                 # Optimization for faster inference
-                 pad_token_id=50256 if "gpt" in model_id else None
-             )
-
-         pipeline_obj = await loop.run_in_executor(app.state.executor, load_model)
-         app.state.pipelines[model_id] = pipeline_obj
-         logger.info(f"✅ {model_config['name']} loaded successfully")
-         return True
-
-     except Exception as e:
-         logger.error(f"❌ Failed to load {model_id}: {e}")
-         return False
-     finally:
-         app.state.loading_models.discard(model_id)

  @app.on_event("startup")
  async def load_models():
-     """Load high-priority models on startup"""
-     os.environ['HF_HOME'] = './cache/huggingface'  # Persistent cache
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
-
-     # Pre-load top 3 fastest models
-     priority_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])[:3]
-
-     tasks = []
-     for model_id in priority_models:
-         task = asyncio.create_task(load_model_async(model_id))
-         tasks.append(task)
-
-     # Load models concurrently
-     await asyncio.gather(*tasks, return_exceptions=True)
-     logger.info("🚀 LyonPoy AI Chat Ready!")

- # Optimized inference
- async def run_inference(model_id: str, message: str):
-     """Run inference in background thread"""
-     if model_id not in app.state.pipelines:
-         # Try to load model if not available
-         success = await load_model_async(model_id)
-         if not success:
-             raise HTTPException(status_code=503, detail=f"Model {model_id} unavailable")
-
-     pipe = app.state.pipelines[model_id]
-     model_config = MODELS[model_id]
-
-     loop = asyncio.get_event_loop()
-
-     def inference():
-         start_time = time.time()
-
-         try:
-             if model_config["task"] == "text-generation":
-                 # Optimized generation parameters
-                 result = pipe(
-                     message,
-                     max_new_tokens=min(50, 150 - len(message.split())),  # Shorter responses
-                     temperature=0.7,
-                     do_sample=True,
-                     top_p=0.9,
-                     top_k=50,
-                     repetition_penalty=1.1,
-                     pad_token_id=pipe.tokenizer.eos_token_id if hasattr(pipe.tokenizer, 'eos_token_id') else 50256
-                 )[0]['generated_text']
-
-                 # Clean output
-                 if result.startswith(message):
-                     result = result[len(message):].strip()
-
-                 # Limit response length
-                 if len(result) > 200:
-                     result = result[:200] + "..."
-
-             elif model_config["task"] == "text-classification":
-                 output = pipe(message)[0]
-                 result = f"Analisis: {output['label']} (Keyakinan: {output['score']:.2f})"
-
-             elif model_config["task"] == "text2text-generation":
-                 result = pipe(message, max_length=100, num_beams=2)[0]['generated_text']
-
-             inference_time = time.time() - start_time
-             logger.info(f"⚡ Inference time: {inference_time:.2f}s for {model_config['name']}")
-
-             return result
-
-         except Exception as e:
-             logger.error(f"Inference error: {e}")
-             raise e
-
-     return await loop.run_in_executor(app.state.executor, inference)
-
- # Frontend route - simplified HTML
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
@@ -223,188 +91,229 @@ async def get_frontend():
  <head>
      <meta charset="UTF-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
-     <title>LyonPoy AI Chat - Fast Mode</title>
      <style>
          * { margin: 0; padding: 0; box-sizing: border-box; }
-         body { font-family: system-ui; background: #f5f5f5; padding: 20px; }
-         .container { max-width: 600px; margin: 0 auto; background: white; border-radius: 10px; overflow: hidden; }
-         .header { background: #007bff; color: white; padding: 15px; }
-         .chat { height: 400px; overflow-y: auto; padding: 15px; background: #fafafa; }
-         .message { margin: 10px 0; padding: 8px 12px; border-radius: 8px; }
-         .user { background: #007bff; color: white; margin-left: 20%; }
-         .bot { background: white; border: 1px solid #ddd; margin-right: 20%; }
-         .input-area { padding: 15px; display: flex; gap: 10px; }
-         input { flex: 1; padding: 10px; border: 1px solid #ddd; border-radius: 5px; }
-         button { padding: 10px 15px; background: #007bff; color: white; border: none; border-radius: 5px; cursor: pointer; }
-         select { padding: 5px; margin-left: 10px; }
-         .loading { color: #666; font-style: italic; }
      </style>
  </head>
  <body>
-     <div class="container">
-         <div class="header">
-             <h1>🚀 LyonPoy AI - Fast Mode</h1>
-             <select id="model">
-                 <option value="distil-gpt-2">DistilGPT-2 (Fastest)</option>
-                 <option value="gpt-2-tinny">GPT-2 Tinny</option>
                  <option value="tinny-llama">Tinny Llama</option>
-                 <option value="gpt-2">GPT-2</option>
                  <option value="bert-tinny">BERT Tinny</option>
                  <option value="albert-base-v2">ALBERT Base V2</option>
                  <option value="distilbert-base-uncased">DistilBERT</option>
                  <option value="electra-small">ELECTRA Small</option>
                  <option value="t5-small">T5 Small</option>
-                 <option value="pythia">Pythia</option>
-                 <option value="gpt-neo">GPT-Neo (Slowest)</option>
              </select>
          </div>
-         <div class="chat" id="chat"></div>
-         <div class="input-area">
-             <input type="text" id="message" placeholder="Ketik pesan..." maxlength="200">
-             <button onclick="sendMessage()">Kirim</button>
          </div>
      </div>
-
      <script>
-         const chat = document.getElementById('chat');
-         const messageInput = document.getElementById('message');
-         const modelSelect = document.getElementById('model');

          function addMessage(content, isUser = false) {
-             const div = document.createElement('div');
-             div.className = `message ${isUser ? 'user' : 'bot'}`;
-             div.textContent = content;
-             chat.appendChild(div);
-             chat.scrollTop = chat.scrollHeight;
          }
-
          async function sendMessage() {
-             const message = messageInput.value.trim();
              if (!message) return;
-
-             addMessage(message, true);
-             messageInput.value = '';
-             addMessage('⏳ Thinking...', false);
-
-             const startTime = Date.now();
-
              try {
                  const response = await fetch('/chat', {
                      method: 'POST',
                      headers: { 'Content-Type': 'application/json' },
-                     body: JSON.stringify({
-                         message: message,
-                         model: modelSelect.value
-                     })
                  });
-
                  const data = await response.json();
-                 const responseTime = ((Date.now() - startTime) / 1000).toFixed(1);
-
-                 // Remove loading message
-                 chat.removeChild(chat.lastElementChild);
-
                  if (data.status === 'success') {
-                     addMessage(`${data.response} (${responseTime}s)`, false);
                  } else {
-                     addMessage('❌ Error occurred', false);
                  }
              } catch (error) {
-                 chat.removeChild(chat.lastElementChild);
-                 addMessage('❌ Connection error', false);
             }
          }
-
-         messageInput.addEventListener('keypress', (e) => {
-             if (e.key === 'Enter') sendMessage();
          });
-
-         // Show welcome message
-         addMessage('👋 Halo! Pilih model dan mulai chat. Model DistilGPT-2 paling cepat!', false);
      </script>
  </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

- # Optimized chat endpoint
  @app.post("/chat")
- async def chat(request: ChatRequest, background_tasks: BackgroundTasks):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

-         # Limit message length for faster processing
-         message = request.message[:200]  # Max 200 chars

-         # Run inference
-         result = await run_inference(model_id, message)

-         # Load next priority model in background
-         background_tasks.add_task(preload_next_model, model_id)

-         return {
-             "response": result,
-             "model": MODELS[model_id]["name"],
-             "status": "success"
-         }

-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Chat error: {e}")
-         raise HTTPException(status_code=500, detail="Terjadi kesalahan")
-
- async def preload_next_model(current_model: str):
-     """Preload next model in background"""
-     try:
-         # Find next unloaded model by priority
-         loaded_models = set(app.state.pipelines.keys())
-         all_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])

-         for model_id in all_models:
-             if model_id not in loaded_models and model_id not in app.state.loading_models:
-                 await load_model_async(model_id)
-                 break
      except Exception as e:
-         logger.error(f"Background loading error: {e}")

- # Health check with model status
  @app.get("/health")
  async def health():
-     loaded_models = list(app.state.pipelines.keys())
-     return {
-         "status": "healthy",
-         "gpu": torch.cuda.is_available(),
-         "loaded_models": loaded_models,
-         "loading_models": list(app.state.loading_models)
-     }
-
- # Model status endpoint
- @app.get("/models")
- async def get_models():
-     models_status = {}
-     for model_id, config in MODELS.items():
-         models_status[model_id] = {
-             "name": config["name"],
-             "loaded": model_id in app.state.pipelines,
-             "loading": model_id in app.state.loading_models,
-             "priority": config["priority"]
-         }
-     return models_status
-
- # Cleanup on shutdown
- @app.on_event("shutdown")
- async def cleanup():
-     app.state.executor.shutdown(wait=True)

  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
-     uvicorn.run(
-         app,
-         host="0.0.0.0",
-         port=port,
-         log_level="info",
-         access_log=False  # Disable access log for better performance
-     )
 
  import os
  import uvicorn
+ from fastapi import FastAPI, HTTPException
  from fastapi.responses import HTMLResponse
+ from fastapi.staticfiles import StaticFiles
  from pydantic import BaseModel
+ from transformers import pipeline
  import torch
+ from typing import Optional

  # Initialize FastAPI
+ app = FastAPI(title="LyonPoy AI Chat")

+ # All 11 models configuration
  MODELS = {
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
+         "task": "text-generation"
      },
+     "pythia": {
+         "name": "Pythia",
+         "model_path": "Lyon28/Pythia",
+         "task": "text-generation"
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
+         "task": "text-classification"
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
+         "task": "text-classification"
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
+         "task": "text2text-generation"
      },
+     "gpt-2": {
+         "name": "GPT-2",
+         "model_path": "Lyon28/GPT-2",
+         "task": "text-generation"
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
+         "task": "text-generation"
+     },
+     "distilbert-base-uncased": {
+         "name": "DistilBERT",
+         "model_path": "Lyon28/Distilbert-Base-Uncased",
+         "task": "text-classification"
+     },
+     "distil-gpt-2": {
+         "name": "DistilGPT-2",
+         "model_path": "Lyon28/Distil_GPT-2",
+         "task": "text-generation"
+     },
+     "gpt-2-tinny": {
+         "name": "GPT-2 Tinny",
+         "model_path": "Lyon28/GPT-2-Tinny",
+         "task": "text-generation"
+     },
+     "electra-small": {
+         "name": "ELECTRA Small",
+         "model_path": "Lyon28/Electra-Small",
+         "task": "text-classification"
      }
  }

  class ChatRequest(BaseModel):
      message: str
+     model: Optional[str] = "gpt-2"

+ # Startup
  @app.on_event("startup")
  async def load_models():
+     app.state.pipelines = {}
+     os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
+     print("🤖 LyonPoy AI Chat Ready!")

+ # Frontend route
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
  <head>
      <meta charset="UTF-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>LyonPoy AI Chat</title>
      <style>
          * { margin: 0; padding: 0; box-sizing: border-box; }
+         body {
+             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+             height: 100vh; display: flex; justify-content: center; align-items: center;
+         }
+         .chat-container {
+             width: 400px; height: 600px; background: #fff; border-radius: 15px;
+             box-shadow: 0 20px 40px rgba(0,0,0,0.15); display: flex; flex-direction: column; overflow: hidden;
+         }
+         .chat-header {
+             background: linear-gradient(135deg, #25d366, #128c7e); color: white;
+             padding: 20px; text-align: center;
+         }
+         .chat-header h1 { font-size: 18px; font-weight: 600; margin-bottom: 8px; }
+         .model-selector {
+             background: rgba(255,255,255,0.2); border: none; color: white;
+             padding: 8px 12px; border-radius: 20px; font-size: 12px; cursor: pointer;
+         }
+         .chat-messages {
+             flex: 1; padding: 20px; overflow-y: auto; background: #f0f0f0;
+             display: flex; flex-direction: column; gap: 15px;
+         }
+         .message {
+             max-width: 80%; padding: 12px 16px; border-radius: 15px;
+             font-size: 14px; line-height: 1.4; animation: slideIn 0.3s ease;
+         }
+         .message.user {
+             background: #25d366; color: white; align-self: flex-end; border-bottom-right-radius: 5px;
+         }
+         .message.bot {
+             background: white; color: #333; align-self: flex-start;
+             border-bottom-left-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);
+         }
+         .message-time { font-size: 11px; opacity: 0.7; margin-top: 5px; }
+         .chat-input-container {
+             padding: 20px; background: white; border-top: 1px solid #e0e0e0;
+             display: flex; gap: 10px; align-items: center;
+         }
+         .chat-input {
+             flex: 1; padding: 12px 16px; border: 1px solid #e0e0e0;
+             border-radius: 25px; font-size: 14px; outline: none;
+         }
+         .chat-input:focus { border-color: #25d366; box-shadow: 0 0 0 2px rgba(37, 211, 102, 0.2); }
+         .send-button {
+             background: #25d366; color: white; border: none; border-radius: 50%;
+             width: 45px; height: 45px; cursor: pointer; display: flex;
+             align-items: center; justify-content: center;
+         }
+         .send-button:hover { background: #128c7e; }
+         .send-button:disabled { background: #ccc; cursor: not-allowed; }
+         .welcome-message {
+             text-align: center; color: #666; font-size: 13px;
+             padding: 20px; border-radius: 10px; background: rgba(255,255,255,0.7);
+         }
+         .typing-indicator {
+             display: none; align-items: center; gap: 5px; padding: 12px 16px;
+             background: white; border-radius: 15px; align-self: flex-start;
+         }
+         .typing-dot {
+             width: 8px; height: 8px; background: #999; border-radius: 50%;
+             animation: typing 1.4s infinite;
+         }
+         .typing-dot:nth-child(2) { animation-delay: 0.2s; }
+         .typing-dot:nth-child(3) { animation-delay: 0.4s; }
+         @keyframes typing { 0%, 60%, 100% { transform: translateY(0); } 30% { transform: translateY(-10px); } }
+         @keyframes slideIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
+         @media (max-width: 480px) { .chat-container { width: 100vw; height: 100vh; border-radius: 0; } }
      </style>
  </head>
  <body>
+     <div class="chat-container">
+         <div class="chat-header">
+             <h1>🤖 LyonPoy AI Chat</h1>
+             <select class="model-selector" id="modelSelect">
+                 <option value="gpt-2">GPT-2 (General)</option>
                  <option value="tinny-llama">Tinny Llama</option>
+                 <option value="pythia">Pythia</option>
+                 <option value="gpt-neo">GPT-Neo</option>
+                 <option value="distil-gpt-2">DistilGPT-2</option>
+                 <option value="gpt-2-tinny">GPT-2 Tinny</option>
                  <option value="bert-tinny">BERT Tinny</option>
                  <option value="albert-base-v2">ALBERT Base V2</option>
                  <option value="distilbert-base-uncased">DistilBERT</option>
                  <option value="electra-small">ELECTRA Small</option>
                  <option value="t5-small">T5 Small</option>
              </select>
          </div>
+         <div class="chat-messages" id="chatMessages">
+             <div class="welcome-message">
+                 👋 Halo! Saya LyonPoy AI Assistant.<br>
+                 Pilih model di atas dan mulai chat dengan saya!
+             </div>
+         </div>
+         <div class="typing-indicator" id="typingIndicator">
+             <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
+         </div>
+         <div class="chat-input-container">
+             <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan..." maxlength="500">
+             <button class="send-button" id="sendButton">➤</button>
          </div>
      </div>
      <script>
+         const chatMessages = document.getElementById('chatMessages');
+         const chatInput = document.getElementById('chatInput');
+         const sendButton = document.getElementById('sendButton');
+         const modelSelect = document.getElementById('modelSelect');
+         const typingIndicator = document.getElementById('typingIndicator');
+
+         function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }

          function addMessage(content, isUser = false) {
+             const messageDiv = document.createElement('div');
+             messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
+             const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
+             messageDiv.innerHTML = `${content}<div class="message-time">${time}</div>`;
+             chatMessages.appendChild(messageDiv);
+             scrollToBottom();
          }
+
+         function showTyping() { typingIndicator.style.display = 'flex'; scrollToBottom(); }
+         function hideTyping() { typingIndicator.style.display = 'none'; }
+
          async function sendMessage() {
+             const message = chatInput.value.trim();
              if (!message) return;
+
+             chatInput.disabled = true; sendButton.disabled = true;
+             addMessage(message, true); chatInput.value = ''; showTyping();
+
              try {
                  const response = await fetch('/chat', {
                      method: 'POST',
                      headers: { 'Content-Type': 'application/json' },
+                     body: JSON.stringify({ message: message, model: modelSelect.value })
                  });
                  const data = await response.json();
+                 hideTyping();
                  if (data.status === 'success') {
+                     addMessage(data.response);
                  } else {
+                     addMessage('❌ Maaf, terjadi kesalahan. Coba lagi nanti.');
                  }
              } catch (error) {
+                 hideTyping();
+                 addMessage('❌ Tidak dapat terhubung ke server.');
              }
+             chatInput.disabled = false; sendButton.disabled = false; chatInput.focus();
          }
+
+         sendButton.addEventListener('click', sendMessage);
+         chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
+         modelSelect.addEventListener('change', () => {
+             const modelName = modelSelect.options[modelSelect.selectedIndex].text;
+             addMessage(`🔄 Model diubah ke: ${modelName}`);
          });
+         window.addEventListener('load', () => chatInput.focus());
      </script>
  </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

+ # Chat API
  @app.post("/chat")
+ async def chat(request: ChatRequest):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

+         model_config = MODELS[model_id]

+         # Load the model if it has not been loaded yet
+         if model_id not in app.state.pipelines:
+             print(f"⏳ Loading {model_config['name']}...")
+             device = 0 if torch.cuda.is_available() else -1
+             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+             app.state.pipelines[model_id] = pipeline(
+                 task=model_config["task"],
+                 model=model_config["model_path"],
+                 device=device,
+                 torch_dtype=dtype
+             )

+         pipe = app.state.pipelines[model_id]

+         # Dispatch on the model's task type
+         if model_config["task"] == "text-generation":
+             result = pipe(
+                 request.message,
+                 max_length=min(len(request.message.split()) + 50, 200),
+                 temperature=0.7,
+                 do_sample=True,
+                 pad_token_id=pipe.tokenizer.eos_token_id
+             )[0]['generated_text']
+
+             # Clean output
+             if result.startswith(request.message):
+                 result = result[len(request.message):].strip()
+
+         elif model_config["task"] == "text-classification":
+             output = pipe(request.message)[0]
+             result = f"Sentimen: {output['label']} (Confidence: {output['score']:.2f})"
+
+         elif model_config["task"] == "text2text-generation":
+             result = pipe(request.message, max_length=150)[0]['generated_text']

+         return {"response": result, "model": model_config["name"], "status": "success"}

      except Exception as e:
+         print(f"❌ Error: {e}")
+         raise HTTPException(status_code=500, detail="Terjadi kesalahan")

+ # Health check
  @app.get("/health")
  async def health():
+     return {"status": "healthy", "gpu": torch.cuda.is_available()}

+ # Run app
  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
+     uvicorn.run(app, host="0.0.0.0", port=port)
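
For reference, a minimal client sketch against the two endpoints this commit leaves in place. It assumes the app is running locally on the default port 7860 and that the third-party requests package is installed; both the base URL and the example message are assumptions, not part of the commit.

import requests  # assumed available; any HTTP client works

BASE = "http://localhost:7860"  # assumption: local run with the default PORT

# Health check: reports whether torch can see a GPU.
print(requests.get(f"{BASE}/health").json())

# Chat request: "model" must be one of the MODELS keys, e.g. "gpt-2".
# The first request for a given model is slow, because the pipeline is
# downloaded and loaded lazily inside the /chat handler.
resp = requests.post(
    f"{BASE}/chat",
    json={"message": "Halo!", "model": "gpt-2"},
    timeout=300,
)
print(resp.json())  # e.g. {"response": "...", "model": "GPT-2", "status": "success"}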