Lyon28 committed on
Commit 24088e0 · verified · 1 Parent(s): 3d635c7

Update app.py

Files changed (1)
  1. app.py +304 -213
app.py CHANGED
@@ -1,88 +1,220 @@
  import os
  import uvicorn
- from fastapi import FastAPI, HTTPException
  from fastapi.responses import HTMLResponse
- from fastapi.staticfiles import StaticFiles
  from pydantic import BaseModel
- from transformers import pipeline
  import torch
- from typing import Optional

  # Initialize FastAPI
- app = FastAPI(title="LyonPoy AI Chat")

- # All 11 models configuration
  MODELS = {
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
-         "task": "text-generation"
      },
-     "pythia": {
-         "name": "Pythia",
-         "model_path": "Lyon28/Pythia",
-         "task": "text-generation"
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
-         "task": "text-classification"
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
-         "task": "text-classification"
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
-         "task": "text2text-generation"
      },
-     "gpt-2": {
-         "name": "GPT-2",
-         "model_path": "Lyon28/GPT-2",
-         "task": "text-generation"
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
-         "task": "text-generation"
-     },
-     "distilbert-base-uncased": {
-         "name": "DistilBERT",
-         "model_path": "Lyon28/Distilbert-Base-Uncased",
-         "task": "text-classification"
-     },
-     "distil-gpt-2": {
-         "name": "DistilGPT-2",
-         "model_path": "Lyon28/Distil_GPT-2",
-         "task": "text-generation"
-     },
-     "gpt-2-tinny": {
-         "name": "GPT-2 Tinny",
-         "model_path": "Lyon28/GPT-2-Tinny",
-         "task": "text-generation"
-     },
-     "electra-small": {
-         "name": "ELECTRA Small",
-         "model_path": "Lyon28/Electra-Small",
-         "task": "text-classification"
      }
  }

  class ChatRequest(BaseModel):
      message: str
-     model: Optional[str] = "gpt-2"

- # Startup
  @app.on_event("startup")
  async def load_models():
-     app.state.pipelines = {}
-     os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
-     print("🤖 LyonPoy AI Chat Ready!")

- # Frontend route
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
@@ -91,229 +223,188 @@ async def get_frontend():
      <head>
          <meta charset="UTF-8">
          <meta name="viewport" content="width=device-width, initial-scale=1.0">
-         <title>LyonPoy AI Chat</title>
          <style>
              * { margin: 0; padding: 0; box-sizing: border-box; }
-             body {
-                 font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-                 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-                 height: 100vh; display: flex; justify-content: center; align-items: center;
-             }
-             .chat-container {
-                 width: 400px; height: 600px; background: #fff; border-radius: 15px;
-                 box-shadow: 0 20px 40px rgba(0,0,0,0.15); display: flex; flex-direction: column; overflow: hidden;
-             }
-             .chat-header {
-                 background: linear-gradient(135deg, #25d366, #128c7e); color: white;
-                 padding: 20px; text-align: center;
-             }
-             .chat-header h1 { font-size: 18px; font-weight: 600; margin-bottom: 8px; }
-             .model-selector {
-                 background: rgba(255,255,255,0.2); border: none; color: white;
-                 padding: 8px 12px; border-radius: 20px; font-size: 12px; cursor: pointer;
-             }
-             .chat-messages {
-                 flex: 1; padding: 20px; overflow-y: auto; background: #f0f0f0;
-                 display: flex; flex-direction: column; gap: 15px;
-             }
-             .message {
-                 max-width: 80%; padding: 12px 16px; border-radius: 15px;
-                 font-size: 14px; line-height: 1.4; animation: slideIn 0.3s ease;
-             }
-             .message.user {
-                 background: #25d366; color: white; align-self: flex-end; border-bottom-right-radius: 5px;
-             }
-             .message.bot {
-                 background: white; color: #333; align-self: flex-start;
-                 border-bottom-left-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);
-             }
-             .message-time { font-size: 11px; opacity: 0.7; margin-top: 5px; }
-             .chat-input-container {
-                 padding: 20px; background: white; border-top: 1px solid #e0e0e0;
-                 display: flex; gap: 10px; align-items: center;
-             }
-             .chat-input {
-                 flex: 1; padding: 12px 16px; border: 1px solid #e0e0e0;
-                 border-radius: 25px; font-size: 14px; outline: none;
-             }
-             .chat-input:focus { border-color: #25d366; box-shadow: 0 0 0 2px rgba(37, 211, 102, 0.2); }
-             .send-button {
-                 background: #25d366; color: white; border: none; border-radius: 50%;
-                 width: 45px; height: 45px; cursor: pointer; display: flex;
-                 align-items: center; justify-content: center;
-             }
-             .send-button:hover { background: #128c7e; }
-             .send-button:disabled { background: #ccc; cursor: not-allowed; }
-             .welcome-message {
-                 text-align: center; color: #666; font-size: 13px;
-                 padding: 20px; border-radius: 10px; background: rgba(255,255,255,0.7);
-             }
-             .typing-indicator {
-                 display: none; align-items: center; gap: 5px; padding: 12px 16px;
-                 background: white; border-radius: 15px; align-self: flex-start;
-             }
-             .typing-dot {
-                 width: 8px; height: 8px; background: #999; border-radius: 50%;
-                 animation: typing 1.4s infinite;
-             }
-             .typing-dot:nth-child(2) { animation-delay: 0.2s; }
-             .typing-dot:nth-child(3) { animation-delay: 0.4s; }
-             @keyframes typing { 0%, 60%, 100% { transform: translateY(0); } 30% { transform: translateY(-10px); } }
-             @keyframes slideIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
-             @media (max-width: 480px) { .chat-container { width: 100vw; height: 100vh; border-radius: 0; } }
          </style>
      </head>
      <body>
-         <div class="chat-container">
-             <div class="chat-header">
-                 <h1>🤖 LyonPoy AI Chat</h1>
-                 <select class="model-selector" id="modelSelect">
-                     <option value="gpt-2">GPT-2 (General)</option>
-                     <option value="tinny-llama">Tinny Llama</option>
-                     <option value="pythia">Pythia</option>
-                     <option value="gpt-neo">GPT-Neo</option>
-                     <option value="distil-gpt-2">DistilGPT-2</option>
                      <option value="gpt-2-tinny">GPT-2 Tinny</option>
                      <option value="bert-tinny">BERT Tinny</option>
                      <option value="albert-base-v2">ALBERT Base V2</option>
                      <option value="distilbert-base-uncased">DistilBERT</option>
                      <option value="electra-small">ELECTRA Small</option>
                      <option value="t5-small">T5 Small</option>
                  </select>
              </div>
-             <div class="chat-messages" id="chatMessages">
-                 <div class="welcome-message">
-                     👋 Halo! Saya LyonPoy AI Assistant.<br>
-                     Pilih model di atas dan mulai chat dengan saya!
-                 </div>
-             </div>
-             <div class="typing-indicator" id="typingIndicator">
-                 <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
-             </div>
-             <div class="chat-input-container">
-                 <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan..." maxlength="500">
-                 <button class="send-button" id="sendButton">➤</button>
              </div>
          </div>
          <script>
-             const chatMessages = document.getElementById('chatMessages');
-             const chatInput = document.getElementById('chatInput');
-             const sendButton = document.getElementById('sendButton');
-             const modelSelect = document.getElementById('modelSelect');
-             const typingIndicator = document.getElementById('typingIndicator');
-
-             function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }

              function addMessage(content, isUser = false) {
-                 const messageDiv = document.createElement('div');
-                 messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
-                 const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
-                 messageDiv.innerHTML = `${content}<div class="message-time">${time}</div>`;
-                 chatMessages.appendChild(messageDiv);
-                 scrollToBottom();
              }
-
-             function showTyping() { typingIndicator.style.display = 'flex'; scrollToBottom(); }
-             function hideTyping() { typingIndicator.style.display = 'none'; }
-
              async function sendMessage() {
-                 const message = chatInput.value.trim();
                  if (!message) return;
-
-                 chatInput.disabled = true; sendButton.disabled = true;
-                 addMessage(message, true); chatInput.value = ''; showTyping();
-
                  try {
                      const response = await fetch('/chat', {
                          method: 'POST',
                          headers: { 'Content-Type': 'application/json' },
-                         body: JSON.stringify({ message: message, model: modelSelect.value })
                      });
                      const data = await response.json();
-                     hideTyping();
                      if (data.status === 'success') {
-                         addMessage(data.response);
                      } else {
-                         addMessage('❌ Maaf, terjadi kesalahan. Coba lagi nanti.');
                      }
                  } catch (error) {
-                     hideTyping();
-                     addMessage('❌ Tidak dapat terhubung ke server.');
                  }
-                 chatInput.disabled = false; sendButton.disabled = false; chatInput.focus();
              }
-
-             sendButton.addEventListener('click', sendMessage);
-             chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
-             modelSelect.addEventListener('change', () => {
-                 const modelName = modelSelect.options[modelSelect.selectedIndex].text;
-                 addMessage(`🔄 Model diubah ke: ${modelName}`);
              });
-             window.addEventListener('load', () => chatInput.focus());
          </script>
      </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

- # Chat API
  @app.post("/chat")
- async def chat(request: ChatRequest):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

-         model_config = MODELS[model_id]

-         # Load the model if it is not cached yet
-         if model_id not in app.state.pipelines:
-             print(f"⏳ Loading {model_config['name']}...")
-             device = 0 if torch.cuda.is_available() else -1
-             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
-             app.state.pipelines[model_id] = pipeline(
-                 task=model_config["task"],
-                 model=model_config["model_path"],
-                 device=device,
-                 torch_dtype=dtype
-             )

-         pipe = app.state.pipelines[model_id]

-         # Process according to the model's task
-         if model_config["task"] == "text-generation":
-             result = pipe(
-                 request.message,
-                 max_length=min(len(request.message.split()) + 50, 200),
-                 temperature=0.7,
-                 do_sample=True,
-                 pad_token_id=pipe.tokenizer.eos_token_id
-             )[0]['generated_text']
-
-             # Clean output
-             if result.startswith(request.message):
-                 result = result[len(request.message):].strip()
-
-         elif model_config["task"] == "text-classification":
-             output = pipe(request.message)[0]
-             result = f"Sentimen: {output['label']} (Confidence: {output['score']:.2f})"
-
-         elif model_config["task"] == "text2text-generation":
-             result = pipe(request.message, max_length=150)[0]['generated_text']
-
-         return {"response": result, "model": model_config["name"], "status": "success"}

      except Exception as e:
-         print(f"Error: {e}")
          raise HTTPException(status_code=500, detail="Terjadi kesalahan")

- # Health check
  @app.get("/health")
  async def health():
-     return {"status": "healthy", "gpu": torch.cuda.is_available()}

- # Run app
  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
-     uvicorn.run(app, host="0.0.0.0", port=port)

@@ -1,88 +1,220 @@
  import os
  import uvicorn
+ import asyncio
+ from concurrent.futures import ThreadPoolExecutor
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
  from fastapi.responses import HTMLResponse
  from pydantic import BaseModel
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
  import torch
+ from typing import Optional, Dict
+ import time
+ import logging
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

  # Initialize FastAPI
+ app = FastAPI(title="LyonPoy AI Chat - Optimized")

+ # Optimized model configuration - prioritize smaller, faster models
  MODELS = {
+     "distil-gpt-2": {
+         "name": "DistilGPT-2",
+         "model_path": "Lyon28/Distil_GPT-2",
+         "task": "text-generation",
+         "priority": 1  # Highest priority - smallest model
+     },
+     "gpt-2-tinny": {
+         "name": "GPT-2 Tinny",
+         "model_path": "Lyon28/GPT-2-Tinny",
+         "task": "text-generation",
+         "priority": 2
+     },
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
+         "task": "text-generation",
+         "priority": 3
      },
+     "gpt-2": {
+         "name": "GPT-2",
+         "model_path": "Lyon28/GPT-2",
+         "task": "text-generation",
+         "priority": 4
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
+         "task": "text-classification",
+         "priority": 5
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
+         "task": "text-classification",
+         "priority": 6
+     },
+     "distilbert-base-uncased": {
+         "name": "DistilBERT",
+         "model_path": "Lyon28/Distilbert-Base-Uncased",
+         "task": "text-classification",
+         "priority": 7
+     },
+     "electra-small": {
+         "name": "ELECTRA Small",
+         "model_path": "Lyon28/Electra-Small",
+         "task": "text-classification",
+         "priority": 8
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
+         "task": "text2text-generation",
+         "priority": 9
      },
+     "pythia": {
+         "name": "Pythia",
+         "model_path": "Lyon28/Pythia",
+         "task": "text-generation",
+         "priority": 10
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
+         "task": "text-generation",
+         "priority": 11  # Largest model - lowest priority
      }
  }

  class ChatRequest(BaseModel):
      message: str
+     model: Optional[str] = "distil-gpt-2"  # Default to fastest model
+
+ # Global state
+ app.state.pipelines = {}
+ app.state.loading_models = set()
+ app.state.executor = ThreadPoolExecutor(max_workers=2)
+
+ # Optimized model loading
+ async def load_model_async(model_id: str):
+     """Load model in background thread"""
+     if model_id in app.state.loading_models:
+         return False
+
+     app.state.loading_models.add(model_id)
+
+     try:
+         model_config = MODELS[model_id]
+         logger.info(f"🔄 Loading {model_config['name']}...")
+
+         # Load in thread to avoid blocking
+         loop = asyncio.get_event_loop()
+
+         def load_model():
+             device = 0 if torch.cuda.is_available() else -1
+             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+             return pipeline(
+                 task=model_config["task"],
+                 model=model_config["model_path"],
+                 device=device,
+                 torch_dtype=dtype,
+                 use_fast=True,
+                 trust_remote_code=True,
+                 low_cpu_mem_usage=True,
+                 # Optimization for faster inference
+                 pad_token_id=50256 if "gpt" in model_id else None
+             )
+
+         pipeline_obj = await loop.run_in_executor(app.state.executor, load_model)
+         app.state.pipelines[model_id] = pipeline_obj
+         logger.info(f"✅ {model_config['name']} loaded successfully")
+         return True
+
+     except Exception as e:
+         logger.error(f"❌ Failed to load {model_id}: {e}")
+         return False
+     finally:
+         app.state.loading_models.discard(model_id)

  @app.on_event("startup")
  async def load_models():
+     """Load high-priority models on startup"""
+     os.environ['HF_HOME'] = './cache/huggingface'  # Persistent cache
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
+
+     # Pre-load top 3 fastest models
+     priority_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])[:3]
+
+     tasks = []
+     for model_id in priority_models:
+         task = asyncio.create_task(load_model_async(model_id))
+         tasks.append(task)
+
+     # Load models concurrently
+     await asyncio.gather(*tasks, return_exceptions=True)
+     logger.info("🚀 LyonPoy AI Chat Ready!")

+ # Optimized inference
+ async def run_inference(model_id: str, message: str):
+     """Run inference in background thread"""
+     if model_id not in app.state.pipelines:
+         # Try to load model if not available
+         success = await load_model_async(model_id)
+         if not success:
+             raise HTTPException(status_code=503, detail=f"Model {model_id} unavailable")
+
+     pipe = app.state.pipelines[model_id]
+     model_config = MODELS[model_id]
+
+     loop = asyncio.get_event_loop()
+
+     def inference():
+         start_time = time.time()
+
+         try:
+             if model_config["task"] == "text-generation":
+                 # Optimized generation parameters
+                 result = pipe(
+                     message,
+                     max_new_tokens=min(50, 150 - len(message.split())),  # Shorter responses
+                     temperature=0.7,
+                     do_sample=True,
+                     top_p=0.9,
+                     top_k=50,
+                     repetition_penalty=1.1,
+                     pad_token_id=pipe.tokenizer.eos_token_id if hasattr(pipe.tokenizer, 'eos_token_id') else 50256
+                 )[0]['generated_text']
+
+                 # Clean output
+                 if result.startswith(message):
+                     result = result[len(message):].strip()
+
+                 # Limit response length
+                 if len(result) > 200:
+                     result = result[:200] + "..."
+
+             elif model_config["task"] == "text-classification":
+                 output = pipe(message)[0]
+                 result = f"Analisis: {output['label']} (Keyakinan: {output['score']:.2f})"
+
+             elif model_config["task"] == "text2text-generation":
+                 result = pipe(message, max_length=100, num_beams=2)[0]['generated_text']
+
+             inference_time = time.time() - start_time
+             logger.info(f"⚡ Inference time: {inference_time:.2f}s for {model_config['name']}")
+
+             return result
+
+         except Exception as e:
+             logger.error(f"Inference error: {e}")
+             raise e
+
+     return await loop.run_in_executor(app.state.executor, inference)
+
+ # Frontend route - simplified HTML
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
@@ -91,229 +223,188 @@ async def get_frontend():
      <head>
          <meta charset="UTF-8">
          <meta name="viewport" content="width=device-width, initial-scale=1.0">
+         <title>LyonPoy AI Chat - Fast Mode</title>
          <style>
              * { margin: 0; padding: 0; box-sizing: border-box; }
+             body { font-family: system-ui; background: #f5f5f5; padding: 20px; }
+             .container { max-width: 600px; margin: 0 auto; background: white; border-radius: 10px; overflow: hidden; }
+             .header { background: #007bff; color: white; padding: 15px; }
+             .chat { height: 400px; overflow-y: auto; padding: 15px; background: #fafafa; }
+             .message { margin: 10px 0; padding: 8px 12px; border-radius: 8px; }
+             .user { background: #007bff; color: white; margin-left: 20%; }
+             .bot { background: white; border: 1px solid #ddd; margin-right: 20%; }
+             .input-area { padding: 15px; display: flex; gap: 10px; }
+             input { flex: 1; padding: 10px; border: 1px solid #ddd; border-radius: 5px; }
+             button { padding: 10px 15px; background: #007bff; color: white; border: none; border-radius: 5px; cursor: pointer; }
+             select { padding: 5px; margin-left: 10px; }
+             .loading { color: #666; font-style: italic; }
          </style>
      </head>
      <body>
+         <div class="container">
+             <div class="header">
+                 <h1>🚀 LyonPoy AI - Fast Mode</h1>
+                 <select id="model">
+                     <option value="distil-gpt-2">DistilGPT-2 (Fastest)</option>
                      <option value="gpt-2-tinny">GPT-2 Tinny</option>
+                     <option value="tinny-llama">Tinny Llama</option>
+                     <option value="gpt-2">GPT-2</option>
                      <option value="bert-tinny">BERT Tinny</option>
                      <option value="albert-base-v2">ALBERT Base V2</option>
                      <option value="distilbert-base-uncased">DistilBERT</option>
                      <option value="electra-small">ELECTRA Small</option>
                      <option value="t5-small">T5 Small</option>
+                     <option value="pythia">Pythia</option>
+                     <option value="gpt-neo">GPT-Neo (Slowest)</option>
                  </select>
              </div>
+             <div class="chat" id="chat"></div>
+             <div class="input-area">
+                 <input type="text" id="message" placeholder="Ketik pesan..." maxlength="200">
+                 <button onclick="sendMessage()">Kirim</button>
              </div>
          </div>
+
          <script>
+             const chat = document.getElementById('chat');
+             const messageInput = document.getElementById('message');
+             const modelSelect = document.getElementById('model');

              function addMessage(content, isUser = false) {
+                 const div = document.createElement('div');
+                 div.className = `message ${isUser ? 'user' : 'bot'}`;
+                 div.textContent = content;
+                 chat.appendChild(div);
+                 chat.scrollTop = chat.scrollHeight;
              }
+
              async function sendMessage() {
+                 const message = messageInput.value.trim();
                  if (!message) return;
+
+                 addMessage(message, true);
+                 messageInput.value = '';
+                 addMessage('⏳ Thinking...', false);
+
+                 const startTime = Date.now();
+
                  try {
                      const response = await fetch('/chat', {
                          method: 'POST',
                          headers: { 'Content-Type': 'application/json' },
+                         body: JSON.stringify({
+                             message: message,
+                             model: modelSelect.value
+                         })
                      });
+
                      const data = await response.json();
+                     const responseTime = ((Date.now() - startTime) / 1000).toFixed(1);
+
+                     // Remove loading message
+                     chat.removeChild(chat.lastElementChild);
+
                      if (data.status === 'success') {
+                         addMessage(`${data.response} (${responseTime}s)`, false);
                      } else {
+                         addMessage('❌ Error occurred', false);
                      }
                  } catch (error) {
+                     chat.removeChild(chat.lastElementChild);
+                     addMessage('❌ Connection error', false);
                  }
              }
+
+             messageInput.addEventListener('keypress', (e) => {
+                 if (e.key === 'Enter') sendMessage();
              });
+
+             // Show welcome message
+             addMessage('👋 Halo! Pilih model dan mulai chat. Model DistilGPT-2 paling cepat!', false);
          </script>
      </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

+ # Optimized chat endpoint
  @app.post("/chat")
+ async def chat(request: ChatRequest, background_tasks: BackgroundTasks):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

+         # Limit message length for faster processing
+         message = request.message[:200]  # Max 200 chars

+         # Run inference
+         result = await run_inference(model_id, message)

+         # Load next priority model in background
+         background_tasks.add_task(preload_next_model, model_id)

+         return {
+             "response": result,
+             "model": MODELS[model_id]["name"],
+             "status": "success"
+         }

+     except HTTPException:
+         raise
      except Exception as e:
+         logger.error(f"Chat error: {e}")
          raise HTTPException(status_code=500, detail="Terjadi kesalahan")

+ async def preload_next_model(current_model: str):
+     """Preload next model in background"""
+     try:
+         # Find next unloaded model by priority
+         loaded_models = set(app.state.pipelines.keys())
+         all_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])
+
+         for model_id in all_models:
+             if model_id not in loaded_models and model_id not in app.state.loading_models:
+                 await load_model_async(model_id)
+                 break
+     except Exception as e:
+         logger.error(f"Background loading error: {e}")
+
+ # Health check with model status
  @app.get("/health")
  async def health():
+     loaded_models = list(app.state.pipelines.keys())
+     return {
+         "status": "healthy",
+         "gpu": torch.cuda.is_available(),
+         "loaded_models": loaded_models,
+         "loading_models": list(app.state.loading_models)
+     }
+
+ # Model status endpoint
+ @app.get("/models")
+ async def get_models():
+     models_status = {}
+     for model_id, config in MODELS.items():
+         models_status[model_id] = {
+             "name": config["name"],
+             "loaded": model_id in app.state.pipelines,
+             "loading": model_id in app.state.loading_models,
+             "priority": config["priority"]
+         }
+     return models_status
+
+ # Cleanup on shutdown
+ @app.on_event("shutdown")
+ async def cleanup():
+     app.state.executor.shutdown(wait=True)

  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
+     uvicorn.run(
+         app,
+         host="0.0.0.0",
+         port=port,
+         log_level="info",
+         access_log=False  # Disable access log for better performance
+     )
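
As a quick sanity check of the endpoints this commit introduces (/chat now defaults to distil-gpt-2 and truncates input to 200 characters; /health and the new /models endpoint report per-model load state), a minimal client sketch follows. It assumes the app is running locally on the default port 7860 and that the requests package is installed; neither assumption is part of the commit itself.

import requests  # assumed test dependency, not part of the commit

BASE = "http://localhost:7860"  # assumed local deployment, e.g. `python app.py`

# /health now lists loaded and currently-loading models alongside GPU status
print(requests.get(f"{BASE}/health").json())

# The new /models endpoint reports each model's name, loaded/loading state, and priority
print(requests.get(f"{BASE}/models").json())

# /chat request; `model` is optional and defaults to the fastest model
resp = requests.post(
    f"{BASE}/chat",
    json={"message": "Halo!", "model": "distil-gpt-2"},
    timeout=120,  # the first request may wait for a model to load
)
print(resp.json())  # expected: {"response": ..., "model": "DistilGPT-2", "status": "success"}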