Lyon28 committed on
Commit
0db9e1d
·
verified ·
1 Parent(s): 138b76f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +407 -137
app.py CHANGED
@@ -2,87 +2,125 @@ import os
2
  import uvicorn
3
  from fastapi import FastAPI, HTTPException
4
  from fastapi.responses import HTMLResponse
5
- from fastapi.staticfiles import StaticFiles
6
  from pydantic import BaseModel
7
- from transformers import pipeline
8
  import torch
9
  from typing import Optional
 
 
 
10
 
11
  # Inisialisasi FastAPI
12
- app = FastAPI(title="LyonPoy AI Chat")
13
 
14
- # All 11 models configuration
 
 
 
15
  MODELS = {
16
- "tinny-llama": {
17
- "name": "Tinny Llama",
18
- "model_path": "Lyon28/Tinny-Llama",
19
- "task": "text-generation"
 
 
20
  },
21
- "pythia": {
22
- "name": "Pythia",
23
- "model_path": "Lyon28/Pythia",
24
- "task": "text-generation"
 
 
25
  },
26
  "bert-tinny": {
27
- "name": "BERT Tinny",
28
  "model_path": "Lyon28/Bert-Tinny",
29
- "task": "text-classification"
 
 
 
 
 
 
 
 
 
30
  },
31
  "albert-base-v2": {
32
- "name": "ALBERT Base V2",
33
  "model_path": "Lyon28/Albert-Base-V2",
34
- "task": "text-classification"
 
 
 
 
 
 
 
 
 
35
  },
36
  "t5-small": {
37
- "name": "T5 Small",
38
  "model_path": "Lyon28/T5-Small",
39
- "task": "text2text-generation"
 
 
40
  },
41
  "gpt-2": {
42
- "name": "GPT-2",
43
  "model_path": "Lyon28/GPT-2",
44
- "task": "text-generation"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
  "gpt-neo": {
47
  "name": "GPT-Neo",
48
  "model_path": "Lyon28/GPT-Neo",
49
- "task": "text-generation"
50
- },
51
- "distilbert-base-uncased": {
52
- "name": "DistilBERT",
53
- "model_path": "Lyon28/Distilbert-Base-Uncased",
54
- "task": "text-classification"
55
- },
56
- "distil-gpt-2": {
57
- "name": "DistilGPT-2",
58
- "model_path": "Lyon28/Distil_GPT-2",
59
- "task": "text-generation"
60
- },
61
- "gpt-2-tinny": {
62
- "name": "GPT-2 Tinny",
63
- "model_path": "Lyon28/GPT-2-Tinny",
64
- "task": "text-generation"
65
- },
66
- "electra-small": {
67
- "name": "ELECTRA Small",
68
- "model_path": "Lyon28/Electra-Small",
69
- "task": "text-classification"
70
  }
71
  }
72
 
73
  class ChatRequest(BaseModel):
74
  message: str
75
- model: Optional[str] = "gpt-2"
76
 
77
- # Startup
78
  @app.on_event("startup")
79
  async def load_models():
80
  app.state.pipelines = {}
 
 
 
 
 
 
 
 
 
81
  os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
 
82
  os.makedirs(os.environ['HF_HOME'], exist_ok=True)
83
- print("πŸ€– LyonPoy AI Chat Ready!")
 
84
 
85
- # Frontend route
86
  @app.get("/", response_class=HTMLResponse)
87
  async def get_frontend():
88
  html_content = '''
@@ -91,107 +129,133 @@ async def get_frontend():
91
  <head>
92
  <meta charset="UTF-8">
93
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
94
- <title>LyonPoy AI Chat</title>
95
  <style>
96
  * { margin: 0; padding: 0; box-sizing: border-box; }
97
  body {
98
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
99
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
100
  height: 100vh; display: flex; justify-content: center; align-items: center;
101
  }
102
  .chat-container {
103
  width: 400px; height: 600px; background: #fff; border-radius: 15px;
104
- box-shadow: 0 20px 40px rgba(0,0,0,0.15); display: flex; flex-direction: column; overflow: hidden;
105
  }
106
  .chat-header {
107
- background: linear-gradient(135deg, #25d366, #128c7e); color: white;
108
- padding: 20px; text-align: center;
 
 
 
 
 
109
  }
110
- .chat-header h1 { font-size: 18px; font-weight: 600; margin-bottom: 8px; }
111
  .model-selector {
112
  background: rgba(255,255,255,0.2); border: none; color: white;
113
- padding: 8px 12px; border-radius: 20px; font-size: 12px; cursor: pointer;
 
114
  }
115
  .chat-messages {
116
- flex: 1; padding: 20px; overflow-y: auto; background: #f0f0f0;
117
- display: flex; flex-direction: column; gap: 15px;
118
  }
119
  .message {
120
- max-width: 80%; padding: 12px 16px; border-radius: 15px;
121
- font-size: 14px; line-height: 1.4; animation: slideIn 0.3s ease;
122
  }
123
  .message.user {
124
- background: #25d366; color: white; align-self: flex-end; border-bottom-right-radius: 5px;
 
125
  }
126
  .message.bot {
127
  background: white; color: #333; align-self: flex-start;
128
- border-bottom-left-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);
 
 
 
 
 
 
 
 
129
  }
130
- .message-time { font-size: 11px; opacity: 0.7; margin-top: 5px; }
 
 
131
  .chat-input-container {
132
- padding: 20px; background: white; border-top: 1px solid #e0e0e0;
133
- display: flex; gap: 10px; align-items: center;
134
  }
135
  .chat-input {
136
- flex: 1; padding: 12px 16px; border: 1px solid #e0e0e0;
137
- border-radius: 25px; font-size: 14px; outline: none;
138
  }
139
- .chat-input:focus { border-color: #25d366; box-shadow: 0 0 0 2px rgba(37, 211, 102, 0.2); }
140
  .send-button {
141
- background: #25d366; color: white; border: none; border-radius: 50%;
142
- width: 45px; height: 45px; cursor: pointer; display: flex;
143
- align-items: center; justify-content: center;
144
  }
145
- .send-button:hover { background: #128c7e; }
146
- .send-button:disabled { background: #ccc; cursor: not-allowed; }
147
  .welcome-message {
148
- text-align: center; color: #666; font-size: 13px;
149
- padding: 20px; border-radius: 10px; background: rgba(255,255,255,0.7);
150
  }
151
  .typing-indicator {
152
- display: none; align-items: center; gap: 5px; padding: 12px 16px;
153
- background: white; border-radius: 15px; align-self: flex-start;
154
  }
155
  .typing-dot {
156
- width: 8px; height: 8px; background: #999; border-radius: 50%;
157
- animation: typing 1.4s infinite;
 
 
 
 
 
 
 
 
 
 
 
158
  }
159
- .typing-dot:nth-child(2) { animation-delay: 0.2s; }
160
- .typing-dot:nth-child(3) { animation-delay: 0.4s; }
161
- @keyframes typing { 0%, 60%, 100% { transform: translateY(0); } 30% { transform: translateY(-10px); } }
162
- @keyframes slideIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
163
- @media (max-width: 480px) { .chat-container { width: 100vw; height: 100vh; border-radius: 0; } }
164
  </style>
165
  </head>
166
  <body>
167
  <div class="chat-container">
168
  <div class="chat-header">
169
- <h1>πŸ€– LyonPoy AI Chat</h1>
 
170
  <select class="model-selector" id="modelSelect">
171
- <option value="gpt-2">GPT-2 (General)</option>
 
 
 
 
 
 
 
172
  <option value="tinny-llama">Tinny Llama</option>
173
  <option value="pythia">Pythia</option>
174
  <option value="gpt-neo">GPT-Neo</option>
175
- <option value="distil-gpt-2">DistilGPT-2</option>
176
- <option value="gpt-2-tinny">GPT-2 Tinny</option>
177
- <option value="bert-tinny">BERT Tinny</option>
178
- <option value="albert-base-v2">ALBERT Base V2</option>
179
- <option value="distilbert-base-uncased">DistilBERT</option>
180
- <option value="electra-small">ELECTRA Small</option>
181
- <option value="t5-small">T5 Small</option>
182
  </select>
 
183
  </div>
184
  <div class="chat-messages" id="chatMessages">
185
  <div class="welcome-message">
186
- πŸ‘‹ Halo! Saya LyonPoy AI Assistant.<br>
187
- Pilih model di atas dan mulai chat dengan saya!
 
188
  </div>
189
  </div>
190
  <div class="typing-indicator" id="typingIndicator">
191
  <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
 
192
  </div>
193
  <div class="chat-input-container">
194
- <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan..." maxlength="500">
195
  <button class="send-button" id="sendButton">➀</button>
196
  </div>
197
  </div>
@@ -201,119 +265,325 @@ async def get_frontend():
201
  const sendButton = document.getElementById('sendButton');
202
  const modelSelect = document.getElementById('modelSelect');
203
  const typingIndicator = document.getElementById('typingIndicator');
 
 
 
 
204
 
205
- function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }
 
 
 
 
 
 
 
 
206
 
207
- function addMessage(content, isUser = false) {
208
  const messageDiv = document.createElement('div');
209
  messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
210
  const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
211
- messageDiv.innerHTML = `${content}<div class="message-time">${time}</div>`;
 
 
 
 
 
 
 
 
 
 
212
  chatMessages.appendChild(messageDiv);
213
  scrollToBottom();
214
  }
215
 
216
- function showTyping() { typingIndicator.style.display = 'flex'; scrollToBottom(); }
217
- function hideTyping() { typingIndicator.style.display = 'none'; }
 
 
 
 
 
 
218
 
219
  async function sendMessage() {
220
  const message = chatInput.value.trim();
221
  if (!message) return;
222
 
223
- chatInput.disabled = true; sendButton.disabled = true;
224
- addMessage(message, true); chatInput.value = ''; showTyping();
 
 
 
 
 
 
 
225
 
226
  try {
227
  const response = await fetch('/chat', {
228
  method: 'POST',
229
  headers: { 'Content-Type': 'application/json' },
230
- body: JSON.stringify({ message: message, model: modelSelect.value })
 
 
 
231
  });
 
232
  const data = await response.json();
 
 
233
  hideTyping();
 
 
234
  if (data.status === 'success') {
235
- addMessage(data.response);
236
  } else {
237
- addMessage('❌ Maaf, terjadi kesalahan. Coba lagi nanti.');
238
  }
239
  } catch (error) {
 
240
  hideTyping();
241
- addMessage('❌ Tidak dapat terhubung ke server.');
 
 
242
  }
243
- chatInput.disabled = false; sendButton.disabled = false; chatInput.focus();
 
 
 
244
  }
245
 
 
246
  sendButton.addEventListener('click', sendMessage);
247
- chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
 
 
 
248
  modelSelect.addEventListener('change', () => {
249
- const modelName = modelSelect.options[modelSelect.selectedIndex].text;
250
- addMessage(`πŸ”„ Model diubah ke: ${modelName}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  });
252
- window.addEventListener('load', () => chatInput.focus());
253
  </script>
254
  </body>
255
  </html>
256
  '''
257
  return HTMLResponse(content=html_content)
258
 
259
- # Chat API
260
  @app.post("/chat")
261
  async def chat(request: ChatRequest):
 
 
262
  try:
263
  model_id = request.model.lower()
264
  if model_id not in MODELS:
265
- raise HTTPException(status_code=400, detail="Model tidak tersedia")
266
 
267
  model_config = MODELS[model_id]
268
 
269
- # Load model jika belum ada
270
  if model_id not in app.state.pipelines:
271
- print(f"⏳ Loading {model_config['name']}...")
272
- device = 0 if torch.cuda.is_available() else -1
273
- dtype = torch.float16 if torch.cuda.is_available() else torch.float32
274
 
275
- app.state.pipelines[model_id] = pipeline(
276
- task=model_config["task"],
277
- model=model_config["model_path"],
278
- device=device,
279
- torch_dtype=dtype
280
- )
 
 
 
 
 
 
 
 
 
 
281
 
282
  pipe = app.state.pipelines[model_id]
283
 
284
- # Process berdasarkan task
 
 
285
  if model_config["task"] == "text-generation":
 
286
  result = pipe(
287
- request.message,
288
- max_length=min(len(request.message.split()) + 50, 200),
289
  temperature=0.7,
290
  do_sample=True,
291
- pad_token_id=pipe.tokenizer.eos_token_id
 
 
 
292
  )[0]['generated_text']
293
 
294
- # Clean output
295
- if result.startswith(request.message):
296
- result = result[len(request.message):].strip()
297
 
 
 
 
 
 
 
298
  elif model_config["task"] == "text-classification":
299
- output = pipe(request.message)[0]
300
- result = f"Sentimen: {output['label']} (Confidence: {output['score']:.2f})"
 
301
 
302
  elif model_config["task"] == "text2text-generation":
303
- result = pipe(request.message, max_length=150)[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
- return {"response": result, "model": model_config["name"], "status": "success"}
 
 
 
 
 
 
 
 
 
306
 
307
  except Exception as e:
308
- print(f"❌ Error: {e}")
309
- raise HTTPException(status_code=500, detail="Terjadi kesalahan")
 
 
 
310
 
311
- # Health check
312
  @app.get("/health")
313
  async def health():
314
- return {"status": "healthy", "gpu": torch.cuda.is_available()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
- # Run app
317
  if __name__ == "__main__":
318
  port = int(os.environ.get("PORT", 7860))
319
- uvicorn.run(app, host="0.0.0.0", port=port)
 
 
 
 
 
 
 
 
2
  import uvicorn
3
  from fastapi import FastAPI, HTTPException
4
  from fastapi.responses import HTMLResponse
 
5
  from pydantic import BaseModel
6
+ from transformers import pipeline, AutoTokenizer, AutoModel, set_seed
7
  import torch
8
  from typing import Optional
9
+ import asyncio
10
+ import time
11
+ import gc
12
 
# Initialize the FastAPI application
app = FastAPI(title="LyonPoy AI Chat - CPU Optimized")

# Fix the random seed for consistent results
set_seed(42)
18
+
# CPU-optimized configuration for the 11 supported models.
#
# Each entry is keyed by the internal model id used by the API and holds:
#   name        - display name returned to clients
#   model_path  - Hugging Face repository id passed to ``pipeline``
#   task        - pipeline task name
#   max_tokens  - generation budget for the model (0 for classifiers)
#   priority    - speed tier; 1 is the fastest group
MODELS = {
    "distil-gpt-2": {
        "name": "DistilGPT-2 ⚡",
        "model_path": "Lyon28/Distil_GPT-2",
        "task": "text-generation",
        "max_tokens": 20,
        "priority": 1,  # fastest
    },
    "gpt-2-tinny": {
        "name": "GPT-2 Tinny ⚡",
        "model_path": "Lyon28/GPT-2-Tinny",
        "task": "text-generation",
        "max_tokens": 18,
        "priority": 1,
    },
    "bert-tinny": {
        "name": "BERT Tinny 📊",
        "model_path": "Lyon28/Bert-Tinny",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 1,
    },
    "distilbert-base-uncased": {
        "name": "DistilBERT 📊",
        "model_path": "Lyon28/Distilbert-Base-Uncased",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 1,
    },
    "albert-base-v2": {
        "name": "ALBERT Base 📊",
        "model_path": "Lyon28/Albert-Base-V2",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 2,
    },
    "electra-small": {
        "name": "ELECTRA Small 📊",
        "model_path": "Lyon28/Electra-Small",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 2,
    },
    "t5-small": {
        "name": "T5 Small 🔄",
        "model_path": "Lyon28/T5-Small",
        "task": "text2text-generation",
        "max_tokens": 25,
        "priority": 2,
    },
    "gpt-2": {
        "name": "GPT-2 Standard",
        "model_path": "Lyon28/GPT-2",
        "task": "text-generation",
        "max_tokens": 22,
        "priority": 2,
    },
    "tinny-llama": {
        "name": "Tinny Llama",
        "model_path": "Lyon28/Tinny-Llama",
        "task": "text-generation",
        "max_tokens": 25,
        "priority": 3,
    },
    "pythia": {
        "name": "Pythia",
        "model_path": "Lyon28/Pythia",
        "task": "text-generation",
        "max_tokens": 25,
        "priority": 3,
    },
    "gpt-neo": {
        "name": "GPT-Neo",
        "model_path": "Lyon28/GPT-Neo",
        "task": "text-generation",
        "max_tokens": 30,
        "priority": 3,
    },
}
99
 
class ChatRequest(BaseModel):
    # Request body accepted by the POST /chat endpoint.

    # Text to send to the selected model.
    message: str
    # Key into MODELS; defaults to "distil-gpt-2" when the field is omitted.
    model: Optional[str] = "distil-gpt-2"
103
 
# CPU-optimized startup hook
@app.on_event("startup")
async def load_models():
    """Create the empty per-process registries and apply CPU/cache settings.

    No model is loaded here: ``app.state.pipelines`` starts empty and is
    filled on demand by the ``/chat`` handler.
    """
    app.state.pipelines = {}
    app.state.tokenizers = {}

    # Limit the number of threads used for CPU inference.
    torch.set_num_threads(2)
    # NOTE(review): torch and transformers are imported at the top of the file,
    # before this hook runs; confirm the libraries still honor environment
    # variables set this late (thread counts below, cache locations further down).
    for thread_var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS"):
        os.environ[thread_var] = "2"

    # Point the Hugging Face caches at a directory under /tmp and create it.
    cache_dir = "/tmp/.cache/huggingface"
    os.environ["HF_HOME"] = cache_dir
    os.environ["TRANSFORMERS_CACHE"] = cache_dir
    os.makedirs(os.environ["HF_HOME"], exist_ok=True)

    print("🚀 LyonPoy AI Chat - CPU Optimized Ready!")
122
 
123
+ # Lightweight frontend
124
  @app.get("/", response_class=HTMLResponse)
125
  async def get_frontend():
126
  html_content = '''
 
129
  <head>
130
  <meta charset="UTF-8">
131
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
132
+ <title>LyonPoy AI Chat - CPU Fast</title>
133
  <style>
134
  * { margin: 0; padding: 0; box-sizing: border-box; }
135
  body {
136
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
137
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
138
  height: 100vh; display: flex; justify-content: center; align-items: center;
139
  }
140
  .chat-container {
141
  width: 400px; height: 600px; background: #fff; border-radius: 15px;
142
+ box-shadow: 0 15px 35px rgba(0,0,0,0.1); display: flex; flex-direction: column; overflow: hidden;
143
  }
144
  .chat-header {
145
+ background: linear-gradient(135deg, #00b4db, #0083b0); color: white;
146
+ padding: 15px; text-align: center;
147
+ }
148
+ .chat-header h1 { font-size: 16px; font-weight: 600; margin-bottom: 5px; }
149
+ .cpu-badge {
150
+ background: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 10px;
151
+ font-size: 10px; display: inline-block; margin-top: 3px;
152
  }
 
153
  .model-selector {
154
  background: rgba(255,255,255,0.2); border: none; color: white;
155
+ padding: 6px 10px; border-radius: 15px; font-size: 11px; cursor: pointer;
156
+ margin-top: 8px; width: 100%;
157
  }
158
  .chat-messages {
159
+ flex: 1; padding: 15px; overflow-y: auto; background: #f8f9fa;
160
+ display: flex; flex-direction: column; gap: 12px;
161
  }
162
  .message {
163
+ max-width: 85%; padding: 10px 14px; border-radius: 12px;
164
+ font-size: 13px; line-height: 1.3; word-wrap: break-word;
165
  }
166
  .message.user {
167
+ background: #00b4db; color: white; align-self: flex-end;
168
+ border-bottom-right-radius: 4px;
169
  }
170
  .message.bot {
171
  background: white; color: #333; align-self: flex-start;
172
+ border-bottom-left-radius: 4px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);
173
+ }
174
+ .message-time { font-size: 10px; opacity: 0.6; margin-top: 3px; }
175
+ .response-time {
176
+ font-size: 9px; color: #666; margin-top: 2px;
177
+ display: flex; align-items: center; gap: 3px;
178
+ }
179
+ .speed-indicator {
180
+ width: 6px; height: 6px; border-radius: 50%; display: inline-block;
181
  }
182
+ .fast { background: #22c55e; }
183
+ .medium { background: #f59e0b; }
184
+ .slow { background: #ef4444; }
185
  .chat-input-container {
186
+ padding: 15px; background: white; border-top: 1px solid #e5e7eb;
187
+ display: flex; gap: 8px; align-items: center;
188
  }
189
  .chat-input {
190
+ flex: 1; padding: 10px 14px; border: 1px solid #d1d5db;
191
+ border-radius: 20px; font-size: 13px; outline: none;
192
  }
193
+ .chat-input:focus { border-color: #00b4db; }
194
  .send-button {
195
+ background: #00b4db; color: white; border: none; border-radius: 50%;
196
+ width: 40px; height: 40px; cursor: pointer; display: flex;
197
+ align-items: center; justify-content: center; font-size: 16px;
198
  }
199
+ .send-button:hover { background: #0083b0; }
200
+ .send-button:disabled { background: #d1d5db; cursor: not-allowed; }
201
  .welcome-message {
202
+ text-align: center; color: #6b7280; font-size: 12px;
203
+ padding: 15px; border-radius: 8px; background: rgba(255,255,255,0.8);
204
  }
205
  .typing-indicator {
206
+ display: none; align-items: center; gap: 4px; padding: 10px 14px;
207
+ background: white; border-radius: 12px; align-self: flex-start;
208
  }
209
  .typing-dot {
210
+ width: 6px; height: 6px; background: #9ca3af; border-radius: 50%;
211
+ animation: typing 1.2s infinite;
212
+ }
213
+ .typing-dot:nth-child(2) { animation-delay: 0.15s; }
214
+ .typing-dot:nth-child(3) { animation-delay: 0.3s; }
215
+ @keyframes typing { 0%, 60%, 100% { opacity: 0.3; } 30% { opacity: 1; } }
216
+ .model-status {
217
+ font-size: 10px; color: rgba(255,255,255,0.8); margin-top: 3px;
218
+ }
219
+ @media (max-width: 480px) {
220
+ .chat-container { width: 100vw; height: 100vh; border-radius: 0; }
221
+ .chat-header { padding: 12px; }
222
+ .chat-messages { padding: 12px; }
223
  }
 
 
 
 
 
224
  </style>
225
  </head>
226
  <body>
227
  <div class="chat-container">
228
  <div class="chat-header">
229
+ <h1>⚑ LyonPoy AI Chat</h1>
230
+ <div class="cpu-badge">CPU Optimized</div>
231
  <select class="model-selector" id="modelSelect">
232
+ <option value="distil-gpt-2">πŸš€ DistilGPT-2 (Fastest)</option>
233
+ <option value="gpt-2-tinny">πŸš€ GPT-2 Tinny (Fast)</option>
234
+ <option value="bert-tinny">πŸ“Š BERT Tinny (Analysis)</option>
235
+ <option value="distilbert-base-uncased">πŸ“Š DistilBERT (Analysis)</option>
236
+ <option value="albert-base-v2">πŸ“Š ALBERT Base</option>
237
+ <option value="electra-small">πŸ“Š ELECTRA Small</option>
238
+ <option value="t5-small">πŸ”„ T5 Small (Transform)</option>
239
+ <option value="gpt-2">GPT-2 Standard</option>
240
  <option value="tinny-llama">Tinny Llama</option>
241
  <option value="pythia">Pythia</option>
242
  <option value="gpt-neo">GPT-Neo</option>
 
 
 
 
 
 
 
243
  </select>
244
+ <div class="model-status" id="modelStatus">Ready to chat!</div>
245
  </div>
246
  <div class="chat-messages" id="chatMessages">
247
  <div class="welcome-message">
248
+ πŸš€ <strong>CPU-Optimized AI Chat</strong><br>
249
+ Models dioptimalkan untuk kecepatan di CPU<br>
250
+ Pilih model dan mulai chat!
251
  </div>
252
  </div>
253
  <div class="typing-indicator" id="typingIndicator">
254
  <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
255
+ <span style="font-size: 11px; color: #6b7280; margin-left: 5px;">AI sedang berpikir...</span>
256
  </div>
257
  <div class="chat-input-container">
258
+ <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan singkat (max 100 karakter)..." maxlength="100">
259
  <button class="send-button" id="sendButton">➀</button>
260
  </div>
261
  </div>
 
265
  const sendButton = document.getElementById('sendButton');
266
  const modelSelect = document.getElementById('modelSelect');
267
  const typingIndicator = document.getElementById('typingIndicator');
268
+ const modelStatus = document.getElementById('modelStatus');
269
+
270
+ // Production API Base
271
+ const API_BASE = window.location.origin;
272
 
273
+ function scrollToBottom() {
274
+ chatMessages.scrollTop = chatMessages.scrollHeight;
275
+ }
276
+
277
+ function getSpeedClass(time) {
278
+ if (time < 2000) return 'fast';
279
+ if (time < 5000) return 'medium';
280
+ return 'slow';
281
+ }
282
 
283
+ function addMessage(content, isUser = false, responseTime = null) {
284
  const messageDiv = document.createElement('div');
285
  messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
286
  const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
287
+
288
+ let timeInfo = `<div class="message-time">${time}</div>`;
289
+ if (responseTime && !isUser) {
290
+ const speedClass = getSpeedClass(responseTime);
291
+ timeInfo += `<div class="response-time">
292
+ <span class="speed-indicator ${speedClass}"></span>
293
+ ${responseTime}ms
294
+ </div>`;
295
+ }
296
+
297
+ messageDiv.innerHTML = `${content}${timeInfo}`;
298
  chatMessages.appendChild(messageDiv);
299
  scrollToBottom();
300
  }
301
 
302
+ function showTyping() {
303
+ typingIndicator.style.display = 'flex';
304
+ scrollToBottom();
305
+ }
306
+
307
+ function hideTyping() {
308
+ typingIndicator.style.display = 'none';
309
+ }
310
 
311
  async function sendMessage() {
312
  const message = chatInput.value.trim();
313
  if (!message) return;
314
 
315
+ chatInput.disabled = true;
316
+ sendButton.disabled = true;
317
+ modelStatus.textContent = 'Processing...';
318
+
319
+ addMessage(message, true);
320
+ chatInput.value = '';
321
+ showTyping();
322
+
323
+ const startTime = Date.now();
324
 
325
  try {
326
  const response = await fetch('/chat', {
327
  method: 'POST',
328
  headers: { 'Content-Type': 'application/json' },
329
+ body: JSON.stringify({
330
+ message: message,
331
+ model: modelSelect.value
332
+ })
333
  });
334
+
335
  const data = await response.json();
336
+ const responseTime = Date.now() - startTime;
337
+
338
  hideTyping();
339
+ modelStatus.textContent = 'Ready';
340
+
341
  if (data.status === 'success') {
342
+ addMessage(data.response, false, responseTime);
343
  } else {
344
+ addMessage('⚠️ Model sedang loading, tunggu sebentar...', false, responseTime);
345
  }
346
  } catch (error) {
347
+ const responseTime = Date.now() - startTime;
348
  hideTyping();
349
+ modelStatus.textContent = 'Error';
350
+ addMessage('❌ Koneksi bermasalah, coba lagi.', false, responseTime);
351
+ console.error('Error:', error);
352
  }
353
+
354
+ chatInput.disabled = false;
355
+ sendButton.disabled = false;
356
+ chatInput.focus();
357
  }
358
 
359
+ // Event listeners
360
  sendButton.addEventListener('click', sendMessage);
361
+ chatInput.addEventListener('keypress', (e) => {
362
+ if (e.key === 'Enter') sendMessage();
363
+ });
364
+
365
  modelSelect.addEventListener('change', () => {
366
+ const selectedOption = modelSelect.options[modelSelect.selectedIndex];
367
+ const modelName = selectedOption.text;
368
+ modelStatus.textContent = `Model: ${modelName}`;
369
+ addMessage(`πŸ”„ Switched to: ${modelName}`);
370
+ });
371
+
372
+ // Auto-focus on load
373
+ window.addEventListener('load', () => {
374
+ chatInput.focus();
375
+ modelStatus.textContent = 'DistilGPT-2 Ready (Fastest)';
376
+ });
377
+
378
+ // Character counter
379
+ chatInput.addEventListener('input', () => {
380
+ const remaining = 100 - chatInput.value.length;
381
+ if (remaining < 20) {
382
+ chatInput.style.borderColor = remaining < 10 ? '#ef4444' : '#f59e0b';
383
+ } else {
384
+ chatInput.style.borderColor = '#d1d5db';
385
+ }
386
  });
 
387
  </script>
388
  </body>
389
  </html>
390
  '''
391
  return HTMLResponse(content=html_content)
392
 
# CPU-optimized chat API
@app.post("/chat")
async def chat(request: ChatRequest):
    """Answer ``request.message`` with the selected model.

    The pipeline for a model is created on first use and cached in
    ``app.state.pipelines``. A missing or unknown model id falls back to
    ``"distil-gpt-2"``. Only the first 80 characters of the message are used.

    NOTE(review): model loading and inference are synchronous calls made
    inside this coroutine, so the event loop is blocked while they run.

    Args:
        request: Chat payload with the user message and an optional model id.

    Returns:
        On success, a dict with ``response``, ``model``, ``status``
        (``"success"``) and ``processing_time`` (e.g. ``"123ms"``).
        If anything raises, a dict with a randomly chosen fallback
        ``response``, ``status`` (``"error"``) and ``processing_time``;
        the ``model`` key is absent in that case.
    """
    import random  # only needed to pick a fallback reply

    start_time = time.time()

    try:
        # ``model`` is Optional[str]; an explicit null must not crash on .lower().
        model_id = (request.model or "distil-gpt-2").lower()
        if model_id not in MODELS:
            model_id = "distil-gpt-2"  # default to the fastest model

        model_config = MODELS[model_id]

        # Lazy loading with CPU-oriented settings
        if model_id not in app.state.pipelines:
            print(f"⚡ CPU Loading {model_config['name']}...")

            pipeline_kwargs = {
                "task": model_config["task"],
                "model": model_config["model_path"],
                "device": -1,  # force CPU
                "torch_dtype": torch.float32,
                "model_kwargs": {
                    "torchscript": False,
                    "low_cpu_mem_usage": True,
                },
            }

            app.state.pipelines[model_id] = pipeline(**pipeline_kwargs)

            # Release temporaries created while loading
            gc.collect()

        pipe = app.state.pipelines[model_id]

        # Cap the prompt length to keep CPU latency down
        input_text = request.message[:80]

        if model_config["task"] == "text-generation":
            result = pipe(
                input_text,
                # Budget only the newly generated tokens. ``max_length`` also
                # counts the prompt's tokens, and a word count underestimates
                # them, so the old limit could leave no room to generate.
                max_new_tokens=model_config["max_tokens"],
                temperature=0.7,
                do_sample=True,
                top_p=0.85,
                pad_token_id=pipe.tokenizer.eos_token_id,
                num_return_sequences=1,
                early_stopping=True,
            )[0]['generated_text']

            # Drop the echoed prompt from the front of the output
            if result.startswith(input_text):
                result = result[len(input_text):].strip()

            # Keep only the first sentence, or truncate when there is none
            if '.' in result:
                result = result.split('.')[0] + '.'
            elif len(result) > 80:
                result = result[:77] + '...'

        elif model_config["task"] == "text-classification":
            output = pipe(input_text, truncation=True, max_length=128)[0]
            confidence = f"{output['score']:.2f}"
            result = f"📊 {output['label']} ({confidence})"

        elif model_config["task"] == "text2text-generation":
            result = pipe(
                input_text,
                max_length=model_config["max_tokens"],
                temperature=0.6,
                early_stopping=True,
            )[0]['generated_text']

        # Final cleanup: replace near-empty output, cap the reply at 100 characters
        if not result or len(result.strip()) < 3:
            result = "🤔 Hmm, coba kata lain?"
        elif len(result) > 100:
            result = result[:97] + "..."

        processing_time = round((time.time() - start_time) * 1000)

        return {
            "response": result,
            "model": model_config["name"],
            "status": "success",
            "processing_time": f"{processing_time}ms",
        }

    except Exception as e:
        # Best-effort endpoint: log the failure and answer with a canned reply
        # instead of surfacing an HTTP error to the chat UI.
        print(f"❌ CPU Error: {e}")
        processing_time = round((time.time() - start_time) * 1000)

        fallback_responses = [
            "🔄 Coba lagi dengan kata yang lebih simple?",
            "💭 Hmm, mungkin pertanyaan lain?",
            "⚡ Model sedang optimal, tunggu sebentar...",
            "🚀 Coba model lain yang lebih cepat?",
        ]
        fallback = random.choice(fallback_responses)

        return {
            "response": fallback,
            "status": "error",
            "processing_time": f"{processing_time}ms",
        }
502
+
# Inference endpoint for production clients
@app.post("/inference")
async def inference(request: dict):
    """Run one message through a model addressed by its repository path.

    Expects a JSON object with ``message`` (only the first 80 characters are
    used) and ``model`` (e.g. ``"Lyon28/Distil_GPT-2"``). The last path
    segment is lower-cased and resolved to an id in ``MODELS``; anything
    unknown falls back to ``"distil-gpt-2"``. The work is delegated to
    ``chat``.

    Returns:
        A dict with ``result``, ``status``, ``model_used`` and
        ``processing_time``. ``status`` mirrors the status reported by
        ``chat``. If this handler itself raises, only ``result`` and
        ``status`` (``"error"``) are returned.
    """
    try:
        # ``or`` also covers explicit nulls in the payload.
        message = (request.get("message") or "")[:80]
        model_path = request.get("model") or "Lyon28/Distil_GPT-2"

        # Resolve the repository name against MODELS rather than a hand-copied
        # table. "distil_gpt-2" is the only repository name whose lower-cased
        # form differs from its internal id.
        model_key = model_path.split("/")[-1].lower()
        aliases = {"distil_gpt-2": "distil-gpt-2"}
        internal_model = aliases.get(model_key, model_key)
        if internal_model not in MODELS:
            internal_model = "distil-gpt-2"

        chat_request = ChatRequest(message=message, model=internal_model)
        result = await chat(chat_request)

        return {
            "result": result["response"],
            # ``chat`` reports failures via ``status`` and then omits ``model``;
            # pass its fallback reply through instead of raising KeyError.
            "status": result.get("status", "success"),
            "model_used": result.get("model", MODELS[internal_model]["name"]),
            "processing_time": result.get("processing_time", "0ms"),
        }

    except Exception as e:
        print(f"❌ Inference Error: {e}")
        return {
            "result": "🔄 Sedang optimasi, coba lagi...",
            "status": "error",
        }
547
 
# Lightweight health check
@app.get("/health")
async def health():
    """Report service status and how many pipelines are currently cached."""
    # An empty tuple stands in when the startup hook has not created the registry.
    cached_pipelines = getattr(app.state, 'pipelines', ())

    report = {"status": "healthy", "platform": "CPU"}
    report["loaded_models"] = len(cached_pipelines)
    report["total_models"] = len(MODELS)
    report["optimization"] = "CPU-Tuned"
    return report
559
+
# Model catalogue endpoint
@app.get("/models")
async def get_models():
    """List every configured model with its task, token budget and priority."""
    copied_fields = ("name", "task", "max_tokens", "priority")

    catalogue = []
    for model_id, spec in MODELS.items():
        entry = {"id": model_id}
        for field in copied_fields:
            entry[field] = spec[field]
        entry["cpu_optimized"] = True
        catalogue.append(entry)

    return {
        "models": catalogue,
        "platform": "CPU",
        "recommended": ["distil-gpt-2", "gpt-2-tinny", "bert-tinny"],
    }
578
 
# Run with CPU-oriented server settings
if __name__ == "__main__":
    # The listening port comes from the PORT environment variable, default 7860.
    port = int(os.environ.get("PORT", 7860))

    server_options = dict(
        host="0.0.0.0",
        port=port,
        workers=1,  # single worker for CPU
        timeout_keep_alive=30,
        access_log=False,  # access log disabled for performance
    )
    uvicorn.run(app, **server_options)