Lyon28 committed
Commit 3d635c7 · verified · 1 Parent(s): 7055a09

Update app.py

Files changed (1):
  app.py  +286 −198

app.py CHANGED
@@ -1,231 +1,319 @@
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
  from transformers import pipeline
  import torch
- from fastapi.middleware.cors import CORSMiddleware
- from typing import Dict, Any, Optional
- import os  # Import os module

- # Initialize the FastAPI application
- app = FastAPI(
-     title="LyonPoy Model Inference API",
-     description="API untuk mengakses 11 model machine learning",
-     version="1.0.0"
- )

- # CORS configuration for an external frontend
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Model configuration
- MODEL_MAP = {
-     "tinny-llama": "Lyon28/Tinny-Llama",
-     "pythia": "Lyon28/Pythia",
-     "bert-tinny": "Lyon28/Bert-Tinny",
-     "albert-base-v2": "Lyon28/Albert-Base-V2",
-     "t5-small": "Lyon28/T5-Small",
-     "gpt-2": "Lyon28/GPT-2",
-     "gpt-neo": "Lyon28/GPT-Neo",
-     "distilbert-base-uncased": "Lyon28/Distilbert-Base-Uncased",
-     "distil-gpt-2": "Lyon28/Distil_GPT-2",
-     "gpt-2-tinny": "Lyon28/GPT-2-Tinny",
-     "electra-small": "Lyon28/Electra-Small"
- }
-
- TASK_MAP = {
-     "text-generation": ["gpt-2", "gpt-neo", "distil-gpt-2", "gpt-2-tinny", "tinny-llama", "pythia"],
-     "text-classification": ["bert-tinny", "albert-base-v2", "distilbert-base-uncased", "electra-small"],
-     "text2text-generation": ["t5-small"]
  }

- class InferenceRequest(BaseModel):
-     text: str
-     model_id: Optional[str] = "gpt-2"  # Default model
-     max_length: int = 100
-     temperature: float = 0.9
-     top_p: float = 0.95
-
- # Helper functions
- def get_task(model_id: str) -> str:
-     for task, models in TASK_MAP.items():
-         if model_id in models:
-             return task
-     # Default to text-generation if not found (or raise an error)
-     return "text-generation"

- # Startup event for model initialization
  @app.on_event("startup")
  async def load_models():
      app.state.pipelines = {}
-     print("🟢 Semua model siap digunakan!")
-     # Set HF_HOME to work around cache permission issues
      os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)

- # Main endpoint
- @app.get("/")
- async def root():
-     return {
-         "message": "Selamat datang di Lyon28 Model API",
-         "endpoints": {
-             "documentation": "/docs",
-             "model_list": "/models",
-             "health_check": "/health",
-             "inference_with_model": "/inference/{model_id}",
-             "inference_general": "/inference"
-         },
-         "total_models": len(MODEL_MAP),
-         "usage_examples": {
-             "specific_model": "POST /inference/gpt-2 with JSON body",
-             "general_inference": "POST /inference with model_id in JSON body"
          }
-     }

- # Model list endpoint
- @app.get("/models")
- async def list_models():
-     return {
-         "available_models": list(MODEL_MAP.keys()),
-         "total_models": len(MODEL_MAP)
-     }

- # Health check endpoint
- @app.get("/health")
- async def health_check():
-     return {
-         "status": "healthy",
-         "gpu_available": torch.cuda.is_available(),
-         "gpu_type": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU-only"
-     }

- # NEW: General inference endpoint (handles POST /inference)
- @app.post("/inference")
- async def general_inference(request: InferenceRequest):
-     """
-     General inference endpoint that accepts model_id in the request body
-     """
-     return await process_inference(request.model_id, request)
-
- # Inference endpoint with model_id in the path
- @app.post("/inference/{model_id}")
- async def model_inference(model_id: str, request: InferenceRequest):
-     """
-     Specific model inference endpoint with model_id in the path
-     """
-     return await process_inference(model_id, request)
-
- # Shared inference processing function
- async def process_inference(model_id: str, request: InferenceRequest):
-     try:
-         # Make sure model_id is lowercase so it matches MODEL_MAP
-         model_id = model_id.lower()
-
-         # Validate the model ID
-         if model_id not in MODEL_MAP:
-             available_models = ", ".join(MODEL_MAP.keys())
-             raise HTTPException(
-                 status_code=404,
-                 detail=f"Model '{model_id}' tidak ditemukan. Model yang tersedia: {available_models}"
-             )
-
-         # Get the matching task
-         task = get_task(model_id)
-
-         # Load the model if it is not in memory yet
          if model_id not in app.state.pipelines:
-             print(f"⏳ Memuat model {model_id} untuk task {task}...")
-             # Use device=-1 (CPU) as the safe default
-             # If your Hugging Face Space definitely has a GPU, use device=0
-             device_to_use = 0 if torch.cuda.is_available() else -1
-             # Match the dtype to the device
-             dtype_to_use = torch.float16 if torch.cuda.is_available() else torch.float32
-
-             try:
-                 app.state.pipelines[model_id] = pipeline(
-                     task=task,
-                     model=MODEL_MAP[model_id],
-                     device=device_to_use,
-                     torch_dtype=dtype_to_use
-                 )
-                 print(f"✅ Model {model_id} berhasil dimuat!")
-             except Exception as load_error:
-                 print(f"❌ Gagal memuat model {model_id}: {load_error}")
-                 raise HTTPException(
-                     status_code=503,
-                     detail=f"Gagal memuat model {model_id}. Coba lagi nanti."
-                 )
-
-         pipe = app.state.pipelines[model_id]
-
-         # Process according to the task
-         if task == "text-generation":
-             result = pipe(
-                 request.text,
-                 max_length=request.max_length,
-                 temperature=request.temperature,
-                 top_p=request.top_p,
-                 do_sample=True
-             )[0]['generated_text']

-         elif task == "text-classification":
-             # For text-classification the output is a list of dicts; take the first one
-             output = pipe(request.text)[0]
-             result = {
-                 "label": output['label'],
-                 "confidence": round(output['score'], 4)
-             }

-         elif task == "text2text-generation":
-             # For text2text-generation the output is also a list of dicts
              result = pipe(
-                 request.text,
-                 max_length=request.max_length
              )[0]['generated_text']

-         else:
-             # Fallback for an unexpected task, though get_task should already handle this
-             raise HTTPException(
-                 status_code=500,
-                 detail=f"Tugas ({task}) untuk model {model_id} tidak didukung atau tidak dikenali."
-             )
-
-         return {
-             "result": result,
-             "model_used": model_id,
-             "task": task,
-             "status": "success"
-         }
-
-     except HTTPException as he:
-         # Re-raise HTTP exceptions
-         raise he
      except Exception as e:
-         # Log the error in more detail for debugging
-         print(f"‼️ Error saat memproses model {model_id}: {e}")
-         import traceback
-         traceback.print_exc()  # Print the full traceback to the log

-         raise HTTPException(
-             status_code=500,
-             detail=f"Error processing request: {str(e)}. Cek log server untuk detail."
-         )

- # Error handler for 404
- @app.exception_handler(404)
- async def not_found_handler(request, exc):
-     return {
-         "error": "Endpoint tidak ditemukan",
-         "available_endpoints": [
-             "GET /",
-             "GET /models",
-             "GET /health",
-             "POST /inference",
-             "POST /inference/{model_id}"
-         ],
-         "tip": "Gunakan /docs untuk dokumentasi lengkap"
-     }

+ import os
+ import uvicorn
  from fastapi import FastAPI, HTTPException
+ from fastapi.responses import HTMLResponse
+ from fastapi.staticfiles import StaticFiles
  from pydantic import BaseModel
  from transformers import pipeline
  import torch
+ from typing import Optional

+ # Initialize FastAPI
+ app = FastAPI(title="LyonPoy AI Chat")

+ # Configuration for all 11 models
+ MODELS = {
+     "tinny-llama": {
+         "name": "Tinny Llama",
+         "model_path": "Lyon28/Tinny-Llama",
+         "task": "text-generation"
+     },
+     "pythia": {
+         "name": "Pythia",
+         "model_path": "Lyon28/Pythia",
+         "task": "text-generation"
+     },
+     "bert-tinny": {
+         "name": "BERT Tinny",
+         "model_path": "Lyon28/Bert-Tinny",
+         "task": "text-classification"
+     },
+     "albert-base-v2": {
+         "name": "ALBERT Base V2",
+         "model_path": "Lyon28/Albert-Base-V2",
+         "task": "text-classification"
+     },
+     "t5-small": {
+         "name": "T5 Small",
+         "model_path": "Lyon28/T5-Small",
+         "task": "text2text-generation"
+     },
+     "gpt-2": {
+         "name": "GPT-2",
+         "model_path": "Lyon28/GPT-2",
+         "task": "text-generation"
+     },
+     "gpt-neo": {
+         "name": "GPT-Neo",
+         "model_path": "Lyon28/GPT-Neo",
+         "task": "text-generation"
+     },
+     "distilbert-base-uncased": {
+         "name": "DistilBERT",
+         "model_path": "Lyon28/Distilbert-Base-Uncased",
+         "task": "text-classification"
+     },
+     "distil-gpt-2": {
+         "name": "DistilGPT-2",
+         "model_path": "Lyon28/Distil_GPT-2",
+         "task": "text-generation"
+     },
+     "gpt-2-tinny": {
+         "name": "GPT-2 Tinny",
+         "model_path": "Lyon28/GPT-2-Tinny",
+         "task": "text-generation"
+     },
+     "electra-small": {
+         "name": "ELECTRA Small",
+         "model_path": "Lyon28/Electra-Small",
+         "task": "text-classification"
+     }
  }

+ class ChatRequest(BaseModel):
+     message: str
+     model: Optional[str] = "gpt-2"

+ # Startup
  @app.on_event("startup")
  async def load_models():
      app.state.pipelines = {}
      os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
+     print("🤖 LyonPoy AI Chat Ready!")

+ # Frontend route
+ @app.get("/", response_class=HTMLResponse)
+ async def get_frontend():
+     html_content = '''
+ <!DOCTYPE html>
+ <html lang="id">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>LyonPoy AI Chat</title>
+     <style>
+         * { margin: 0; padding: 0; box-sizing: border-box; }
+         body {
+             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+             height: 100vh; display: flex; justify-content: center; align-items: center;
          }
+         .chat-container {
+             width: 400px; height: 600px; background: #fff; border-radius: 15px;
+             box-shadow: 0 20px 40px rgba(0,0,0,0.15); display: flex; flex-direction: column; overflow: hidden;
+         }
+         .chat-header {
+             background: linear-gradient(135deg, #25d366, #128c7e); color: white;
+             padding: 20px; text-align: center;
+         }
+         .chat-header h1 { font-size: 18px; font-weight: 600; margin-bottom: 8px; }
+         .model-selector {
+             background: rgba(255,255,255,0.2); border: none; color: white;
+             padding: 8px 12px; border-radius: 20px; font-size: 12px; cursor: pointer;
+         }
+         .chat-messages {
+             flex: 1; padding: 20px; overflow-y: auto; background: #f0f0f0;
+             display: flex; flex-direction: column; gap: 15px;
+         }
+         .message {
+             max-width: 80%; padding: 12px 16px; border-radius: 15px;
+             font-size: 14px; line-height: 1.4; animation: slideIn 0.3s ease;
+         }
+         .message.user {
+             background: #25d366; color: white; align-self: flex-end; border-bottom-right-radius: 5px;
+         }
+         .message.bot {
+             background: white; color: #333; align-self: flex-start;
+             border-bottom-left-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);
+         }
+         .message-time { font-size: 11px; opacity: 0.7; margin-top: 5px; }
+         .chat-input-container {
+             padding: 20px; background: white; border-top: 1px solid #e0e0e0;
+             display: flex; gap: 10px; align-items: center;
+         }
+         .chat-input {
+             flex: 1; padding: 12px 16px; border: 1px solid #e0e0e0;
+             border-radius: 25px; font-size: 14px; outline: none;
+         }
+         .chat-input:focus { border-color: #25d366; box-shadow: 0 0 0 2px rgba(37, 211, 102, 0.2); }
+         .send-button {
+             background: #25d366; color: white; border: none; border-radius: 50%;
+             width: 45px; height: 45px; cursor: pointer; display: flex;
+             align-items: center; justify-content: center;
+         }
+         .send-button:hover { background: #128c7e; }
+         .send-button:disabled { background: #ccc; cursor: not-allowed; }
+         .welcome-message {
+             text-align: center; color: #666; font-size: 13px;
+             padding: 20px; border-radius: 10px; background: rgba(255,255,255,0.7);
+         }
+         .typing-indicator {
+             display: none; align-items: center; gap: 5px; padding: 12px 16px;
+             background: white; border-radius: 15px; align-self: flex-start;
+         }
+         .typing-dot {
+             width: 8px; height: 8px; background: #999; border-radius: 50%;
+             animation: typing 1.4s infinite;
+         }
+         .typing-dot:nth-child(2) { animation-delay: 0.2s; }
+         .typing-dot:nth-child(3) { animation-delay: 0.4s; }
+         @keyframes typing { 0%, 60%, 100% { transform: translateY(0); } 30% { transform: translateY(-10px); } }
+         @keyframes slideIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
+         @media (max-width: 480px) { .chat-container { width: 100vw; height: 100vh; border-radius: 0; } }
+     </style>
+ </head>
+ <body>
+     <div class="chat-container">
+         <div class="chat-header">
+             <h1>🤖 LyonPoy AI Chat</h1>
+             <select class="model-selector" id="modelSelect">
+                 <option value="gpt-2">GPT-2 (General)</option>
+                 <option value="tinny-llama">Tinny Llama</option>
+                 <option value="pythia">Pythia</option>
+                 <option value="gpt-neo">GPT-Neo</option>
+                 <option value="distil-gpt-2">DistilGPT-2</option>
+                 <option value="gpt-2-tinny">GPT-2 Tinny</option>
+                 <option value="bert-tinny">BERT Tinny</option>
+                 <option value="albert-base-v2">ALBERT Base V2</option>
+                 <option value="distilbert-base-uncased">DistilBERT</option>
+                 <option value="electra-small">ELECTRA Small</option>
+                 <option value="t5-small">T5 Small</option>
+             </select>
+         </div>
+         <div class="chat-messages" id="chatMessages">
+             <div class="welcome-message">
+                 👋 Halo! Saya LyonPoy AI Assistant.<br>
+                 Pilih model di atas dan mulai chat dengan saya!
+             </div>
+         </div>
+         <div class="typing-indicator" id="typingIndicator">
+             <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
+         </div>
+         <div class="chat-input-container">
+             <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan..." maxlength="500">
+             <button class="send-button" id="sendButton">➤</button>
+         </div>
+     </div>
+     <script>
+         const chatMessages = document.getElementById('chatMessages');
+         const chatInput = document.getElementById('chatInput');
+         const sendButton = document.getElementById('sendButton');
+         const modelSelect = document.getElementById('modelSelect');
+         const typingIndicator = document.getElementById('typingIndicator');

+         function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }
+
+         function addMessage(content, isUser = false) {
+             const messageDiv = document.createElement('div');
+             messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
+             const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
+             messageDiv.innerHTML = `${content}<div class="message-time">${time}</div>`;
+             chatMessages.appendChild(messageDiv);
+             scrollToBottom();
+         }

+         function showTyping() { typingIndicator.style.display = 'flex'; scrollToBottom(); }
+         function hideTyping() { typingIndicator.style.display = 'none'; }

+         async function sendMessage() {
+             const message = chatInput.value.trim();
+             if (!message) return;

+             chatInput.disabled = true; sendButton.disabled = true;
+             addMessage(message, true); chatInput.value = ''; showTyping();

+             try {
+                 const response = await fetch('/chat', {
+                     method: 'POST',
+                     headers: { 'Content-Type': 'application/json' },
+                     body: JSON.stringify({ message: message, model: modelSelect.value })
+                 });
+                 const data = await response.json();
+                 hideTyping();
+                 if (data.status === 'success') {
+                     addMessage(data.response);
+                 } else {
+                     addMessage('❌ Maaf, terjadi kesalahan. Coba lagi nanti.');
+                 }
+             } catch (error) {
+                 hideTyping();
+                 addMessage('❌ Tidak dapat terhubung ke server.');
+             }
+             chatInput.disabled = false; sendButton.disabled = false; chatInput.focus();
+         }

+         sendButton.addEventListener('click', sendMessage);
+         chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
+         modelSelect.addEventListener('change', () => {
+             const modelName = modelSelect.options[modelSelect.selectedIndex].text;
+             addMessage(`🔄 Model diubah ke: ${modelName}`);
+         });
+         window.addEventListener('load', () => chatInput.focus());
+     </script>
+ </body>
+ </html>
+ '''
+     return HTMLResponse(content=html_content)

+ # Chat API
+ @app.post("/chat")
+ async def chat(request: ChatRequest):
+     try:
+         model_id = request.model.lower()
+         if model_id not in MODELS:
+             raise HTTPException(status_code=400, detail="Model tidak tersedia")
+
+         model_config = MODELS[model_id]

+         # Load the model if it is not cached yet
          if model_id not in app.state.pipelines:
+             print(f"⏳ Loading {model_config['name']}...")
+             device = 0 if torch.cuda.is_available() else -1
+             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+             app.state.pipelines[model_id] = pipeline(
+                 task=model_config["task"],
+                 model=model_config["model_path"],
+                 device=device,
+                 torch_dtype=dtype
+             )

+         pipe = app.state.pipelines[model_id]

+         # Process according to the task
+         if model_config["task"] == "text-generation":
              result = pipe(
+                 request.message,
+                 max_length=min(len(request.message.split()) + 50, 200),
+                 temperature=0.7,
+                 do_sample=True,
+                 pad_token_id=pipe.tokenizer.eos_token_id
              )[0]['generated_text']
+
+             # Clean output
+             if result.startswith(request.message):
+                 result = result[len(request.message):].strip()
+
+         elif model_config["task"] == "text-classification":
+             output = pipe(request.message)[0]
+             result = f"Sentimen: {output['label']} (Confidence: {output['score']:.2f})"
+
+         elif model_config["task"] == "text2text-generation":
+             result = pipe(request.message, max_length=150)[0]['generated_text']
+
+         return {"response": result, "model": model_config["name"], "status": "success"}

      except Exception as e:
+         print(f"❌ Error: {e}")
+         raise HTTPException(status_code=500, detail="Terjadi kesalahan")

+ # Health check
+ @app.get("/health")
+ async def health():
+     return {"status": "healthy", "gpu": torch.cuda.is_available()}

+ # Run app
+ if __name__ == "__main__":
+     port = int(os.environ.get("PORT", 7860))
+     uvicorn.run(app, host="0.0.0.0", port=port)
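
For reference, a minimal sketch of exercising the new `/chat` endpoint introduced by this commit. It assumes the app is running locally on the default port 7860 (per the `PORT` fallback above) and that the `requests` package is installed; the URL and timeout are illustrative, while the request and response fields (`message`, `model`, `response`, `status`) match the code in the diff.

```python
# Smoke test for POST /chat (assumes a local server on port 7860).
import requests

resp = requests.post(
    "http://localhost:7860/chat",
    json={"message": "Halo, apa kabar?", "model": "gpt-2"},
    timeout=120,  # the first call can be slow while the pipeline downloads/loads
)
resp.raise_for_status()
data = resp.json()
if data["status"] == "success":
    print(f"{data['model']}: {data['response']}")
```

A quick `GET http://localhost:7860/health` returns `{"status": "healthy", "gpu": ...}` and is a cheaper way to confirm the server is up before sending chat requests.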