Lyon28 committed
Commit 138b76f · verified · 1 Parent(s): 24088e0

Update app.py

Files changed (1):
  1. app.py +213 -304
app.py CHANGED
@@ -1,220 +1,88 @@
  import os
  import uvicorn
- import asyncio
- from concurrent.futures import ThreadPoolExecutor
- from fastapi import FastAPI, HTTPException, BackgroundTasks
  from fastapi.responses import HTMLResponse
  from pydantic import BaseModel
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
  import torch
- from typing import Optional, Dict
- import time
- import logging
-
- # Setup logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)

  # Initialize FastAPI
- app = FastAPI(title="LyonPoy AI Chat - Optimized")

- # Optimized model configuration - prioritize smaller, faster models
  MODELS = {
-     "distil-gpt-2": {
-         "name": "DistilGPT-2",
-         "model_path": "Lyon28/Distil_GPT-2",
-         "task": "text-generation",
-         "priority": 1  # Highest priority - smallest model
-     },
-     "gpt-2-tinny": {
-         "name": "GPT-2 Tinny",
-         "model_path": "Lyon28/GPT-2-Tinny",
-         "task": "text-generation",
-         "priority": 2
-     },
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
-         "task": "text-generation",
-         "priority": 3
      },
-     "gpt-2": {
-         "name": "GPT-2",
-         "model_path": "Lyon28/GPT-2",
-         "task": "text-generation",
-         "priority": 4
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
-         "task": "text-classification",
-         "priority": 5
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
-         "task": "text-classification",
-         "priority": 6
-     },
-     "distilbert-base-uncased": {
-         "name": "DistilBERT",
-         "model_path": "Lyon28/Distilbert-Base-Uncased",
-         "task": "text-classification",
-         "priority": 7
-     },
-     "electra-small": {
-         "name": "ELECTRA Small",
-         "model_path": "Lyon28/Electra-Small",
-         "task": "text-classification",
-         "priority": 8
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
-         "task": "text2text-generation",
-         "priority": 9
      },
-     "pythia": {
-         "name": "Pythia",
-         "model_path": "Lyon28/Pythia",
-         "task": "text-generation",
-         "priority": 10
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
-         "task": "text-generation",
-         "priority": 11  # Largest model - lowest priority
      }
  }

  class ChatRequest(BaseModel):
      message: str
-     model: Optional[str] = "distil-gpt-2"  # Default to fastest model
-
- # Global state
- app.state.pipelines = {}
- app.state.loading_models = set()
- app.state.executor = ThreadPoolExecutor(max_workers=2)
-
- # Optimized model loading
- async def load_model_async(model_id: str):
-     """Load model in background thread"""
-     if model_id in app.state.loading_models:
-         return False
-
-     app.state.loading_models.add(model_id)
-
-     try:
-         model_config = MODELS[model_id]
-         logger.info(f"🔄 Loading {model_config['name']}...")
-
-         # Load in thread to avoid blocking
-         loop = asyncio.get_event_loop()
-
-         def load_model():
-             device = 0 if torch.cuda.is_available() else -1
-             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
-             return pipeline(
-                 task=model_config["task"],
-                 model=model_config["model_path"],
-                 device=device,
-                 torch_dtype=dtype,
-                 use_fast=True,
-                 trust_remote_code=True,
-                 low_cpu_mem_usage=True,
-                 # Optimization for faster inference
-                 pad_token_id=50256 if "gpt" in model_id else None
-             )
-
-         pipeline_obj = await loop.run_in_executor(app.state.executor, load_model)
-         app.state.pipelines[model_id] = pipeline_obj
-         logger.info(f"✅ {model_config['name']} loaded successfully")
-         return True
-
-     except Exception as e:
-         logger.error(f"❌ Failed to load {model_id}: {e}")
-         return False
-     finally:
-         app.state.loading_models.discard(model_id)

  @app.on_event("startup")
  async def load_models():
-     """Load high-priority models on startup"""
-     os.environ['HF_HOME'] = './cache/huggingface'  # Persistent cache
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
-
-     # Pre-load top 3 fastest models
-     priority_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])[:3]
-
-     tasks = []
-     for model_id in priority_models:
-         task = asyncio.create_task(load_model_async(model_id))
-         tasks.append(task)
-
-     # Load models concurrently
-     await asyncio.gather(*tasks, return_exceptions=True)
-     logger.info("🚀 LyonPoy AI Chat Ready!")

- # Optimized inference
- async def run_inference(model_id: str, message: str):
-     """Run inference in background thread"""
-     if model_id not in app.state.pipelines:
-         # Try to load model if not available
-         success = await load_model_async(model_id)
-         if not success:
-             raise HTTPException(status_code=503, detail=f"Model {model_id} unavailable")
-
-     pipe = app.state.pipelines[model_id]
-     model_config = MODELS[model_id]
-
-     loop = asyncio.get_event_loop()
-
-     def inference():
-         start_time = time.time()
-
-         try:
-             if model_config["task"] == "text-generation":
-                 # Optimized generation parameters
-                 result = pipe(
-                     message,
-                     max_new_tokens=min(50, 150 - len(message.split())),  # Shorter responses
-                     temperature=0.7,
-                     do_sample=True,
-                     top_p=0.9,
-                     top_k=50,
-                     repetition_penalty=1.1,
-                     pad_token_id=pipe.tokenizer.eos_token_id if hasattr(pipe.tokenizer, 'eos_token_id') else 50256
-                 )[0]['generated_text']
-
-                 # Clean output
-                 if result.startswith(message):
-                     result = result[len(message):].strip()
-
-                 # Limit response length
-                 if len(result) > 200:
-                     result = result[:200] + "..."
-
-             elif model_config["task"] == "text-classification":
-                 output = pipe(message)[0]
-                 result = f"Analisis: {output['label']} (Keyakinan: {output['score']:.2f})"
-
-             elif model_config["task"] == "text2text-generation":
-                 result = pipe(message, max_length=100, num_beams=2)[0]['generated_text']
-
-             inference_time = time.time() - start_time
-             logger.info(f"⚡ Inference time: {inference_time:.2f}s for {model_config['name']}")
-
-             return result
-
-         except Exception as e:
-             logger.error(f"Inference error: {e}")
-             raise e
-
-     return await loop.run_in_executor(app.state.executor, inference)
-
- # Frontend route - simplified HTML
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
@@ -223,188 +91,229 @@ async def get_frontend():
  <head>
      <meta charset="UTF-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
-     <title>LyonPoy AI Chat - Fast Mode</title>
      <style>
          * { margin: 0; padding: 0; box-sizing: border-box; }
-         body { font-family: system-ui; background: #f5f5f5; padding: 20px; }
-         .container { max-width: 600px; margin: 0 auto; background: white; border-radius: 10px; overflow: hidden; }
-         .header { background: #007bff; color: white; padding: 15px; }
-         .chat { height: 400px; overflow-y: auto; padding: 15px; background: #fafafa; }
-         .message { margin: 10px 0; padding: 8px 12px; border-radius: 8px; }
-         .user { background: #007bff; color: white; margin-left: 20%; }
-         .bot { background: white; border: 1px solid #ddd; margin-right: 20%; }
-         .input-area { padding: 15px; display: flex; gap: 10px; }
-         input { flex: 1; padding: 10px; border: 1px solid #ddd; border-radius: 5px; }
-         button { padding: 10px 15px; background: #007bff; color: white; border: none; border-radius: 5px; cursor: pointer; }
-         select { padding: 5px; margin-left: 10px; }
-         .loading { color: #666; font-style: italic; }
      </style>
  </head>
  <body>
-     <div class="container">
-         <div class="header">
-             <h1>🚀 LyonPoy AI - Fast Mode</h1>
-             <select id="model">
-                 <option value="distil-gpt-2">DistilGPT-2 (Fastest)</option>
-                 <option value="gpt-2-tinny">GPT-2 Tinny</option>
                  <option value="tinny-llama">Tinny Llama</option>
-                 <option value="gpt-2">GPT-2</option>
                  <option value="bert-tinny">BERT Tinny</option>
                  <option value="albert-base-v2">ALBERT Base V2</option>
                  <option value="distilbert-base-uncased">DistilBERT</option>
                  <option value="electra-small">ELECTRA Small</option>
                  <option value="t5-small">T5 Small</option>
-                 <option value="pythia">Pythia</option>
-                 <option value="gpt-neo">GPT-Neo (Slowest)</option>
              </select>
          </div>
-         <div class="chat" id="chat"></div>
-         <div class="input-area">
-             <input type="text" id="message" placeholder="Ketik pesan..." maxlength="200">
-             <button onclick="sendMessage()">Kirim</button>
          </div>
      </div>
-
      <script>
-         const chat = document.getElementById('chat');
-         const messageInput = document.getElementById('message');
-         const modelSelect = document.getElementById('model');

          function addMessage(content, isUser = false) {
-             const div = document.createElement('div');
-             div.className = `message ${isUser ? 'user' : 'bot'}`;
-             div.textContent = content;
-             chat.appendChild(div);
-             chat.scrollTop = chat.scrollHeight;
          }
-
          async function sendMessage() {
-             const message = messageInput.value.trim();
              if (!message) return;
-
-             addMessage(message, true);
-             messageInput.value = '';
-             addMessage('⏳ Thinking...', false);
-
-             const startTime = Date.now();
-
              try {
                  const response = await fetch('/chat', {
                      method: 'POST',
                      headers: { 'Content-Type': 'application/json' },
-                     body: JSON.stringify({
-                         message: message,
-                         model: modelSelect.value
-                     })
                  });
-
                  const data = await response.json();
-                 const responseTime = ((Date.now() - startTime) / 1000).toFixed(1);
-
-                 // Remove loading message
-                 chat.removeChild(chat.lastElementChild);
-
                  if (data.status === 'success') {
-                     addMessage(`${data.response} (${responseTime}s)`, false);
                  } else {
-                     addMessage('❌ Error occurred', false);
                  }
              } catch (error) {
-                 chat.removeChild(chat.lastElementChild);
-                 addMessage('❌ Connection error', false);
             }
          }
-
-         messageInput.addEventListener('keypress', (e) => {
-             if (e.key === 'Enter') sendMessage();
          });
-
-         // Show welcome message
-         addMessage('👋 Halo! Pilih model dan mulai chat. Model DistilGPT-2 paling cepat!', false);
      </script>
  </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

- # Optimized chat endpoint
  @app.post("/chat")
- async def chat(request: ChatRequest, background_tasks: BackgroundTasks):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

-         # Limit message length for faster processing
-         message = request.message[:200]  # Max 200 chars

-         # Run inference
-         result = await run_inference(model_id, message)

-         # Load next priority model in background
-         background_tasks.add_task(preload_next_model, model_id)

-         return {
-             "response": result,
-             "model": MODELS[model_id]["name"],
-             "status": "success"
-         }

-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Chat error: {e}")
-         raise HTTPException(status_code=500, detail="Terjadi kesalahan")
-
- async def preload_next_model(current_model: str):
-     """Preload next model in background"""
-     try:
-         # Find next unloaded model by priority
-         loaded_models = set(app.state.pipelines.keys())
-         all_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])

-         for model_id in all_models:
-             if model_id not in loaded_models and model_id not in app.state.loading_models:
-                 await load_model_async(model_id)
-                 break
      except Exception as e:
-         logger.error(f"Background loading error: {e}")

- # Health check with model status
  @app.get("/health")
  async def health():
-     loaded_models = list(app.state.pipelines.keys())
-     return {
-         "status": "healthy",
-         "gpu": torch.cuda.is_available(),
-         "loaded_models": loaded_models,
-         "loading_models": list(app.state.loading_models)
-     }
-
- # Model status endpoint
- @app.get("/models")
- async def get_models():
-     models_status = {}
-     for model_id, config in MODELS.items():
-         models_status[model_id] = {
-             "name": config["name"],
-             "loaded": model_id in app.state.pipelines,
-             "loading": model_id in app.state.loading_models,
-             "priority": config["priority"]
-         }
-     return models_status
-
- # Cleanup on shutdown
- @app.on_event("shutdown")
- async def cleanup():
-     app.state.executor.shutdown(wait=True)

  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
-     uvicorn.run(
-         app,
-         host="0.0.0.0",
-         port=port,
-         log_level="info",
-         access_log=False  # Disable access log for better performance
-     )
 
  import os
  import uvicorn
+ from fastapi import FastAPI, HTTPException
  from fastapi.responses import HTMLResponse
+ from fastapi.staticfiles import StaticFiles
  from pydantic import BaseModel
+ from transformers import pipeline
  import torch
+ from typing import Optional

  # Initialize FastAPI
+ app = FastAPI(title="LyonPoy AI Chat")

+ # All 11 models configuration
  MODELS = {
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
+         "task": "text-generation"
      },
+     "pythia": {
+         "name": "Pythia",
+         "model_path": "Lyon28/Pythia",
+         "task": "text-generation"
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
+         "task": "text-classification"
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
+         "task": "text-classification"
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
+         "task": "text2text-generation"
      },
+     "gpt-2": {
+         "name": "GPT-2",
+         "model_path": "Lyon28/GPT-2",
+         "task": "text-generation"
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
+         "task": "text-generation"
+     },
+     "distilbert-base-uncased": {
+         "name": "DistilBERT",
+         "model_path": "Lyon28/Distilbert-Base-Uncased",
+         "task": "text-classification"
+     },
+     "distil-gpt-2": {
+         "name": "DistilGPT-2",
+         "model_path": "Lyon28/Distil_GPT-2",
+         "task": "text-generation"
+     },
+     "gpt-2-tinny": {
+         "name": "GPT-2 Tinny",
+         "model_path": "Lyon28/GPT-2-Tinny",
+         "task": "text-generation"
+     },
+     "electra-small": {
+         "name": "ELECTRA Small",
+         "model_path": "Lyon28/Electra-Small",
+         "task": "text-classification"
      }
  }

  class ChatRequest(BaseModel):
      message: str
+     model: Optional[str] = "gpt-2"

+ # Startup
  @app.on_event("startup")
  async def load_models():
+     app.state.pipelines = {}
+     os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
+     print("🤖 LyonPoy AI Chat Ready!")

+ # Frontend route
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
  <head>
      <meta charset="UTF-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>LyonPoy AI Chat</title>
      <style>
          * { margin: 0; padding: 0; box-sizing: border-box; }
+         body {
+             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+             height: 100vh; display: flex; justify-content: center; align-items: center;
+         }
+         .chat-container {
+             width: 400px; height: 600px; background: #fff; border-radius: 15px;
+             box-shadow: 0 20px 40px rgba(0,0,0,0.15); display: flex; flex-direction: column; overflow: hidden;
+         }
+         .chat-header {
+             background: linear-gradient(135deg, #25d366, #128c7e); color: white;
+             padding: 20px; text-align: center;
+         }
+         .chat-header h1 { font-size: 18px; font-weight: 600; margin-bottom: 8px; }
+         .model-selector {
+             background: rgba(255,255,255,0.2); border: none; color: white;
+             padding: 8px 12px; border-radius: 20px; font-size: 12px; cursor: pointer;
+         }
+         .chat-messages {
+             flex: 1; padding: 20px; overflow-y: auto; background: #f0f0f0;
+             display: flex; flex-direction: column; gap: 15px;
+         }
+         .message {
+             max-width: 80%; padding: 12px 16px; border-radius: 15px;
+             font-size: 14px; line-height: 1.4; animation: slideIn 0.3s ease;
+         }
+         .message.user {
+             background: #25d366; color: white; align-self: flex-end; border-bottom-right-radius: 5px;
+         }
+         .message.bot {
+             background: white; color: #333; align-self: flex-start;
+             border-bottom-left-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);
+         }
+         .message-time { font-size: 11px; opacity: 0.7; margin-top: 5px; }
+         .chat-input-container {
+             padding: 20px; background: white; border-top: 1px solid #e0e0e0;
+             display: flex; gap: 10px; align-items: center;
+         }
+         .chat-input {
+             flex: 1; padding: 12px 16px; border: 1px solid #e0e0e0;
+             border-radius: 25px; font-size: 14px; outline: none;
+         }
+         .chat-input:focus { border-color: #25d366; box-shadow: 0 0 0 2px rgba(37, 211, 102, 0.2); }
+         .send-button {
+             background: #25d366; color: white; border: none; border-radius: 50%;
+             width: 45px; height: 45px; cursor: pointer; display: flex;
+             align-items: center; justify-content: center;
+         }
+         .send-button:hover { background: #128c7e; }
+         .send-button:disabled { background: #ccc; cursor: not-allowed; }
+         .welcome-message {
+             text-align: center; color: #666; font-size: 13px;
+             padding: 20px; border-radius: 10px; background: rgba(255,255,255,0.7);
+         }
+         .typing-indicator {
+             display: none; align-items: center; gap: 5px; padding: 12px 16px;
+             background: white; border-radius: 15px; align-self: flex-start;
+         }
+         .typing-dot {
+             width: 8px; height: 8px; background: #999; border-radius: 50%;
+             animation: typing 1.4s infinite;
+         }
+         .typing-dot:nth-child(2) { animation-delay: 0.2s; }
+         .typing-dot:nth-child(3) { animation-delay: 0.4s; }
+         @keyframes typing { 0%, 60%, 100% { transform: translateY(0); } 30% { transform: translateY(-10px); } }
+         @keyframes slideIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
+         @media (max-width: 480px) { .chat-container { width: 100vw; height: 100vh; border-radius: 0; } }
      </style>
  </head>
  <body>
+     <div class="chat-container">
+         <div class="chat-header">
+             <h1>🤖 LyonPoy AI Chat</h1>
+             <select class="model-selector" id="modelSelect">
+                 <option value="gpt-2">GPT-2 (General)</option>
                  <option value="tinny-llama">Tinny Llama</option>
+                 <option value="pythia">Pythia</option>
+                 <option value="gpt-neo">GPT-Neo</option>
+                 <option value="distil-gpt-2">DistilGPT-2</option>
+                 <option value="gpt-2-tinny">GPT-2 Tinny</option>
                  <option value="bert-tinny">BERT Tinny</option>
                  <option value="albert-base-v2">ALBERT Base V2</option>
                  <option value="distilbert-base-uncased">DistilBERT</option>
                  <option value="electra-small">ELECTRA Small</option>
                  <option value="t5-small">T5 Small</option>
              </select>
          </div>
+         <div class="chat-messages" id="chatMessages">
+             <div class="welcome-message">
+                 👋 Halo! Saya LyonPoy AI Assistant.<br>
+                 Pilih model di atas dan mulai chat dengan saya!
+             </div>
+         </div>
+         <div class="typing-indicator" id="typingIndicator">
+             <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
+         </div>
+         <div class="chat-input-container">
+             <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan..." maxlength="500">
+             <button class="send-button" id="sendButton">➤</button>
          </div>
      </div>
      <script>
+         const chatMessages = document.getElementById('chatMessages');
+         const chatInput = document.getElementById('chatInput');
+         const sendButton = document.getElementById('sendButton');
+         const modelSelect = document.getElementById('modelSelect');
+         const typingIndicator = document.getElementById('typingIndicator');
+
+         function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }

          function addMessage(content, isUser = false) {
+             const messageDiv = document.createElement('div');
+             messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
+             const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
+             messageDiv.innerHTML = `${content}<div class="message-time">${time}</div>`;
+             chatMessages.appendChild(messageDiv);
+             scrollToBottom();
          }
+
+         function showTyping() { typingIndicator.style.display = 'flex'; scrollToBottom(); }
+         function hideTyping() { typingIndicator.style.display = 'none'; }
+
          async function sendMessage() {
+             const message = chatInput.value.trim();
              if (!message) return;
+
+             chatInput.disabled = true; sendButton.disabled = true;
+             addMessage(message, true); chatInput.value = ''; showTyping();
+
              try {
                  const response = await fetch('/chat', {
                      method: 'POST',
                      headers: { 'Content-Type': 'application/json' },
+                     body: JSON.stringify({ message: message, model: modelSelect.value })
                  });
                  const data = await response.json();
+                 hideTyping();
                  if (data.status === 'success') {
+                     addMessage(data.response);
                  } else {
+                     addMessage('❌ Maaf, terjadi kesalahan. Coba lagi nanti.');
                  }
              } catch (error) {
+                 hideTyping();
+                 addMessage('❌ Tidak dapat terhubung ke server.');
              }
+             chatInput.disabled = false; sendButton.disabled = false; chatInput.focus();
          }
+
+         sendButton.addEventListener('click', sendMessage);
+         chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
+         modelSelect.addEventListener('change', () => {
+             const modelName = modelSelect.options[modelSelect.selectedIndex].text;
+             addMessage(`🔄 Model diubah ke: ${modelName}`);
          });
+         window.addEventListener('load', () => chatInput.focus());
      </script>
  </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

+ # Chat API
  @app.post("/chat")
+ async def chat(request: ChatRequest):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

+         model_config = MODELS[model_id]

+         # Load the model if it has not been loaded yet
+         if model_id not in app.state.pipelines:
+             print(f"⏳ Loading {model_config['name']}...")
+             device = 0 if torch.cuda.is_available() else -1
+             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+             app.state.pipelines[model_id] = pipeline(
+                 task=model_config["task"],
+                 model=model_config["model_path"],
+                 device=device,
+                 torch_dtype=dtype
+             )

+         pipe = app.state.pipelines[model_id]

+         # Dispatch on the model's task type
+         if model_config["task"] == "text-generation":
+             result = pipe(
+                 request.message,
+                 max_length=min(len(request.message.split()) + 50, 200),
+                 temperature=0.7,
+                 do_sample=True,
+                 pad_token_id=pipe.tokenizer.eos_token_id
+             )[0]['generated_text']
+
+             # Clean output
+             if result.startswith(request.message):
+                 result = result[len(request.message):].strip()
+
+         elif model_config["task"] == "text-classification":
+             output = pipe(request.message)[0]
+             result = f"Sentimen: {output['label']} (Confidence: {output['score']:.2f})"
+
+         elif model_config["task"] == "text2text-generation":
+             result = pipe(request.message, max_length=150)[0]['generated_text']

+         return {"response": result, "model": model_config["name"], "status": "success"}

      except Exception as e:
+         print(f"❌ Error: {e}")
+         raise HTTPException(status_code=500, detail="Terjadi kesalahan")

+ # Health check
  @app.get("/health")
  async def health():
+     return {"status": "healthy", "gpu": torch.cuda.is_available()}

+ # Run app
  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
+     uvicorn.run(app, host="0.0.0.0", port=port)
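
For reference, a minimal client sketch against the two endpoints this commit leaves in place. It assumes the app is running locally on the default port 7860 and that the third-party requests package is installed; both the base URL and the example message are assumptions, not part of the commit.

import requests  # assumed available; any HTTP client works

BASE = "http://localhost:7860"  # assumption: local run with the default PORT

# Health check: reports whether torch can see a GPU.
print(requests.get(f"{BASE}/health").json())

# Chat request: "model" must be one of the MODELS keys, e.g. "gpt-2".
# The first request for a given model is slow, because the pipeline is
# downloaded and loaded lazily inside the /chat handler.
resp = requests.post(
    f"{BASE}/chat",
    json={"message": "Halo!", "model": "gpt-2"},
    timeout=300,
)
print(resp.json())  # e.g. {"response": "...", "model": "GPT-2", "status": "success"}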