Lyon28 committed on
Commit 24088e0 · verified · 1 Parent(s): 3d635c7

Update app.py

Files changed (1)
  1. app.py +304 -213
app.py CHANGED
@@ -1,88 +1,220 @@
  import os
  import uvicorn
- from fastapi import FastAPI, HTTPException
  from fastapi.responses import HTMLResponse
- from fastapi.staticfiles import StaticFiles
  from pydantic import BaseModel
- from transformers import pipeline
  import torch
- from typing import Optional

  # Initialize FastAPI
- app = FastAPI(title="LyonPoy AI Chat")

- # All 11 models configuration
  MODELS = {
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
-         "task": "text-generation"
      },
-     "pythia": {
-         "name": "Pythia",
-         "model_path": "Lyon28/Pythia",
-         "task": "text-generation"
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
-         "task": "text-classification"
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
-         "task": "text-classification"
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
-         "task": "text2text-generation"
      },
-     "gpt-2": {
-         "name": "GPT-2",
-         "model_path": "Lyon28/GPT-2",
-         "task": "text-generation"
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
-         "task": "text-generation"
-     },
-     "distilbert-base-uncased": {
-         "name": "DistilBERT",
-         "model_path": "Lyon28/Distilbert-Base-Uncased",
-         "task": "text-classification"
-     },
-     "distil-gpt-2": {
-         "name": "DistilGPT-2",
-         "model_path": "Lyon28/Distil_GPT-2",
-         "task": "text-generation"
-     },
-     "gpt-2-tinny": {
-         "name": "GPT-2 Tinny",
-         "model_path": "Lyon28/GPT-2-Tinny",
-         "task": "text-generation"
-     },
-     "electra-small": {
-         "name": "ELECTRA Small",
-         "model_path": "Lyon28/Electra-Small",
-         "task": "text-classification"
      }
  }

  class ChatRequest(BaseModel):
      message: str
-     model: Optional[str] = "gpt-2"

- # Startup
  @app.on_event("startup")
  async def load_models():
-     app.state.pipelines = {}
-     os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
-     print("🤖 LyonPoy AI Chat Ready!")

- # Frontend route
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
@@ -91,229 +223,188 @@ async def get_frontend():
      <head>
          <meta charset="UTF-8">
          <meta name="viewport" content="width=device-width, initial-scale=1.0">
-         <title>LyonPoy AI Chat</title>
          <style>
              * { margin: 0; padding: 0; box-sizing: border-box; }
-             body {
-                 font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-                 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-                 height: 100vh; display: flex; justify-content: center; align-items: center;
-             }
-             .chat-container {
-                 width: 400px; height: 600px; background: #fff; border-radius: 15px;
-                 box-shadow: 0 20px 40px rgba(0,0,0,0.15); display: flex; flex-direction: column; overflow: hidden;
-             }
-             .chat-header {
-                 background: linear-gradient(135deg, #25d366, #128c7e); color: white;
-                 padding: 20px; text-align: center;
-             }
-             .chat-header h1 { font-size: 18px; font-weight: 600; margin-bottom: 8px; }
-             .model-selector {
-                 background: rgba(255,255,255,0.2); border: none; color: white;
-                 padding: 8px 12px; border-radius: 20px; font-size: 12px; cursor: pointer;
-             }
-             .chat-messages {
-                 flex: 1; padding: 20px; overflow-y: auto; background: #f0f0f0;
-                 display: flex; flex-direction: column; gap: 15px;
-             }
-             .message {
-                 max-width: 80%; padding: 12px 16px; border-radius: 15px;
-                 font-size: 14px; line-height: 1.4; animation: slideIn 0.3s ease;
-             }
-             .message.user {
-                 background: #25d366; color: white; align-self: flex-end; border-bottom-right-radius: 5px;
-             }
-             .message.bot {
-                 background: white; color: #333; align-self: flex-start;
-                 border-bottom-left-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);
-             }
-             .message-time { font-size: 11px; opacity: 0.7; margin-top: 5px; }
-             .chat-input-container {
-                 padding: 20px; background: white; border-top: 1px solid #e0e0e0;
-                 display: flex; gap: 10px; align-items: center;
-             }
-             .chat-input {
-                 flex: 1; padding: 12px 16px; border: 1px solid #e0e0e0;
-                 border-radius: 25px; font-size: 14px; outline: none;
-             }
-             .chat-input:focus { border-color: #25d366; box-shadow: 0 0 0 2px rgba(37, 211, 102, 0.2); }
-             .send-button {
-                 background: #25d366; color: white; border: none; border-radius: 50%;
-                 width: 45px; height: 45px; cursor: pointer; display: flex;
-                 align-items: center; justify-content: center;
-             }
-             .send-button:hover { background: #128c7e; }
-             .send-button:disabled { background: #ccc; cursor: not-allowed; }
-             .welcome-message {
-                 text-align: center; color: #666; font-size: 13px;
-                 padding: 20px; border-radius: 10px; background: rgba(255,255,255,0.7);
-             }
-             .typing-indicator {
-                 display: none; align-items: center; gap: 5px; padding: 12px 16px;
-                 background: white; border-radius: 15px; align-self: flex-start;
-             }
-             .typing-dot {
-                 width: 8px; height: 8px; background: #999; border-radius: 50%;
-                 animation: typing 1.4s infinite;
-             }
-             .typing-dot:nth-child(2) { animation-delay: 0.2s; }
-             .typing-dot:nth-child(3) { animation-delay: 0.4s; }
-             @keyframes typing { 0%, 60%, 100% { transform: translateY(0); } 30% { transform: translateY(-10px); } }
-             @keyframes slideIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
-             @media (max-width: 480px) { .chat-container { width: 100vw; height: 100vh; border-radius: 0; } }
          </style>
      </head>
      <body>
-         <div class="chat-container">
-             <div class="chat-header">
-                 <h1>🤖 LyonPoy AI Chat</h1>
-                 <select class="model-selector" id="modelSelect">
-                     <option value="gpt-2">GPT-2 (General)</option>
-                     <option value="tinny-llama">Tinny Llama</option>
-                     <option value="pythia">Pythia</option>
-                     <option value="gpt-neo">GPT-Neo</option>
-                     <option value="distil-gpt-2">DistilGPT-2</option>
                      <option value="gpt-2-tinny">GPT-2 Tinny</option>
                      <option value="bert-tinny">BERT Tinny</option>
                      <option value="albert-base-v2">ALBERT Base V2</option>
                      <option value="distilbert-base-uncased">DistilBERT</option>
                      <option value="electra-small">ELECTRA Small</option>
                      <option value="t5-small">T5 Small</option>
                  </select>
              </div>
-             <div class="chat-messages" id="chatMessages">
-                 <div class="welcome-message">
-                     👋 Halo! Saya LyonPoy AI Assistant.<br>
-                     Pilih model di atas dan mulai chat dengan saya!
-                 </div>
-             </div>
-             <div class="typing-indicator" id="typingIndicator">
-                 <div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div>
-             </div>
-             <div class="chat-input-container">
-                 <input type="text" class="chat-input" id="chatInput" placeholder="Ketik pesan..." maxlength="500">
-                 <button class="send-button" id="sendButton">➤</button>
              </div>
          </div>
          <script>
-             const chatMessages = document.getElementById('chatMessages');
-             const chatInput = document.getElementById('chatInput');
-             const sendButton = document.getElementById('sendButton');
-             const modelSelect = document.getElementById('modelSelect');
-             const typingIndicator = document.getElementById('typingIndicator');
-
-             function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }

              function addMessage(content, isUser = false) {
-                 const messageDiv = document.createElement('div');
-                 messageDiv.className = `message ${isUser ? 'user' : 'bot'}`;
-                 const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
-                 messageDiv.innerHTML = `${content}<div class="message-time">${time}</div>`;
-                 chatMessages.appendChild(messageDiv);
-                 scrollToBottom();
              }
-
-             function showTyping() { typingIndicator.style.display = 'flex'; scrollToBottom(); }
-             function hideTyping() { typingIndicator.style.display = 'none'; }
-
              async function sendMessage() {
-                 const message = chatInput.value.trim();
                  if (!message) return;
-
-                 chatInput.disabled = true; sendButton.disabled = true;
-                 addMessage(message, true); chatInput.value = ''; showTyping();
-
                  try {
                      const response = await fetch('/chat', {
                          method: 'POST',
                          headers: { 'Content-Type': 'application/json' },
-                         body: JSON.stringify({ message: message, model: modelSelect.value })
                      });
                      const data = await response.json();
-                     hideTyping();
                      if (data.status === 'success') {
-                         addMessage(data.response);
                      } else {
-                         addMessage('❌ Maaf, terjadi kesalahan. Coba lagi nanti.');
                      }
                  } catch (error) {
-                     hideTyping();
-                     addMessage('❌ Tidak dapat terhubung ke server.');
                  }
-                 chatInput.disabled = false; sendButton.disabled = false; chatInput.focus();
              }
-
-             sendButton.addEventListener('click', sendMessage);
-             chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
-             modelSelect.addEventListener('change', () => {
-                 const modelName = modelSelect.options[modelSelect.selectedIndex].text;
-                 addMessage(`🔄 Model diubah ke: ${modelName}`);
              });
-             window.addEventListener('load', () => chatInput.focus());
          </script>
      </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

- # Chat API
  @app.post("/chat")
- async def chat(request: ChatRequest):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

-         model_config = MODELS[model_id]

-         # Load the model if it is not cached yet
-         if model_id not in app.state.pipelines:
-             print(f"⏳ Loading {model_config['name']}...")
-             device = 0 if torch.cuda.is_available() else -1
-             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
-             app.state.pipelines[model_id] = pipeline(
-                 task=model_config["task"],
-                 model=model_config["model_path"],
-                 device=device,
-                 torch_dtype=dtype
-             )

-         pipe = app.state.pipelines[model_id]

-         # Process according to the model's task
-         if model_config["task"] == "text-generation":
-             result = pipe(
-                 request.message,
-                 max_length=min(len(request.message.split()) + 50, 200),
-                 temperature=0.7,
-                 do_sample=True,
-                 pad_token_id=pipe.tokenizer.eos_token_id
-             )[0]['generated_text']
-
-             # Clean output
-             if result.startswith(request.message):
-                 result = result[len(request.message):].strip()
-
-         elif model_config["task"] == "text-classification":
-             output = pipe(request.message)[0]
-             result = f"Sentimen: {output['label']} (Confidence: {output['score']:.2f})"
-
-         elif model_config["task"] == "text2text-generation":
-             result = pipe(request.message, max_length=150)[0]['generated_text']
-
-         return {"response": result, "model": model_config["name"], "status": "success"}

      except Exception as e:
-         print(f"Error: {e}")
          raise HTTPException(status_code=500, detail="Terjadi kesalahan")

- # Health check
  @app.get("/health")
  async def health():
-     return {"status": "healthy", "gpu": torch.cuda.is_available()}

- # Run app
  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
-     uvicorn.run(app, host="0.0.0.0", port=port)

@@ -1,88 +1,220 @@
  import os
  import uvicorn
+ import asyncio
+ from concurrent.futures import ThreadPoolExecutor
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
  from fastapi.responses import HTMLResponse
  from pydantic import BaseModel
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
  import torch
+ from typing import Optional, Dict
+ import time
+ import logging
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

  # Initialize FastAPI
+ app = FastAPI(title="LyonPoy AI Chat - Optimized")

+ # Optimized model configuration - prioritize smaller, faster models
  MODELS = {
+     "distil-gpt-2": {
+         "name": "DistilGPT-2",
+         "model_path": "Lyon28/Distil_GPT-2",
+         "task": "text-generation",
+         "priority": 1  # Highest priority - smallest model
+     },
+     "gpt-2-tinny": {
+         "name": "GPT-2 Tinny",
+         "model_path": "Lyon28/GPT-2-Tinny",
+         "task": "text-generation",
+         "priority": 2
+     },
      "tinny-llama": {
          "name": "Tinny Llama",
          "model_path": "Lyon28/Tinny-Llama",
+         "task": "text-generation",
+         "priority": 3
      },
+     "gpt-2": {
+         "name": "GPT-2",
+         "model_path": "Lyon28/GPT-2",
+         "task": "text-generation",
+         "priority": 4
      },
      "bert-tinny": {
          "name": "BERT Tinny",
          "model_path": "Lyon28/Bert-Tinny",
+         "task": "text-classification",
+         "priority": 5
      },
      "albert-base-v2": {
          "name": "ALBERT Base V2",
          "model_path": "Lyon28/Albert-Base-V2",
+         "task": "text-classification",
+         "priority": 6
+     },
+     "distilbert-base-uncased": {
+         "name": "DistilBERT",
+         "model_path": "Lyon28/Distilbert-Base-Uncased",
+         "task": "text-classification",
+         "priority": 7
+     },
+     "electra-small": {
+         "name": "ELECTRA Small",
+         "model_path": "Lyon28/Electra-Small",
+         "task": "text-classification",
+         "priority": 8
      },
      "t5-small": {
          "name": "T5 Small",
          "model_path": "Lyon28/T5-Small",
+         "task": "text2text-generation",
+         "priority": 9
      },
+     "pythia": {
+         "name": "Pythia",
+         "model_path": "Lyon28/Pythia",
+         "task": "text-generation",
+         "priority": 10
      },
      "gpt-neo": {
          "name": "GPT-Neo",
          "model_path": "Lyon28/GPT-Neo",
+         "task": "text-generation",
+         "priority": 11  # Largest model - lowest priority
      }
  }

  class ChatRequest(BaseModel):
      message: str
+     model: Optional[str] = "distil-gpt-2"  # Default to fastest model
+
+ # Global state
+ app.state.pipelines = {}
+ app.state.loading_models = set()
+ app.state.executor = ThreadPoolExecutor(max_workers=2)
+
+ # Optimized model loading
+ async def load_model_async(model_id: str):
+     """Load model in background thread"""
+     if model_id in app.state.loading_models:
+         return False
+
+     app.state.loading_models.add(model_id)
+
+     try:
+         model_config = MODELS[model_id]
+         logger.info(f"🔄 Loading {model_config['name']}...")
+
+         # Load in thread to avoid blocking
+         loop = asyncio.get_event_loop()
+
+         def load_model():
+             device = 0 if torch.cuda.is_available() else -1
+             dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+             return pipeline(
+                 task=model_config["task"],
+                 model=model_config["model_path"],
+                 device=device,
+                 torch_dtype=dtype,
+                 use_fast=True,
+                 trust_remote_code=True,
+                 low_cpu_mem_usage=True,
+                 # Optimization for faster inference
+                 pad_token_id=50256 if "gpt" in model_id else None
+             )
+
+         pipeline_obj = await loop.run_in_executor(app.state.executor, load_model)
+         app.state.pipelines[model_id] = pipeline_obj
+         logger.info(f"✅ {model_config['name']} loaded successfully")
+         return True
+
+     except Exception as e:
+         logger.error(f"❌ Failed to load {model_id}: {e}")
+         return False
+     finally:
+         app.state.loading_models.discard(model_id)

  @app.on_event("startup")
  async def load_models():
+     """Load high-priority models on startup"""
+     os.environ['HF_HOME'] = './cache/huggingface'  # Persistent cache
      os.makedirs(os.environ['HF_HOME'], exist_ok=True)
+
+     # Pre-load top 3 fastest models
+     priority_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])[:3]
+
+     tasks = []
+     for model_id in priority_models:
+         task = asyncio.create_task(load_model_async(model_id))
+         tasks.append(task)
+
+     # Load models concurrently
+     await asyncio.gather(*tasks, return_exceptions=True)
+     logger.info("🚀 LyonPoy AI Chat Ready!")

+ # Optimized inference
+ async def run_inference(model_id: str, message: str):
+     """Run inference in background thread"""
+     if model_id not in app.state.pipelines:
+         # Try to load model if not available
+         success = await load_model_async(model_id)
+         if not success:
+             raise HTTPException(status_code=503, detail=f"Model {model_id} unavailable")
+
+     pipe = app.state.pipelines[model_id]
+     model_config = MODELS[model_id]
+
+     loop = asyncio.get_event_loop()
+
+     def inference():
+         start_time = time.time()
+
+         try:
+             if model_config["task"] == "text-generation":
+                 # Optimized generation parameters
+                 result = pipe(
+                     message,
+                     max_new_tokens=min(50, 150 - len(message.split())),  # Shorter responses
+                     temperature=0.7,
+                     do_sample=True,
+                     top_p=0.9,
+                     top_k=50,
+                     repetition_penalty=1.1,
+                     pad_token_id=pipe.tokenizer.eos_token_id if hasattr(pipe.tokenizer, 'eos_token_id') else 50256
+                 )[0]['generated_text']
+
+                 # Clean output
+                 if result.startswith(message):
+                     result = result[len(message):].strip()
+
+                 # Limit response length
+                 if len(result) > 200:
+                     result = result[:200] + "..."
+
+             elif model_config["task"] == "text-classification":
+                 output = pipe(message)[0]
+                 result = f"Analisis: {output['label']} (Keyakinan: {output['score']:.2f})"
+
+             elif model_config["task"] == "text2text-generation":
+                 result = pipe(message, max_length=100, num_beams=2)[0]['generated_text']
+
+             inference_time = time.time() - start_time
+             logger.info(f"⚡ Inference time: {inference_time:.2f}s for {model_config['name']}")
+
+             return result
+
+         except Exception as e:
+             logger.error(f"Inference error: {e}")
+             raise e
+
+     return await loop.run_in_executor(app.state.executor, inference)
+
+ # Frontend route - simplified HTML
  @app.get("/", response_class=HTMLResponse)
  async def get_frontend():
      html_content = '''
@@ -91,229 +223,188 @@ async def get_frontend():
      <head>
          <meta charset="UTF-8">
          <meta name="viewport" content="width=device-width, initial-scale=1.0">
+         <title>LyonPoy AI Chat - Fast Mode</title>
          <style>
              * { margin: 0; padding: 0; box-sizing: border-box; }
+             body { font-family: system-ui; background: #f5f5f5; padding: 20px; }
+             .container { max-width: 600px; margin: 0 auto; background: white; border-radius: 10px; overflow: hidden; }
+             .header { background: #007bff; color: white; padding: 15px; }
+             .chat { height: 400px; overflow-y: auto; padding: 15px; background: #fafafa; }
+             .message { margin: 10px 0; padding: 8px 12px; border-radius: 8px; }
+             .user { background: #007bff; color: white; margin-left: 20%; }
+             .bot { background: white; border: 1px solid #ddd; margin-right: 20%; }
+             .input-area { padding: 15px; display: flex; gap: 10px; }
+             input { flex: 1; padding: 10px; border: 1px solid #ddd; border-radius: 5px; }
+             button { padding: 10px 15px; background: #007bff; color: white; border: none; border-radius: 5px; cursor: pointer; }
+             select { padding: 5px; margin-left: 10px; }
+             .loading { color: #666; font-style: italic; }
          </style>
      </head>
      <body>
+         <div class="container">
+             <div class="header">
+                 <h1>🚀 LyonPoy AI - Fast Mode</h1>
+                 <select id="model">
+                     <option value="distil-gpt-2">DistilGPT-2 (Fastest)</option>
                      <option value="gpt-2-tinny">GPT-2 Tinny</option>
+                     <option value="tinny-llama">Tinny Llama</option>
+                     <option value="gpt-2">GPT-2</option>
                      <option value="bert-tinny">BERT Tinny</option>
                      <option value="albert-base-v2">ALBERT Base V2</option>
                      <option value="distilbert-base-uncased">DistilBERT</option>
                      <option value="electra-small">ELECTRA Small</option>
                      <option value="t5-small">T5 Small</option>
+                     <option value="pythia">Pythia</option>
+                     <option value="gpt-neo">GPT-Neo (Slowest)</option>
                  </select>
              </div>
+             <div class="chat" id="chat"></div>
+             <div class="input-area">
+                 <input type="text" id="message" placeholder="Ketik pesan..." maxlength="200">
+                 <button onclick="sendMessage()">Kirim</button>
              </div>
          </div>
+
          <script>
+             const chat = document.getElementById('chat');
+             const messageInput = document.getElementById('message');
+             const modelSelect = document.getElementById('model');

              function addMessage(content, isUser = false) {
+                 const div = document.createElement('div');
+                 div.className = `message ${isUser ? 'user' : 'bot'}`;
+                 div.textContent = content;
+                 chat.appendChild(div);
+                 chat.scrollTop = chat.scrollHeight;
              }
+
              async function sendMessage() {
+                 const message = messageInput.value.trim();
                  if (!message) return;
+
+                 addMessage(message, true);
+                 messageInput.value = '';
+                 addMessage('⏳ Thinking...', false);
+
+                 const startTime = Date.now();
+
                  try {
                      const response = await fetch('/chat', {
                          method: 'POST',
                          headers: { 'Content-Type': 'application/json' },
+                         body: JSON.stringify({
+                             message: message,
+                             model: modelSelect.value
+                         })
                      });
+
                      const data = await response.json();
+                     const responseTime = ((Date.now() - startTime) / 1000).toFixed(1);
+
+                     // Remove loading message
+                     chat.removeChild(chat.lastElementChild);
+
                      if (data.status === 'success') {
+                         addMessage(`${data.response} (${responseTime}s)`, false);
                      } else {
+                         addMessage('❌ Error occurred', false);
                      }
                  } catch (error) {
+                     chat.removeChild(chat.lastElementChild);
+                     addMessage('❌ Connection error', false);
                  }
              }
+
+             messageInput.addEventListener('keypress', (e) => {
+                 if (e.key === 'Enter') sendMessage();
              });
+
+             // Show welcome message
+             addMessage('👋 Halo! Pilih model dan mulai chat. Model DistilGPT-2 paling cepat!', false);
          </script>
      </body>
  </html>
  '''
      return HTMLResponse(content=html_content)

+ # Optimized chat endpoint
  @app.post("/chat")
+ async def chat(request: ChatRequest, background_tasks: BackgroundTasks):
      try:
          model_id = request.model.lower()
          if model_id not in MODELS:
              raise HTTPException(status_code=400, detail="Model tidak tersedia")

+         # Limit message length for faster processing
+         message = request.message[:200]  # Max 200 chars

+         # Run inference
+         result = await run_inference(model_id, message)

+         # Load next priority model in background
+         background_tasks.add_task(preload_next_model, model_id)

+         return {
+             "response": result,
+             "model": MODELS[model_id]["name"],
+             "status": "success"
+         }

+     except HTTPException:
+         raise
      except Exception as e:
+         logger.error(f"Chat error: {e}")
          raise HTTPException(status_code=500, detail="Terjadi kesalahan")

+ async def preload_next_model(current_model: str):
+     """Preload next model in background"""
+     try:
+         # Find next unloaded model by priority
+         loaded_models = set(app.state.pipelines.keys())
+         all_models = sorted(MODELS.keys(), key=lambda x: MODELS[x]['priority'])
+
+         for model_id in all_models:
+             if model_id not in loaded_models and model_id not in app.state.loading_models:
+                 await load_model_async(model_id)
+                 break
+     except Exception as e:
+         logger.error(f"Background loading error: {e}")
+
+ # Health check with model status
  @app.get("/health")
  async def health():
+     loaded_models = list(app.state.pipelines.keys())
+     return {
+         "status": "healthy",
+         "gpu": torch.cuda.is_available(),
+         "loaded_models": loaded_models,
+         "loading_models": list(app.state.loading_models)
+     }
+
+ # Model status endpoint
+ @app.get("/models")
+ async def get_models():
+     models_status = {}
+     for model_id, config in MODELS.items():
+         models_status[model_id] = {
+             "name": config["name"],
+             "loaded": model_id in app.state.pipelines,
+             "loading": model_id in app.state.loading_models,
+             "priority": config["priority"]
+         }
+     return models_status
+
+ # Cleanup on shutdown
+ @app.on_event("shutdown")
+ async def cleanup():
+     app.state.executor.shutdown(wait=True)

  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
+     uvicorn.run(
+         app,
+         host="0.0.0.0",
+         port=port,
+         log_level="info",
+         access_log=False  # Disable access log for better performance
+     )
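
As a quick sanity check of the endpoints this commit introduces (/chat now defaults to distil-gpt-2 and truncates input to 200 characters; /health and the new /models endpoint report per-model load state), a minimal client sketch follows. It assumes the app is running locally on the default port 7860 and that the requests package is installed; neither assumption is part of the commit itself.

import requests  # assumed test dependency, not part of the commit

BASE = "http://localhost:7860"  # assumed local deployment, e.g. `python app.py`

# /health now lists loaded and currently-loading models alongside GPU status
print(requests.get(f"{BASE}/health").json())

# The new /models endpoint reports each model's name, loaded/loading state, and priority
print(requests.get(f"{BASE}/models").json())

# /chat request; `model` is optional and defaults to the fastest model
resp = requests.post(
    f"{BASE}/chat",
    json={"message": "Halo!", "model": "distil-gpt-2"},
    timeout=120,  # the first request may wait for a model to load
)
print(resp.json())  # expected: {"response": ..., "model": "DistilGPT-2", "status": "success"}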