import os

# Point the Hugging Face cache at a writable location *before* importing
# transformers, since the default cache path is resolved at import time.
os.environ.setdefault("HF_HOME", "/tmp/.cache/huggingface")
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from typing import Optional

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from transformers import pipeline

# Initialize FastAPI
app = FastAPI(title="LyonPoy AI Chat")

# All 11 models configuration
MODELS = {
    "tinny-llama": {"name": "Tinny Llama", "model_path": "Lyon28/Tinny-Llama", "task": "text-generation"},
    "pythia": {"name": "Pythia", "model_path": "Lyon28/Pythia", "task": "text-generation"},
    "bert-tinny": {"name": "BERT Tinny", "model_path": "Lyon28/Bert-Tinny", "task": "text-classification"},
    "albert-base-v2": {"name": "ALBERT Base V2", "model_path": "Lyon28/Albert-Base-V2", "task": "text-classification"},
    "t5-small": {"name": "T5 Small", "model_path": "Lyon28/T5-Small", "task": "text2text-generation"},
    "gpt-2": {"name": "GPT-2", "model_path": "Lyon28/GPT-2", "task": "text-generation"},
    "gpt-neo": {"name": "GPT-Neo", "model_path": "Lyon28/GPT-Neo", "task": "text-generation"},
    "distilbert-base-uncased": {"name": "DistilBERT", "model_path": "Lyon28/Distilbert-Base-Uncased", "task": "text-classification"},
    "distil-gpt-2": {"name": "DistilGPT-2", "model_path": "Lyon28/Distil_GPT-2", "task": "text-generation"},
    "gpt-2-tinny": {"name": "GPT-2 Tinny", "model_path": "Lyon28/GPT-2-Tinny", "task": "text-generation"},
    "electra-small": {"name": "ELECTRA Small", "model_path": "Lyon28/Electra-Small", "task": "text-classification"},
}


class ChatRequest(BaseModel):
    message: str
    model: Optional[str] = "gpt-2"


# Startup: pipelines are loaded lazily per request, so only prepare the cache here
@app.on_event("startup")
async def load_models():
    app.state.pipelines = {}
    print("🤖 LyonPoy AI Chat Ready!")


# Frontend route
@app.get("/", response_class=HTMLResponse)
async def get_frontend():
    html_content = '''<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>LyonPoy AI Chat</title>
</head>
<body>

    <h1>🤖 LyonPoy AI Chat</h1>

    <p>
        👋 Hello! I am the LyonPoy AI Assistant.<br>
        Pick a model above and start chatting with me!
    </p>
</body>
</html>
'''
    return HTMLResponse(content=html_content)


# Chat API
@app.post("/chat")
async def chat(request: ChatRequest):
    model_id = request.model.lower()
    # Validate before the try block so an unknown model returns 400, not 500
    if model_id not in MODELS:
        raise HTTPException(status_code=400, detail="Model not available")
    model_config = MODELS[model_id]

    try:
        # Load the model on first use and cache the pipeline
        if model_id not in app.state.pipelines:
            print(f"⏳ Loading {model_config['name']}...")
            device = 0 if torch.cuda.is_available() else -1
            dtype = torch.float16 if torch.cuda.is_available() else torch.float32
            app.state.pipelines[model_id] = pipeline(
                task=model_config["task"],
                model=model_config["model_path"],
                device=device,
                torch_dtype=dtype,
            )
        pipe = app.state.pipelines[model_id]

        # Process according to the task type
        if model_config["task"] == "text-generation":
            result = pipe(
                request.message,
                # Cap new tokens rather than total length, so long prompts
                # are not truncated by the generation limit
                max_new_tokens=100,
                temperature=0.7,
                do_sample=True,
                pad_token_id=pipe.tokenizer.eos_token_id,
            )[0]["generated_text"]
            # Strip the echoed prompt from the generated text
            if result.startswith(request.message):
                result = result[len(request.message):].strip()
        elif model_config["task"] == "text-classification":
            output = pipe(request.message)[0]
            result = f"Sentiment: {output['label']} (Confidence: {output['score']:.2f})"
        elif model_config["task"] == "text2text-generation":
            result = pipe(request.message, max_length=150)[0]["generated_text"]

        return {"response": result, "model": model_config["name"], "status": "success"}
    except Exception as e:
        print(f"❌ Error: {e}")
        raise HTTPException(status_code=500, detail="An internal error occurred")


# Health check
@app.get("/health")
async def health():
    return {"status": "healthy", "gpu": torch.cuda.is_available()}


# Run app
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)
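
# Example client call (a sketch, not part of the app): assumes the server is
# running locally on the default port 7860 and that `requests` is installed.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "Hello!", "model": "gpt-2"},
#   )
#   print(resp.json())  # {"response": "...", "model": "GPT-2", "status": "success"}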