Spaces:
Sleeping
Sleeping
File size: 1,472 Bytes
55567e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser
from langchain.prompts import ChatPromptTemplate
import logging
from functools import lru_cache
# Model name handed to ChatOllama and echoed in the welcome/startup messages.
MODEL_NAME = "tinyllama"

# Configure root logging at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Application instance that the route and event decorators below attach to.
app = FastAPI()
@lru_cache()
def get_llm():
    """Return the ChatOllama model for ``MODEL_NAME``.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    model object is constructed on the first call and that same instance is
    returned on every later call.
    """
    chat_model = ChatOllama(model=MODEL_NAME)
    return chat_model
@lru_cache()
def get_chain():
    """Return the cached prompt -> model -> string-parser pipeline.

    The pipeline expects a mapping with a ``question`` key, which fills the
    prompt template. ``lru_cache`` ensures the pipeline is assembled once and
    reused by every caller.
    """
    model = get_llm()
    template = ChatPromptTemplate.from_template("Question: {question}\n\nAnswer:")
    pipeline = template | model | StrOutputParser()
    return pipeline
# Request body model for the POST /ask endpoint.
class Question(BaseModel):
    # Question text; ask_question passes it to the chain under the "question" key.
    text: str
@app.get("/")
def read_root():
    """Return a welcome payload that names the configured model."""
    greeting = f"Welcome to {MODEL_NAME} FastAPI"
    return {"Hello": greeting}
@app.post("/ask")
async def ask_question(question: Question):
    """Answer a question by running it through the cached LLM chain.

    Args:
        question: Request body; its ``text`` field is passed to the chain as
            the ``question`` prompt variable.

    Returns:
        A dict of the form ``{"answer": <chain output>}``.

    Raises:
        HTTPException: Status 500 with the error text as ``detail`` when
            obtaining or running the chain fails.
    """
    logger.info("Received question: %s", question.text)
    try:
        chain = get_chain()
        # This handler runs on the event loop, so use the chain's async entry
        # point; a synchronous ``invoke`` here would stall every other request
        # until the model finishes generating.
        response = await chain.ainvoke({"question": question.text})
    except Exception as e:
        # logger.exception emits the same message at ERROR level and also
        # records the traceback.
        logger.exception("Error in /ask endpoint: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
    logger.info("Response generated successfully")
    return {"answer": response}
@app.on_event("startup")
async def startup_event():
    """Log the configured model and pre-build the cached chain at startup."""
    startup_message = f"Starting up with model: {MODEL_NAME}"
    logger.info(startup_message)
    # Populate the lru_cache now so the first /ask request reuses the
    # already-built chain instead of constructing it.
    get_chain()
@app.on_event("shutdown")
async def shutdown_event():
    """Log that the application is shutting down."""
    logger.info("Shutting down")