# Spaces: Sleeping  (hosting-page status text captured with the file; not part of the program)
import logging
import os
from functools import lru_cache

from fastapi import FastAPI, HTTPException
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain_ollama import ChatOllama
from pydantic import BaseModel
# Ollama model tag: passed to ChatOllama and echoed in the root greeting
# and the startup log line.
MODEL_NAME = 'phi3:mini'

# Logging: INFO and above on the root logger, plus a logger named after
# this module for the handlers below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application instance for this service.
app = FastAPI()
def get_llm():
    """Build a ChatOllama chat model for the model named by ``MODEL_NAME``.

    Returns:
        A newly constructed ``ChatOllama`` instance on every call.
    """
    chat_model = ChatOllama(model=MODEL_NAME)
    return chat_model
@lru_cache(maxsize=1)
def get_chain():
    """Return the question-answering chain, building it on first use.

    The chain pipes a "Question/Answer" prompt template into the chat model
    from ``get_llm()`` and then through ``StrOutputParser``. It takes a
    mapping with a ``"question"`` key as input.

    The result is memoised, so every caller shares one chain instead of
    constructing a new prompt, model client and parser per request. If
    construction raises, nothing is cached and the next call retries.

    Returns:
        The composed ``prompt | llm | parser`` runnable.
    """
    question_prompt = ChatPromptTemplate.from_template("Question: {question}\n\nAnswer:")
    return question_prompt | get_llm() | StrOutputParser()
class Question(BaseModel):
    """Payload carrying a single question for the model."""

    # Raw question text; ask_question passes it to the chain as "question".
    text: str
@app.get("/")
def read_root():
    """Return a greeting that names the configured model.

    Registered on ``GET /`` so the handler is reachable; without a route
    decorator the function is never attached to ``app``.

    Returns:
        A dict with the single key ``"Hello"`` and a welcome string that
        embeds ``MODEL_NAME``.
    """
    return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}
@app.post("/ask")
async def ask_question(question: Question):
    """Answer a question by running it through the LLM chain.

    Registered on ``POST /ask`` (the path this handler's own error log
    names); without a route decorator it is never attached to ``app``.

    Args:
        question: Request body; its ``text`` field is sent to the chain
            as the ``"question"`` input.

    Returns:
        A dict ``{"answer": <chain output>}``.

    Raises:
        HTTPException: Status 500, with the underlying error text as
            ``detail``, if building or running the chain fails.
    """
    logger.info(f"Received question: {question.text}")
    try:
        chain = get_chain()
        # Use the awaitable entry point: a blocking invoke() inside an
        # ``async def`` handler would hold the event loop for the whole
        # generation and stall every other in-flight request.
        response = await chain.ainvoke({"question": question.text})
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"Error in /ask endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
    logger.info("Response generated successfully")
    return {"answer": response}
@app.on_event("startup")
async def startup_event():
    """Log the configured model and build the chain once at startup.

    Registered as a startup hook so it actually runs; without the
    decorator the function is never called by the application.
    NOTE(review): recent FastAPI releases prefer a ``lifespan`` handler
    over ``on_event`` -- consider migrating when ``app`` is next touched.
    """
    logger.info(f"Starting up with model: {MODEL_NAME}")
    # Build the chain eagerly so construction errors surface at startup
    # rather than on the first request. If get_chain() is memoised, this
    # also primes its cache.
    get_chain()
@app.on_event("shutdown")
async def shutdown_event():
    """Log that the application is shutting down.

    Registered as a shutdown hook so it actually runs; without the
    decorator the function is never called by the application.
    """
    logger.info("Shutting down")