quickprep / app.py
ragul2607's picture
initialize project structure with Dockerfile, app.py, and requirements.txt
68d3b08
raw
history blame
800 Bytes
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# FastAPI application instance; the @app.get / @app.post decorators further
# down register their handlers on this object.
app = FastAPI()
class PromptRequest(BaseModel):
    """Request body accepted by the ``/generate`` endpoint."""

    # Raw text handed to the tokenizer as the generation prompt.
    prompt: str
# Load the tokenizer and causal-LM weights once, at import time, so every
# request handler reuses the same module-level objects. Any other checkpoint
# compatible with AutoTokenizer / AutoModelForCausalLM can be substituted.
# NOTE(review): the repo id below says "2.2B" while the root endpoint message
# says "3.2B" — confirm which checkpoint is actually intended.
# NOTE(review): the id ends in "-GGUF"; GGUF repositories normally require a
# `gguf_file=...` argument to from_pretrained() — verify that this id loads
# with the plain calls below.
MODEL_NAME = "TheBloke/Llama-3-OpenOrca-2.2B-GGUF"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
@app.get("/")
def root():
    """Report that the service is up.

    Returns:
        A dict with a single ``"message"`` key holding a fixed status string.
    """
    status = {"message": "LLaMA 3.2B API for QuizForge is live!"}
    return status
@app.post("/generate")
def generate_text(data: PromptRequest):
    """Generate a completion for the submitted prompt.

    Args:
        data: Request body carrying the ``prompt`` string to feed the model.

    Returns:
        A dict with a single ``"response"`` key holding the decoded text of
        the first generated sequence, with special tokens removed.
    """
    # Tokenize to PyTorch tensors and place them on whatever device the model
    # lives on, so generation keeps working if the model is loaded off-CPU.
    inputs = tokenizer(data.prompt, return_tensors="pt").to(model.device)

    # Pure inference: no autograd bookkeeping is needed while generating.
    # The request may produce at most 1024 new tokens.
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=1024)

    # NOTE(review): for decoder-only models generate() returns the prompt
    # tokens followed by the continuation, so the decoded text presumably
    # starts with the prompt itself — confirm callers expect that.
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": output_text}