# Source: Hugging Face file view, uploaded by howtomakepplragequit
# ("Upload 2 files", commit ac58efe, verified), 836 bytes.
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
# Hub repository id of the checkpoint this service loads.
model_name = "howtomakepplragequit/phi2-lora-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# float16 when CUDA is available, float32 otherwise; device placement is
# delegated to the library via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=(
        torch.float32 if not torch.cuda.is_available() else torch.float16
    ),
)

# Shared text-generation pipeline built once from the loaded model and tokenizer.
pipe = pipeline(task="text-generation", tokenizer=tokenizer, model=model)

# ASGI application object that route decorators attach to.
app = FastAPI()
@app.post("/generate")
async def generate(request: Request):
    """Generate a completion for the instruction carried in the request body.

    The body must be a JSON object. Its optional ``"prompt"`` key (default
    ``""``) is wrapped in the ``### Instruction:`` / ``### Response:``
    template and passed to the module-level ``pipe``.

    Returns:
        ``{"response": <text>}`` where ``<text>`` is the newly generated text
        with surrounding whitespace stripped.

    Raises:
        HTTPException: 400 if the body is not valid JSON; 422 if the decoded
            JSON value is not an object.
    """
    # Imported locally so the handler is self-contained and does not rely on
    # changes to the file's top-level import line.
    from fastapi import HTTPException

    try:
        data = await request.json()
    except ValueError as exc:  # covers json.JSONDecodeError and bad encodings
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON."
        ) from exc

    # A JSON list/string/number has no .get(); reject it instead of crashing.
    if not isinstance(data, dict):
        raise HTTPException(
            status_code=422, detail="Request body must be a JSON object."
        )

    prompt = data.get("prompt", "")
    formatted = f"### Instruction:\n{prompt}\n\n### Response:\n"

    # return_full_text=False makes the pipeline return only the completion, so
    # the prompt does not have to be cut off by searching for "### Response:"
    # (which picks the wrong segment when the model emits that marker itself).
    # NOTE(review): this call is synchronous and blocks the event loop for the
    # duration of generation; consider offloading it to a worker if concurrent
    # requests are expected.
    outputs = pipe(formatted, max_new_tokens=200, return_full_text=False)
    return {"response": outputs[0]["generated_text"].strip()}