howtomakepplragequit committed on
Commit ac58efe · verified · 1 Parent(s): e015985

Upload 2 files

Files changed (2)
  1. DOCKERFILE +11 -0
  2. main.py +22 -0
DOCKERFILE ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.10-slim
+
+ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+
+ RUN pip install --upgrade pip
+ RUN pip install torch transformers fastapi uvicorn bitsandbytes accelerate
+
+ WORKDIR /app
+ COPY main.py .
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
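For local testing, the image can be built and run with something like docker build -t phi2-api . followed by docker run -p 7860:7860 phi2-api (the image name is illustrative, not part of the commit); a GPU host would additionally pass --gpus all so the model can load in float16.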
main.py ADDED
@@ -0,0 +1,22 @@
+ from fastapi import FastAPI, Request
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import torch
+
+ app = FastAPI()
+
+ model_name = "howtomakepplragequit/phi2-lora-instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+     device_map="auto"
+ )
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+ @app.post("/generate")
+ async def generate(request: Request):
+     data = await request.json()
+     prompt = data.get("prompt", "")
+     formatted = f"### Instruction:\n{prompt}\n\n### Response:\n"
+     result = pipe(formatted, max_new_tokens=200)[0]["generated_text"]
+     return {"response": result.split("### Response:")[-1].strip()}
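Once the container is running with port 7860 published, the /generate endpoint can be exercised with a minimal client sketch like the one below; the URL and prompt are illustrative and assume a local run (requests is not installed by this Dockerfile).

import requests

# Call the /generate endpoint defined in main.py above.
# localhost:7860 assumes the container port from the CMD is published locally.
resp = requests.post(
    "http://localhost:7860/generate",
    json={"prompt": "Explain LoRA fine-tuning in one sentence."},
)
print(resp.json()["response"])

The server strips everything up to the last "### Response:" marker, so the returned JSON contains only the model's completion, not the echoed instruction template.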