Upload 2 files
Browse files- DOCKERFILE +11 -0
- main.py +22 -0
DOCKERFILE
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image that serves the FastAPI app defined in main.py with uvicorn.
FROM python:3.10-slim

# Install git. --no-install-recommends keeps optional packages out of the
# image, and the apt lists are deleted in the same layer so they never persist.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir stops pip from storing its download cache in the image layers
# (the torch wheels alone are large).
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir torch transformers fastapi uvicorn bitsandbytes accelerate

WORKDIR /app
COPY main.py .

# Serve main:app on all interfaces, port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
main.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, Request
|
2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
3 |
+
import torch
|
4 |
+
|
5 |
+
# ASGI application object that the route decorators below attach to.
app = FastAPI()

# Checkpoint identifier passed to both from_pretrained() calls.
model_name = "howtomakepplragequit/phi2-lora-instruct"

# Half precision when a CUDA device is available, full precision otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=_weights_dtype,
    device_map="auto",
)

# Text-generation pipeline built once at import time and reused per request.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
|
15 |
+
|
16 |
+
@app.post("/generate")
async def generate(request: Request):
    """Generate a completion for the ``prompt`` field of a JSON request body.

    The prompt is wrapped in the ``### Instruction:`` / ``### Response:``
    template, passed to the module-level ``pipe`` with ``max_new_tokens=200``,
    and the newly generated text is returned with surrounding whitespace
    stripped.

    Args:
        request: Incoming request whose body is a JSON object. A missing
            ``prompt`` key is treated as an empty prompt.

    Returns:
        A dict of the form ``{"response": <generated text>}``.

    Raises:
        HTTPException: 400 if the body is not valid JSON or is not a JSON
            object.
    """
    # Imported locally so the handler is self-contained with respect to the
    # module's import block.
    from fastapi import HTTPException

    try:
        data = await request.json()
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON."
        ) from exc
    if not isinstance(data, dict):
        # Lists, strings, numbers, etc. have no .get(); reject them as a
        # client error instead of failing with an AttributeError (HTTP 500).
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object."
        )

    prompt = data.get("prompt", "")
    formatted = f"### Instruction:\n{prompt}\n\n### Response:\n"

    # return_full_text=False makes the pipeline return only the continuation,
    # so the prompt never has to be cut off by searching for the
    # "### Response:" marker (which breaks when the model's own output
    # repeats that marker).
    # NOTE(review): pipe() is a blocking call inside an async handler, so the
    # event loop is held for the duration of generation — confirm whether
    # offloading to a worker thread is acceptable for this model.
    outputs = pipe(formatted, max_new_tokens=200, return_full_text=False)
    return {"response": outputs[0]["generated_text"].strip()}
|