howtomakepplragequit committed on
Commit
097de50
·
verified ·
1 Parent(s): 1638d7e

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +15 -16
main.py CHANGED
@@ -1,27 +1,26 @@
1
  import os
 
 
 
 
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
-
6
- # Force Hugging Face cache to a writable dir
7
- os.environ["HF_HOME"] = "/data"
8
 
9
- model_name = "howtomakepplragequit/phi2-lora-instruct"
 
10
 
11
- # Load tokenizer and model
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
- model = AutoModelForCausalLM.from_pretrained(model_name)
14
-
15
- # Create pipeline
16
- generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
17
 
18
- # FastAPI app setup
19
  app = FastAPI()
20
 
21
  class Prompt(BaseModel):
22
- prompt: str
23
 
24
- @app.post("/generate")
25
- def generate_text(data: Prompt):
26
- output = generator(data.prompt, max_length=200, do_sample=True)[0]["generated_text"]
27
- return {"response": output}
 
 
 
1
  import os
2
+ os.environ["HF_HOME"] = "/tmp"
3
+
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
+ from peft import PeftModel
6
  from fastapi import FastAPI
7
  from pydantic import BaseModel
 
 
 
 
8
 
9
# Hub identifiers: the base checkpoint, and the adapter repository that is
# layered on top of it (a LoRA adapter, judging by the repo name).
model_name = "microsoft/phi-2"
adapter_path = "howtomakepplragequit/phi2-lora-instruct"

# The tokenizer is loaded from the base checkpoint id, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base causal LM first, then wrap it with the adapter weights.
base_model = AutoModelForCausalLM.from_pretrained(model_name)
model = PeftModel.from_pretrained(base_model, adapter_path)

# FastAPI application instance that the endpoint decorators attach to.
app = FastAPI()
17
 
18
  class Prompt(BaseModel):
19
+ input: str
20
 
21
# POST /chat: tokenize the request text, run generation, and return the
# decoded result as {"response": <text>}.
# NOTE(review): for causal LMs the generated sequence normally begins with the
# prompt tokens, so the response likely echoes the input -- confirm that this
# is intended.
@app.post("/chat")
def chat(prompt: Prompt):
    # Encode the request text as PyTorch tensors ("pt").
    encoded = tokenizer(prompt.input, return_tensors="pt")

    # Generate with a cap of 50 newly produced tokens.
    sequences = model.generate(**encoded, max_new_tokens=50)

    # Only the first returned sequence is decoded; special tokens are dropped.
    first_sequence = sequences[0]
    text = tokenizer.decode(first_sequence, skip_special_tokens=True)
    return {"response": text}