GodSaveMoney / core / make_pipeline.py
Jeong-hun Kim
add "start with localhost", code refactored
aab927d
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import os
class MakePipeline:
    # Model name
    MODEL_ID = "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-1.5B"

    # Attributes set here and filled in by build():
    #   model_id  - Hugging Face model identifier
    #   tokenizer - tokenizer for the model
    #   llm       - text-generation pipeline
    def __init__(self, model_id: str = MODEL_ID):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("[torch] CUDA is available:", torch.cuda.is_available())
        print("[device] default:", self.device)
        self.model_id = model_id
        self.tokenizer = None
        self.llm = None
# ๋ชจ๋ธ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
def build(self, type: str):
if(type == 'hf'):
# ํ—ˆ๊น… ํŽ˜์ด์Šค secret์— ๋“ฑ๋ก๋œ ํ† ํฐ ๋กœ๋“œ
access_token = os.environ.get("HF_TOKEN")
else:
# ๋กœ์ปฌ ์‹คํ–‰์‹œ token.txt์—์„œ ํ† ํฐ ๋กœ๋“œ
with open("token.txt", "r") as f:
access_token = f.read().strip()
tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=access_token)
model = AutoModelForCausalLM.from_pretrained(self.model_id, token=access_token)
self.tokenizer = tokenizer
# ํ—ˆ๊น… ํŽ˜์ด์Šค ์—…๋กœ๋“œ ์‹œ f16 ์‚ฌ์šฉ ์•ˆ ํ•จ
if(type == 'hf'):
llm = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
)
else:
model.eval()
llm = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
torch_dtype=torch.float16
)
if torch.cuda.is_available():
model.to("cuda")
self.llm = llm
# ๋ชจ๋ธ ์ถœ๋ ฅ ์ƒ์„ฑ ํ•จ์ˆ˜
def character_chat(self, prompt):
print("[debug] generating...")
outputs = self.llm(
prompt,
do_sample=True,
max_new_tokens=96,
temperature=0.7,
top_p=0.9,
repetition_penalty=1.05,
eos_token_id=self.tokenizer.eos_token_id,
return_full_text=True
)
full_text = outputs[0]['generated_text']
return full_text
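
# Example usage sketch, assuming a local run where the Hugging Face access token
# is stored in token.txt next to this file (any build type other than 'hf' takes
# the local path). The prompt string is purely illustrative.
if __name__ == "__main__":
    mp = MakePipeline()
    mp.build("local")  # pass 'hf' instead when running as a Hugging Face Space
    print(mp.character_chat("Introduce yourself in one sentence."))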