import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


class MakePipeline:
    # Model name
    MODEL_ID = "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-1.5B"

    # Attributes initialized here:
    #   model_id
    #   tokenizer
    #   llm
    def __init__(self, model_id: str = MODEL_ID):
        print("[cuda] is available:", torch.cuda.is_available())
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("[device] default:", self.device)
        self.model_id = model_id
        self.tokenizer = None
        self.llm = None

    # Load the model
    def build(self, type: str):
        if type == 'hf':
            # Load the token registered as a Hugging Face secret
            access_token = os.environ.get("HF_TOKEN")
        else:
            # When running locally, load the token from token.txt
            with open("token.txt", "r") as f:
                access_token = f.read().strip()

        tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=access_token)
        model = AutoModelForCausalLM.from_pretrained(self.model_id, token=access_token)
        self.tokenizer = tokenizer

        # Do not use fp16 when deployed to Hugging Face
        if type == 'hf':
            llm = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
            )
        else:
            model.eval()
            if torch.cuda.is_available():
                # Cast and move the model before building the pipeline:
                # a torch_dtype passed to pipeline() is ignored for an
                # already-instantiated model, and the pipeline pins its
                # device at construction time.
                model = model.half().to(self.device)
            llm = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device=self.device,
            )
        self.llm = llm

    # Generate model output
    def character_chat(self, prompt):
        print("[debug] generating...")
        outputs = self.llm(
            prompt,
            do_sample=True,
            max_new_tokens=96,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.05,
            eos_token_id=self.tokenizer.eos_token_id,
            return_full_text=True,
        )
        full_text = outputs[0]['generated_text']
        return full_text
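

# Example usage: a minimal sketch, not part of the original module. "local"
# stands for any non-'hf' mode string and assumes a token.txt file sits next
# to this script; the prompt below is illustrative only.
if __name__ == "__main__":
    chat = MakePipeline()
    chat.build("local")
    print(chat.character_chat("Introduce yourself in one sentence."))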