from typing import Dict, List, Any
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

class EndpointHandler:
    def __init__(self, path=""):
        # `path` is the local model directory supplied by the endpoint runtime;
        # it is unused here because the weights are pulled from the Hub instead.
        # Load the base model, then apply the fine-tuned PEFT (LoRA) adapter on top.
        model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
        model = PeftModel.from_pretrained(model, "srmorfi/phi3-mini-med-adapter")
        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
        self.model = model
        self.tokenizer = tokenizer
        self.model.eval()  # disable dropout etc.; we only run inference

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Process input data and generate predictions using the model.
        
        Args:
            data (Dict[str, Any]): Input data containing either an "inputs" key
                                  or the input directly in the data dictionary.
        
        Returns:
            List[Dict[str, Any]]: Processed model predictions that will be serialized and returned.
        """
        # Accept either {"inputs": "..."} or the prompt passed directly.
        inputs = data.pop("inputs", data)
        inputs = self.tokenizer(inputs, return_tensors="pt")

        with torch.no_grad():
            # Pass the attention mask along with the input IDs so the model
            # distinguishes real tokens from padding during generation.
            outputs = self.model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=10,
            )
            output = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

        return [{"generated_text": output}]
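

# --- Local smoke test (illustrative) ---
# A minimal sketch of how the endpoint runtime invokes this handler: it
# instantiates EndpointHandler once at startup, then calls it per request
# with a JSON-like dict. The prompt below is a made-up example, not part
# of the deployed handler.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({"inputs": "What are the common symptoms of anemia?"})
    print(result)  # e.g. [{"generated_text": "..."}]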