nbroad
/

minilm-l6-onnx-hfie

Feature Extraction

text-embeddings-inference

Model card Files Files and versions Community

minilm-l6-onnx-hfie / handler.py

nbroad's picture

Update handler.py

9a6f2f9 about 2 years ago

history blame contribute delete

2.87 kB

	from optimum.onnxruntime import ORTModelForFeatureExtraction
	from transformers import AutoTokenizer, Pipeline
	import torch.nn.functional as F
	import torch
	from typing import Any, Dict, List

	# copied from the model card
	def mean_pooling(model_output, attention_mask):
	    """Average token embeddings along dimension 1, ignoring masked positions.

	    Args:
	        model_output: Indexable model result whose first element holds the
	            token embeddings.
	        attention_mask: Tensor that is 1 for positions to keep and 0 for
	            positions to ignore (assumed to match the embeddings' leading
	            two dimensions -- confirm against the tokenizer output).

	    Returns:
	        The mask-weighted sum over dimension 1 divided by the per-row mask
	        sum, with the divisor clamped to at least 1e-9.
	    """
	    hidden_states = model_output[0]
	    # Give the mask a trailing axis and stretch it to the embedding shape
	    # so it can be multiplied element-wise.
	    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
	    weighted_total = torch.sum(hidden_states * mask, 1)
	    # Clamp so a fully masked row divides by a tiny number instead of zero.
	    kept_count = torch.clamp(mask.sum(1), min=1e-9)
	    return weighted_total / kept_count


	class SentenceEmbeddingPipeline(Pipeline):
	    """Pipeline producing mean-pooled, L2-normalized sentence embeddings."""

	    def _sanitize_parameters(self, **kwargs):
	        # There are no hyperparameters to sanitize: the preprocess, forward
	        # and postprocess stages all receive empty keyword dictionaries.
	        return {}, {}, {}

	    def preprocess(self, inputs):
	        # Tokenize with padding and truncation, returning PyTorch tensors.
	        return self.tokenizer(inputs, padding=True, truncation=True, return_tensors='pt')

	    def _forward(self, model_inputs):
	        # Carry the attention mask alongside the raw model output because
	        # the pooling step in postprocess needs both.
	        return {
	            "outputs": self.model(**model_inputs),
	            "attention_mask": model_inputs["attention_mask"],
	        }

	    def postprocess(self, model_outputs):
	        # Pool token embeddings into one vector per input ...
	        pooled = mean_pooling(model_outputs["outputs"], model_outputs['attention_mask'])
	        # ... then scale each vector to unit L2 norm.
	        return F.normalize(pooled, p=2, dim=1)


	class EndpointHandler():
	    """Request handler that returns sentence embeddings for the given inputs."""

	    def __init__(self, path="./"):
	        """Load the ONNX model and tokenizer from ``path`` and build the pipeline.

	        Args:
	            path: Directory containing ``model_optimized.onnx`` and the
	                tokenizer files.
	        """
	        # load the optimized model
	        model = ORTModelForFeatureExtraction.from_pretrained(
	            path,
	            file_name="model_optimized.onnx",
	            # provider="CPUExecutionProvider",
	        )
	        tokenizer = AutoTokenizer.from_pretrained(path)
	        # create inference pipeline
	        self.pl = SentenceEmbeddingPipeline(model=model, tokenizer=tokenizer)

	    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[List[float]]]:
	        """Embed the request inputs.

	        Args:
	            data: Request payload. ``data["inputs"]`` holds the text to
	                embed; if that key is absent the payload itself is used as
	                the input. An optional ``data["parameters"]`` dict is
	                forwarded to the pipeline as keyword arguments. Both keys
	                are popped from ``data``.

	        Returns:
	            ``{"embeddings": rows}`` where ``rows`` is a list with one list
	            of floats per embedded input.
	        """
	        inputs = data.pop("inputs", data)
	        parameters = data.pop("parameters", None)

	        # pass inputs with all kwargs in data
	        if parameters is not None:
	            prediction = self.pl(inputs, **parameters)
	        else:
	            prediction = self.pl(inputs)

	        # The pipeline may hand back one tensor per input (a list/tuple)
	        # instead of a single batched tensor, e.g. when ``inputs`` is a list.
	        # Stack those into one 2-D tensor so the response shape is uniform.
	        if isinstance(prediction, (list, tuple)):
	            if not prediction:
	                return {"embeddings": []}
	            rows = [item.reshape(-1, item.shape[-1]) for item in prediction]
	            prediction = torch.cat(rows, dim=0)

	        # postprocess the prediction
	        return {"embeddings": prediction.cpu().tolist()}