meisaicheck-api / prepare.py
vumichien's picture
update model v1_0_7_8
4245809
from sentence_transformers import (
SentenceTransformer,
export_static_quantized_openvino_model,
export_dynamic_quantized_onnx_model,
)
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v1_0_7_8"
def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the chosen backend and upload the export.

    Args:
        backend: Either ``"openvino"`` or ``"onnx"``; forwarded to
            ``SentenceTransformer`` as its ``backend`` argument.
        use_qint8: When true, run the backend's quantized export helper
            instead of pushing the freshly loaded model as-is.

    Raises:
        ValueError: If ``backend`` is neither ``"openvino"`` nor ``"onnx"``.
    """
    # Reject unknown backends up front, before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    st_model = SentenceTransformer(MODEL_NAME, backend=backend)

    # Non-quantized exports take the same path for both backends: push the
    # loaded model to the MODEL_NAME repo as a pull request.
    if not use_qint8:
        st_model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        # quantization_config=None is passed through unchanged; presumably
        # the library substitutes its default config -- TODO confirm.
        export_static_quantized_openvino_model(
            st_model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        # NOTE(review): unlike every other push in this function, this call
        # does not pass create_pr=True -- confirm that is intentional.
        export_dynamic_quantized_onnx_model(
            st_model,
            quantization_config="avx512_vnni",
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
# Run configuration: a single backend/quantization pair is exported per
# execution. Edit the two constants below and re-run the script to export
# a different combination.
BACKEND, USE_QINT8 = "openvino", False

print(f"Exporting {BACKEND} model with QINT8={USE_QINT8}")
export_model(BACKEND, use_qint8=USE_QINT8)
# import tensorrt as trt
# print(trt.__version__)