# Spaces: Sleeping
from sentence_transformers import ( | |
SentenceTransformer, | |
export_static_quantized_openvino_model, | |
export_dynamic_quantized_onnx_model, | |
) | |
# Hugging Face Hub repository id. It is used both as the checkpoint that is
# loaded and as the repository the exported files are pushed to.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v1_0_7_8"
def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the chosen backend and push the export to the Hub.

    Every branch uploads through a pull request (``create_pr=True``) rather
    than committing directly to the repository.

    Args:
        backend: Inference backend to load the model with; either ``"onnx"``
            or ``"openvino"``.
        use_qint8: When true, run the backend's quantized export helper
            (static quantization for OpenVINO, dynamic quantization for ONNX)
            instead of pushing the plain backend model.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        # Plain (non-quantized) export of the backend-specific model files.
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            # NOTE(review): None presumably selects the library's default
            # quantization settings -- confirm against the installed version.
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        export_dynamic_quantized_onnx_model(
            model,
            # Named quantization preset understood by the export helper.
            "avx512_vnni",
            MODEL_NAME,
            push_to_hub=True,
            # Previously missing: without it this was the only branch that
            # pushed straight to the repository instead of opening a PR.
            create_pr=True,
        )
# Export configuration. Only the single combination selected here is exported
# per run; edit the two constants and re-run to export another one.
BACKEND = "openvino"
USE_QINT8 = False

print(f"Exporting {BACKEND} model with QINT8={USE_QINT8}")
export_model(backend=BACKEND, use_qint8=USE_QINT8)
# import tensorrt as trt | |
# print(trt.__version__) | |