Morgan Funtowicz
committed on
Commit
·
5460614
1
Parent(s):
159c6be
feat(http): move from openai only to http frontend package
Browse files- handler.py +6 -7
handler.py
CHANGED
@@ -1,17 +1,17 @@
|
|
1 |
import platform
|
2 |
-
from functools import reduce
|
3 |
from operator import itemgetter
|
4 |
from typing import Generator, Tuple
|
5 |
|
6 |
import torch
|
7 |
-
from hfendpoints.
|
8 |
-
from hfendpoints.
|
9 |
from intel_extension_for_pytorch.cpu.runtime import pin
|
10 |
from loguru import logger
|
11 |
-
from hfendpoints import EndpointConfig, Handler, __version__
|
12 |
from sentence_transformers import SentenceTransformer
|
13 |
-
from torch.nn import Module
|
14 |
from torch.backends.mkldnn import VERBOSE_ON_CREATION, VERBOSE_OFF
|
|
|
|
|
|
|
15 |
|
16 |
# Not used for now
|
17 |
SUPPORTED_AMP_DTYPES = {torch.float32, torch.bfloat16}
|
@@ -59,7 +59,7 @@ def get_usage(mask: torch.IntTensor) -> Usage:
|
|
59 |
|
60 |
|
61 |
class SentenceTransformerWithUsage(Module):
|
62 |
-
__slots__ = ("_model",
|
63 |
|
64 |
def __init__(self, model: SentenceTransformer):
|
65 |
super().__init__()
|
@@ -88,7 +88,6 @@ class SentenceTransformerHandler(Handler):
|
|
88 |
dtype = torch.bfloat16 if has_bf16_support() else torch.float32
|
89 |
model = SentenceTransformer(self._config.model_id, device="cpu", model_kwargs={"torch_dtype": dtype})
|
90 |
|
91 |
-
|
92 |
if platform.machine() == "x86_64":
|
93 |
import intel_extension_for_pytorch as ipex
|
94 |
logger.info(f"x64 platform detected: {platform.processor()}")
|
|
|
1 |
import platform
|
|
|
2 |
from operator import itemgetter
|
3 |
from typing import Generator, Tuple
|
4 |
|
5 |
import torch
|
6 |
+
from hfendpoints.http import Context, run
|
7 |
+
from hfendpoints.http.embeddings import Embedding, EmbeddingEndpoint, EmbeddingRequest, EmbeddingResponse, Usage
|
8 |
from intel_extension_for_pytorch.cpu.runtime import pin
|
9 |
from loguru import logger
|
|
|
10 |
from sentence_transformers import SentenceTransformer
|
|
|
11 |
from torch.backends.mkldnn import VERBOSE_ON_CREATION, VERBOSE_OFF
|
12 |
+
from torch.nn import Module
|
13 |
+
|
14 |
+
from hfendpoints import EndpointConfig, Handler, __version__
|
15 |
|
16 |
# Not used for now
|
17 |
SUPPORTED_AMP_DTYPES = {torch.float32, torch.bfloat16}
|
|
|
59 |
|
60 |
|
61 |
class SentenceTransformerWithUsage(Module):
|
62 |
+
__slots__ = ("_model",)
|
63 |
|
64 |
def __init__(self, model: SentenceTransformer):
|
65 |
super().__init__()
|
|
|
88 |
dtype = torch.bfloat16 if has_bf16_support() else torch.float32
|
89 |
model = SentenceTransformer(self._config.model_id, device="cpu", model_kwargs={"torch_dtype": dtype})
|
90 |
|
|
|
91 |
if platform.machine() == "x86_64":
|
92 |
import intel_extension_for_pytorch as ipex
|
93 |
logger.info(f"x64 platform detected: {platform.processor()}")
|