Spaces:
Running
on
Zero
Running
on
Zero
Lord-Raven
committed on
Commit
·
fec5fd6
1
Parent(s):
f9883d5
Trying ONNX models on CPU.
Browse files
- app.py +6 -12
- requirements.txt +1 -1
app.py
CHANGED
@@ -4,10 +4,9 @@ import gradio
|
|
4 |
import json
|
5 |
import time
|
6 |
from datetime import datetime
|
7 |
-
from transformers import AutoTokenizer, pipeline
|
8 |
from fastapi import FastAPI
|
9 |
from fastapi.middleware.cors import CORSMiddleware
|
10 |
-
from optimum.onnxruntime import ORTModelForSequenceClassification
|
11 |
|
12 |
# CORS Config - This isn't actually working; instead, I am taking a gross approach to origin whitelisting within the service.
|
13 |
app = FastAPI()
|
@@ -25,17 +24,12 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
|
25 |
|
26 |
# "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
|
27 |
|
28 |
-
model_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
|
29 |
-
tokenizer_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
|
|
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
model_cpu = ORTModelForSequenceClassification.from_pretrained(model_id=model_name_cpu, file_name=file_name)
|
35 |
-
tokenizer_cpu = AutoTokenizer.from_pretrained(model_name_cpu)
|
36 |
-
|
37 |
-
classifier_cpu = pipeline(task="zero-shot-classification", model=model_cpu, tokenizer=tokenizer_cpu)
|
38 |
-
classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0")
|
39 |
|
40 |
def classify(data_string, request: gradio.Request):
|
41 |
if request:
|
|
|
4 |
import json
|
5 |
import time
|
6 |
from datetime import datetime
|
|
|
7 |
from fastapi import FastAPI
|
8 |
from fastapi.middleware.cors import CORSMiddleware
|
9 |
+
from optimum.pipelines import pipeline
|
10 |
|
11 |
# CORS Config - This isn't actually working; instead, I am taking a gross approach to origin whitelisting within the service.
|
12 |
app = FastAPI()
|
|
|
24 |
|
25 |
# "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
|
26 |
|
27 |
+
# model_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
|
28 |
+
# tokenizer_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
|
29 |
+
model_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
|
30 |
|
31 |
+
classifier_cpu = pipeline(task="zero-shot-classification", model=model_name)
|
32 |
+
classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, device="cuda:0")
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
def classify(data_string, request: gradio.Request):
|
35 |
if request:
|
requirements.txt
CHANGED
@@ -4,5 +4,5 @@ huggingface_hub==0.32.4
|
|
4 |
json5==0.9.25
|
5 |
numpy
|
6 |
uvicorn
|
7 |
-
optimum[exporters,onnxruntime]==1.25.3
|
8 |
transformers==4.51.3
|
|
|
4 |
json5==0.9.25
|
5 |
numpy
|
6 |
uvicorn
|
7 |
+
optimum[pipelines,exporters,onnxruntime]==1.25.3
|
8 |
transformers==4.51.3
|