Lord-Raven committed on
Commit
5deb965
·
1 Parent(s): 402f3c1

Trying ONNX models on CPU.

Browse files
Files changed (2) hide show
  1. app.py +8 -4
  2. requirements.txt +1 -1
app.py CHANGED
@@ -5,7 +5,8 @@ import json
5
  import onnxruntime
6
  import time
7
  from datetime import datetime
8
- from transformers import pipeline
 
9
  from fastapi import FastAPI
10
  from fastapi.middleware.cors import CORSMiddleware
11
 
@@ -24,10 +25,13 @@ print(f"Is CUDA available: {torch.cuda.is_available()}")
24
  if torch.cuda.is_available():
25
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
26
 
27
- model_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
28
- tokenizer_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
29
 
30
- classifier_cpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name)
 
 
 
31
  classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0") if torch.cuda.is_available() else classifier_cpu
32
 
33
  def classify(data_string, request: gradio.Request):
 
5
  import onnxruntime
6
  import time
7
  from datetime import datetime
8
+ from transformers import AutoTokenizer, pipeline
9
+ from optimum.onnxruntime import ORTModelForSequenceClassification
10
  from fastapi import FastAPI
11
  from fastapi.middleware.cors import CORSMiddleware
12
 
 
25
  if torch.cuda.is_available():
26
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
27
 
28
+ model_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
29
+ tokenizer_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
30
 
31
+ model_cpu = ORTModelForSequenceClassification.from_pretrained(model_id=model_name, subfolder="onnx", file_name="model_uint8.onnx")
32
+ tokenizer_cpu = AutoTokenizer.from_pretrained(tokenizer_name)
33
+
34
+ classifier_cpu = pipeline(task="zero-shot-classification", model=model_cpu, tokenizer=tokenizer_cpu)
35
  classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0") if torch.cuda.is_available() else classifier_cpu
36
 
37
  def classify(data_string, request: gradio.Request):
requirements.txt CHANGED
@@ -5,4 +5,4 @@ json5==0.9.25
5
  numpy
6
  uvicorn
7
  optimum[onnxruntime-gpu]==1.24.0
8
- transformers==4.36
 
5
  numpy
6
  uvicorn
7
  optimum[onnxruntime-gpu]==1.24.0
8
+ transformers==4.48.0