Lord-Raven committed on
Commit ff70002 · 1 Parent(s): 5deb965

Trying ONNX models on CPU.

Files changed (1)
  app.py  +4 -8
app.py CHANGED
@@ -5,8 +5,7 @@ import json
 import onnxruntime
 import time
 from datetime import datetime
-from transformers import AutoTokenizer, pipeline
-from optimum.onnxruntime import ORTModelForSequenceClassification
+from transformers import pipeline
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware

@@ -25,13 +24,10 @@ print(f"Is CUDA available: {torch.cuda.is_available()}")
 if torch.cuda.is_available():
     print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

-model_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
-tokenizer_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
+model_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
+tokenizer_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"

-model_cpu = ORTModelForSequenceClassification.from_pretrained(model_id=model_name, subfolder="onnx", file_name="model_uint8.onnx")
-tokenizer_cpu = AutoTokenizer.from_pretrained(tokenizer_name)
-
-classifier_cpu = pipeline(task="zero-shot-classification", model=model_cpu, tokenizer=tokenizer_cpu)
+classifier_cpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name)
 classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0") if torch.cuda.is_available() else classifier_cpu

 def classify(data_string, request: gradio.Request):
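
The lines removed here are the ONNX-on-CPU path referenced by the commit message: a quantized model_uint8.onnx export loaded through optimum's ORTModelForSequenceClassification and wrapped in the regular transformers pipeline. A self-contained sketch of that approach, assuming the ModernBERT checkpoint actually ships the quantized export under an onnx/ subfolder as the removed call implies:

    # Sketch of the ONNX CPU path this commit backs out; assumes the checkpoint
    # ships a quantized export at onnx/model_uint8.onnx, as the removed call implies.
    from transformers import AutoTokenizer, pipeline
    from optimum.onnxruntime import ORTModelForSequenceClassification

    model_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"

    # Load the quantized ONNX weights; onnxruntime falls back to its CPU execution provider.
    model_cpu = ORTModelForSequenceClassification.from_pretrained(
        model_name, subfolder="onnx", file_name="model_uint8.onnx"
    )
    tokenizer_cpu = AutoTokenizer.from_pretrained(model_name)

    # ORT models slot into the standard zero-shot-classification pipeline.
    classifier_cpu = pipeline("zero-shot-classification", model=model_cpu, tokenizer=tokenizer_cpu)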
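
After this change both code paths use the stock transformers pipeline; the GPU variant is only constructed when CUDA is available, otherwise classifier_gpu simply aliases classifier_cpu. A minimal usage sketch of the resulting zero-shot classifier (the input text and candidate labels below are illustrative placeholders, not taken from this repo):

    # Minimal zero-shot usage sketch; text and labels are placeholders.
    from transformers import pipeline

    model_name = "MoritzLaurer/roberta-large-zeroshot-v2.0-c"
    classifier_cpu = pipeline("zero-shot-classification", model=model_name, tokenizer=model_name)

    result = classifier_cpu(
        "The app keeps crashing whenever I upload a file.",
        candidate_labels=["bug report", "feature request", "praise"],
    )
    print(result["labels"][0], result["scores"][0])  # top label and its score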