Spaces:
Running
Running
from transformers import AutoTokenizer | |
from transformers import AutoModelForSequenceClassification | |
from transformers import pipeline, Pipeline | |
from joblib import load | |
def load_model(path2chkpt: str, path2mapping: str):
    """Build the text-classification pipeline and load the class-name mapping.

    Args:
        path2chkpt: Location of the sequence-classification checkpoint,
            passed to ``AutoModelForSequenceClassification.from_pretrained``.
        path2mapping: Path of the joblib-serialized mapping object.

    Returns:
        A ``(pipe, class2name)`` tuple: the ``"text-classification"``
        pipeline and whatever object joblib deserialized from
        ``path2mapping`` (presumably a label -> readable-name dict; confirm
        against the code that dumped it).
    """
    # NOTE: the tokenizer is always fetched by the fixed hub id below, not
    # read from ``path2chkpt``.
    classifier = pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(path2chkpt),
        tokenizer=AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased"),
    )
    label_names = load(path2mapping)
    return classifier, label_names
def top_95_labels(pipe: Pipeline, class2name: dict[str, str], title: str, abstract: str):
    """Return class names whose cumulative score first reaches 0.95.

    The title and abstract are joined with a single ``"."`` and passed to
    ``pipe`` with ``top_k=20``. Predictions are consumed in the order the
    pipeline returns them (presumably highest score first; confirm against
    the pipeline documentation) until the running sum of their ``"score"``
    values is at least 0.95.

    Args:
        pipe: Callable classifier; called as ``pipe(text, top_k=20)`` and
            expected to return a sequence of dicts with ``"label"`` and
            ``"score"`` keys.
        class2name: Mapping from pipeline label to the name to return.
        title: Title text.
        abstract: Abstract text.

    Returns:
        A list of mapped names, one per consumed prediction. If the
        returned predictions never reach 0.95 in total, all of them are
        included; an empty result yields an empty list.

    Raises:
        KeyError: If a predicted label is missing from ``class2name``.
    """
    inputs = ".".join([title, abstract])
    result = pipe(inputs, top_k=20)

    proba = 0
    labels = []
    # Iterate over what was actually returned instead of indexing blindly:
    # at most 20 predictions come back, and their scores may sum to less
    # than 0.95, which previously ran past the end of the list.
    for prediction in result:
        if proba >= 0.95:
            break
        proba += prediction["score"]
        labels.append(prediction["label"])
    return [class2name[label] for label in labels]