Spaces:

juliaannjose
/

hupd_patent_classifier

Runtime error

App Files Files Community

juliaannjose committed on Apr 28, 2023

Commit

bbf7aff

1 Parent(s): fd97dee

inference code

Browse files

Files changed (1) hide show

app.py +29 -10

app.py CHANGED Viewed

@@ -1,7 +1,11 @@
 import streamlit as st
-from transformers import pipeline
 from datasets import load_dataset
 # load the dataset and
 # use the patent number, abstract and claim columns for UI
 with st.spinner("Setting up the app..."):
@@ -16,11 +20,6 @@ with st.spinner("Setting up the app..."):
         val_filing_end_date="2016-01-31",
     )
-    # widget for selecting our finetuned language model
-    language_model_path = "juliaannjose/finetuned_model"
-# pass the model to transformers pipeline - model selection component.
-classifier_model = pipeline(model=language_model_path)
 # drop down menu with patent numbers
 _patent_id = st.selectbox(
@@ -28,19 +27,39 @@ _patent_id = st.selectbox(
     dataset_dict["train"]["patent_number"],
 )
 # display abstract and claim
 @st.cache(persist=True)
 def get_abs_claim(_patent_id):
     # get abstract and claim corresponding to this patent id
     _abstract = dataset_dict["train"][["patent_number"] == _patent_id]["abstract"]
     _claim = dataset_dict["train"][["patent_number"] == _patent_id]["claims"]
-    return _abstract,_claim
-_abstract,_claim = get_abs_claim(_patent_id)
 st.write(_abstract)
 st.write(_claim)
 # when submit button clicked, run the model and get result
 if st.button("Submit"):
-    results = classifier_model([_abstract + _claim])
-    st.write(results)

 import streamlit as st
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from datasets import load_dataset
+# finetuned model
+language_model_path = "juliaannjose/finetuned_model"
 # load the dataset and
 # use the patent number, abstract and claim columns for UI
 with st.spinner("Setting up the app..."):
         val_filing_end_date="2016-01-31",
     )
 # drop down menu with patent numbers
 _patent_id = st.selectbox(
     dataset_dict["train"]["patent_number"],
 )
 # display abstract and claim
 @st.cache(persist=True)
 def get_abs_claim(_patent_id):
     """Return the abstract and claims of the patent selected in the UI.

     Args:
         _patent_id: A value taken from the ``patent_number`` column of
             ``dataset_dict["train"]`` (the drop-down is populated from
             that same column).

     Returns:
         A ``(abstract, claims)`` tuple read from the matching row.

     Raises:
         ValueError: If ``_patent_id`` does not occur in the
             ``patent_number`` column.
     """
     train_split = dataset_dict["train"]
     # The previous lookup indexed the split with the expression
     # `["patent_number"] == _patent_id`, which compares a list literal
     # with the id and is always False, so the result never depended on
     # the selected patent. Locate the row by its position in the
     # patent_number column instead.
     row_index = list(train_split["patent_number"]).index(_patent_id)
     record = train_split[row_index]
     return record["abstract"], record["claims"]
+_abstract, _claim = get_abs_claim(_patent_id)
 st.write(_abstract)
 st.write(_claim)
+input_text = _abstract + _claim
+# model and tokenizer initialization
+tokenizer = AutoTokenizer.from_pretrained(language_model_path)
+inputs = tokenizer(
+    input_text,
+    truncation=True,
+    padding=True,
+    return_tensors="pt",
+)
+model = AutoModelForSequenceClassification.from_pretrained(language_model_path)
+# get predictions
+id2label = {0: "REJECTED", 1: "ACCEPTED"}
 # when submit button clicked, run the model and get result
 if st.button("Submit"):
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    predicted_class_id = logits.argmax().item()
+    pred_label = id2label[predicted_class_id]
+    st.write(pred_label)