Spaces:

ItsNikolor
/

ShadML2-article-classification

Sleeping

App Files Community

ItsNikolor committed on Apr 8

Commit

0efe602

verified ·

1 Parent(s): 45b90e5

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -24

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import streamlit as st
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
@@ -8,10 +9,10 @@ def combine_title_summary(title, summary):
 tag2ind = {
-    "bio": 0,
-    "physics": 1,
-    "math": 2,
-    "cs": 3,
 }
@@ -19,12 +20,10 @@ tag2ind = {
 def load_model():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # assert torch.cuda.is_available()
-    save_dir = "./distilbert/distilbert-base-cased/checkpoint-738"
-    tokenizer = AutoTokenizer.from_pretrained(save_dir)
-    model = AutoModelForSequenceClassification.from_pretrained(
-        save_dir
-    ).to(device)
     return tokenizer, model
@@ -48,20 +47,36 @@ def run_model(model, tokenizer, title, summary):
         out = model(**tokens_info)
         probs = torch.nn.functional.softmax(out.logits, dim=-1)[0]
-        result = f"Text: `{text}`\nPrediction (prob): \n" + "\n".join(
-            [f"{tag}={tag_prob}" for tag, tag_prob in zip(tag2ind, probs)]
-        )
-        return result
-title = st.text_input(label="Title", value="")
-abstract = st.text_input(label="Abstract", value="")
-if st.button("Submit"):
-    if title == "" and abstract == "":
-        st.error("At least one of title or abstract must be provided")
-    else:
-        result = combine_title_summary(title, abstract)
-        st.success(result)
-        result = run_model(model, tokenizer, title, abstract)
-        st.success(result)

+import pandas as pd
 import streamlit as st
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 tag2ind = {
+    "Biology": 0,
+    "Physics": 1,
+    "Math": 2,
+    "Computer Science": 3,
 }
 def load_model():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # dir_name = "./distilbert/distilbert-base-cased/checkpoint-738"
+    dir_name = "./microsoft/deberta-v3-small/checkpoint-4915"
+    tokenizer = AutoTokenizer.from_pretrained(dir_name, use_fast=False)
+    model = AutoModelForSequenceClassification.from_pretrained(dir_name).to(device)
     return tokenizer, model
         out = model(**tokens_info)
         probs = torch.nn.functional.softmax(out.logits, dim=-1)[0]
+    ids = torch.argsort(probs, descending=True)
+    p = 0
+    best_tags, best_probs = [], []
+    for ind in ids:
+        p += probs[ind]
+        best_tags.append(list(tag2ind.keys())[ind])
+        best_probs.append(probs[ind])
+        if p >= 0.95:
+            break
+    return best_tags, best_probs
+def main():
+    title = st.text_input(label="Title", value="")
+    abstract = st.text_area(label="Abstract", value="", height=200)
+    if st.button("Classify"):
+        if title == "" and abstract == "":
+            st.error("At least one of title or abstract must be provided")
+        else:
+            best_tags, best_probs = run_model(model, tokenizer, title, abstract)
+            df = pd.DataFrame(
+                dict(zip(best_tags, best_probs)).items(),
+                columns=["Theme", "Probability"],
+            )
+            st.table(df)
+if __name__ == "__main__":
+    main()