File size: 2,030 Bytes
875cfff
89d4e60
875cfff
ad3d6a3
875cfff
 
 
 
6c23168
aabbe07
9baaef5
875cfff
 
 
 
 
 
 
 
 
 
8d3f7b8
875cfff
954b6e7
9baaef5
875cfff
8d3f7b8
 
875cfff
 
 
 
ad3d6a3
 
 
875cfff
8d3f7b8
875cfff
8d3f7b8
 
 
 
875cfff
 
 
ad3d6a3
5c1deee
ad3d6a3
8d3f7b8
df1dde0
875cfff
9baaef5
 
875cfff
 
 
5c1780d
875cfff
 
 
 
 
 
5c1780d
875cfff
ad3d6a3
8d3f7b8
875cfff
954b6e7
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import torch
import tokenizers

import pandas as pd
import streamlit as st
import torch.nn as nn
from transformers import RobertaTokenizer, RobertaModel


@st.cache(suppress_st_warning=True)
def init_model():
    """Construct the classifier and load its fine-tuned weights.

    Starts from pretrained ``roberta-large-mnli``, swaps the default
    pooler for an 8-way sigmoid classification head, then restores the
    fine-tuned parameters from ``model.pt`` (CPU). Cached by Streamlit
    so the heavy load runs once per session.

    Returns:
        The model in eval mode, ready for inference.
    """
    net = RobertaModel.from_pretrained("roberta-large-mnli")

    # Replace the stock pooler with a small MLP head: 1024-d hidden
    # state -> 8 category scores squashed through a sigmoid.
    net.pooler = nn.Sequential(
        nn.Linear(1024, 256),
        nn.LayerNorm(256),
        nn.ReLU(),
        nn.Linear(256, 8),
        nn.Sigmoid(),
    )

    # Fine-tuned weights are stored alongside the app; force CPU so the
    # app also runs on machines without a GPU.
    state = torch.load("model.pt", map_location=torch.device("cpu"))
    net.load_state_dict(state)
    net.eval()
    return net

# Human-readable category labels. NOTE(review): the position of each
# label is assumed to match the index of the corresponding output unit
# of the model's 8-way head (they are zipped with the probability
# vector in `predict`) — do not reorder; confirm against training code.
cats = ["Computer Science", "Economics", "Electrical Engineering", 
        "Mathematics", "Physics", "Biology", "Finance", "Statistics"]

def predict(outputs):
    """Render a bar chart of the most likely paper categories.

    Softmaxes the model outputs into per-category probabilities, then
    keeps the highest-probability categories until their cumulative
    mass reaches 95%, and draws them with ``st.bar_chart``. If the top
    category is Computer Science, an extra message is shown first.

    Args:
        outputs: model output tensor of shape (1, 8) — one score per
            entry of the module-level ``cats`` list.
    """
    probs = nn.functional.softmax(outputs, dim=1).tolist()[0]

    top_cats = []
    top_probs = []
    cumulative = 0.0

    first = True
    for prob, cat in sorted(zip(probs, cats), reverse=True):
        if first:
            if cat == "Computer Science":
                # BUG FIX: this call was missing its closing parenthesis,
                # which made the whole script a SyntaxError.
                st.write("Today everything is connected with Computer Science")
            first = False
        # Accumulate categories until 95% of the probability mass is
        # covered; later (smaller) entries are skipped.
        if cumulative < 95:
            percent = prob * 100
            cumulative += percent
            top_cats.append(cat)
            top_probs.append(round(percent, 1))

    chart_data = pd.DataFrame(top_probs, index=top_cats, columns=["percent"])
    st.bar_chart(chart_data)

# --- Streamlit page: load tokenizer/model once, then render the form. ---
tokenizer = RobertaTokenizer.from_pretrained("roberta-large-mnli")
model = init_model()

st.markdown("### Title")

# Title is mandatory; the abstract below is optional and may stay empty.
title = st.text_area("*Enter title (required)", height=20)

st.markdown("### Abstract")

abstract = st.text_area("Enter abstract", height=200)

if not title:
    st.warning("Please fill in required fields")
else:    
    st.markdown("### Result")
    # Join title and abstract into a single input; truncate/pad to the
    # 512-token RoBERTa context window.
    encoded_input = tokenizer(title + ". " + abstract, return_tensors="pt", padding=True, 
                          max_length = 512, truncation=True)
    with torch.no_grad():
        # NOTE(review): pooler_output is indexed as a 3-D tensor here,
        # which assumes the custom nn.Sequential pooler is applied to the
        # full (batch, seq, hidden) sequence and we keep the [CLS]
        # position — confirm against the training pipeline.
        outputs = model(**encoded_input).pooler_output[:, 0, :]
        predict(outputs)