File size: 1,595 Bytes
875cfff
 
 
 
 
 
 
954b6e7
875cfff
 
 
 
 
 
 
 
 
 
 
 
 
 
954b6e7
 
875cfff
 
 
 
 
 
 
 
 
 
 
 
 
 
954b6e7
875cfff
 
 
 
 
 
 
 
 
 
 
 
 
 
954b6e7
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import torch

import streamlit as st
import torch.nn as nn
from transformers import RobertaTokenizer, RobertaModel

# allow_output_mutation=True makes Streamlit hand back the cached objects
# by reference instead of hashing/copying the returned model on every
# script rerun, which is very slow for a model of this size.
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def init():
    """Build the tokenizer and the fine-tuned classifier used by the app.

    Loads the "roberta-large-mnli" tokenizer and base model, replaces the
    model's ``pooler`` with a small classification head
    (1024 -> 256 -> 8, ending in a sigmoid), restores the weights stored
    in ``model.pt`` on the CPU and switches the model to eval mode.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = RobertaTokenizer.from_pretrained("roberta-large-mnli")

    model = RobertaModel.from_pretrained("roberta-large-mnli")

    # Swap the stock pooler for the classification head; it must be in
    # place before load_state_dict so the checkpoint keys line up with
    # the module layout.
    model.pooler = nn.Sequential(
        nn.Linear(1024, 256),
        nn.LayerNorm(256),
        nn.ReLU(),
        nn.Linear(256, 8),
        nn.Sigmoid()
    )

    # map_location forces every tensor onto the CPU regardless of the
    # device the checkpoint was saved from.
    model_path = 'model.pt'
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
    model.eval()
    return tokenizer, model

# Category labels shown to the user. Order matters: predict() pairs these
# positionally with the scores produced by the model.
cats = [
    "Computer Science",
    "Economics",
    "Electrical Engineering",
    "Mathematics",
    "Physics",
    "Biology",
    "Finance",
    "Statistics",
]

def predict(outputs):
    """Write the most probable categories to the Streamlit page.

    The scores in ``outputs`` are normalised with a softmax over dim 1 and
    only the first row is used. Categories are listed from most to least
    probable, one ``"<category>: <percent>"`` line each, and listing stops
    once the percentages already written add up to at least 95.

    Args:
        outputs: 2-D tensor of scores; row 0 is paired positionally with
            the module-level ``cats`` list.
    """
    first_row = nn.functional.softmax(outputs, dim=1).tolist()[0]
    ranked = sorted(zip(first_row, cats), reverse=True)

    shown = 0
    for prob, cat in ranked:
        # Guard form of the `shown < 95` test: nothing more is written once
        # the running total has reached the threshold.
        if not shown < 95:
            break
        percent = prob * 100
        shown += percent
        st.write(f'{cat}: {round(percent, 1)}')

# Fetch the tokenizer and model (served from Streamlit's cache after the
# first run of the script).
tokenizer, model = init()

st.markdown("### Title")

title = st.text_area("Enter title", height=20)

st.markdown("### Abstract")

abstract = st.text_area("Enter abstract", height=200)

# Only the title is required. A title made up solely of whitespace is
# treated as missing so the model is never run on an effectively empty title.
if not title.strip():
    st.warning("Please fill out the required fields")
else:
    # Title and abstract are classified as a single text, truncated to at
    # most 512 tokens.
    encoded_input = tokenizer(title + '. ' + abstract, return_tensors='pt', padding=True,
                              max_length=512, truncation=True)
    with torch.no_grad():
        # pooler_output is indexed as a 3-D tensor here; index 0 on the
        # second axis is kept -- presumably the scores at the first token
        # position (TODO confirm against the replaced pooler in init()).
        outputs = model(**encoded_input).pooler_output[:, 0, :]
        predict(outputs)