File size: 1,291 Bytes
e922469
acf0ee9
6b02e3d
 
e922469
 
 
6b02e3d
4c9ba47
e922469
4c9ba47
 
 
 
 
 
 
e922469
 
4c9ba47
e922469
 
 
acf0ee9
e922469
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import torch
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hugging Face checkpoint shared by the tokenizer and the classifier.
# NOTE(review): the checkpoint name says "sentiment" while this app is titled a
# sentence-similarity checker -- confirm this is the intended model.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

# Streamlit re-executes the whole script on every widget interaction, so the
# tokenizer and model are loaded through a cached function instead of being
# re-created on each rerun. Older Streamlit releases without
# `st.cache_resource` fall back to the legacy `st.cache` decorator.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def _load_tokenizer_and_model(model_name):
    """Load the tokenizer and sequence-classification model for a checkpoint.

    Args:
        model_name: Hugging Face model identifier or local path.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )


# Module-level names kept as before so the rest of the script is unaffected.
tokenizer, model = _load_tokenizer_and_model(MODEL_NAME)

# Page heading shown at the top of the app.
st.title("Brazilian Portuguese Sentence Similarity Checker")

# Render the two text boxes in order and collect whatever the user typed.
sentence1, sentence2 = [
    st.text_input(prompt)
    for prompt in ("Enter the first sentence:", "Enter the second sentence:")
]

# Only run inference once both text boxes contain a sentence.
if sentence1 and sentence2:
    # Encode the two sentences as one pair. Calling the tokenizer directly
    # (instead of `tokenizer.encode`, which returns only the input IDs) also
    # yields the attention mask and the token_type_ids that tell the model
    # which tokens belong to which sentence.
    inputs = tokenizer(sentence1, sentence2, truncation=True, return_tensors='pt')

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        # Index 0 of the model output holds the classification logits.
        logits = model(**inputs)[0]

    # Softmax over the class dimension; a single pair was encoded, so row 0
    # is the only row in the batch.
    probs = torch.softmax(logits, dim=1)[0].tolist()

    # NOTE(review): this reads class index 1 as "similar" and index 0 as
    # "dissimilar". Confirm that matches the label mapping of the loaded
    # checkpoint, which may expose a different set of classes.
    st.write("Probability that the sentences are similar:", probs[1])
    st.write("Probability that the sentences are dissimilar:", probs[0])