import torch
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
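# Streamlit app that takes two Brazilian Portuguese sentences and reports
# how likely the loaded multilingual BERT model considers them to be similar.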
# load the pre-trained and fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
# set the app title
st.title("Brazilian Portuguese Sentence Similarity Checker")
# get the input sentences from the user
sentence1 = st.text_input("Enter the first sentence:")
sentence2 = st.text_input("Enter the second sentence:")
# check if both sentences are not empty
if sentence1 and sentence2:
    # tokenize the sentences and get their IDs
    input_ids = tokenizer.encode(sentence1, sentence2, truncation=True, padding=True, return_tensors='pt')
    # pass the IDs through the model to get the logits
    with torch.no_grad():
        logits = model(input_ids)[0]
    # apply softmax to the logits to get the predicted probabilities
    probs = torch.softmax(logits, dim=1).squeeze().tolist()
    # display the predicted probabilities to the user
    st.write("Probability that the sentences are similar:", probs[1])
    st.write("Probability that the sentences are dissimilar:", probs[0])