Spaces:
Runtime error
Runtime error
File size: 1,270 Bytes
acf0ee9 9c7a582 6b02e3d 9c7a582 6b02e3d 4c9ba47 e922469 4c9ba47 e922469 9c7a582 4c9ba47 9c7a582 e922469 9c7a582 acf0ee9 9c7a582 e922469 9c7a582 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModel
# Hugging Face Hub id of the checkpoint shared by the tokenizer and the model.
MODEL_NAME = 'sentence-transformers/distiluse-base-multilingual-cased-v1'


@st.cache_resource
def _load_tokenizer_and_model():
    """Load the pre-trained tokenizer and model a single time.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, both objects would be re-created on each
    rerun, which makes the app slow and memory hungry.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from ``MODEL_NAME``.
    """
    return (
        AutoTokenizer.from_pretrained(MODEL_NAME),
        AutoModel.from_pretrained(MODEL_NAME),
    )


# NOTE: st.cache_resource is only available in recent Streamlit releases
# (1.18+); pin the Streamlit version accordingly.
tokenizer, model = _load_tokenizer_and_model()

# set the app title
st.title("Brazilian Portuguese Sentence Similarity Checker")

# get the input sentences from the user
sentence1 = st.text_input("Enter the first sentence:")
sentence2 = st.text_input("Enter the second sentence:")
# Only compute a score once both sentences have been entered.
if sentence1 and sentence2:
    # Tokenize the sentences as a batch of TWO separate sequences. Passing
    # them as (text, text_pair) would merge them into one sequence, leaving
    # only a single row of embeddings and nothing to compare against.
    # padding=True pads only up to the longer of the two sentences.
    encoded = tokenizer(
        [sentence1, sentence2],
        padding=True,
        truncation=True,
        return_tensors='pt',
    )

    # Run the model without gradient tracking; element [0] of the output is
    # the per-token hidden states, one row per sentence.
    with torch.no_grad():
        token_embeddings = model(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
        )[0]

    # Mean-pool the token embeddings into one vector per sentence, using the
    # attention mask so that padding tokens do not contribute to the average.
    # NOTE(review): the sentence-transformers pipeline for this checkpoint may
    # apply extra layers after pooling that AutoModel does not load -- confirm
    # against the model card if scores must match sentence-transformers.
    mask = encoded['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
    sentence_embeddings = summed / counts

    # Cosine similarity between the two 1-D sentence vectors; dim=0 makes the
    # result a scalar tensor so that .item() is valid.
    similarity = torch.nn.functional.cosine_similarity(
        sentence_embeddings[0], sentence_embeddings[1], dim=0
    ).item()

    # display the predicted similarity to the user
    st.write("Similarity score between the sentences:", similarity)
|