"""Streamlit app that scores the semantic similarity of two sentences.

Both sentences are embedded with a multilingual transformer checkpoint and
compared with cosine similarity.
"""

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModel

MODEL_NAME = 'sentence-transformers/distiluse-base-multilingual-cased-v1'


@st.cache_resource
def _load_model_and_tokenizer(model_name=MODEL_NAME):
    """Load the tokenizer and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the weights would be reloaded each time.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModel.from_pretrained(model_name),
    )


def _mean_pool(token_embeddings, attention_mask):
    """Average token embeddings per sentence, ignoring padding positions.

    Args:
        token_embeddings: tensor of shape (batch, seq_len, hidden).
        attention_mask: tensor of shape (batch, seq_len); 1 for real tokens.

    Returns:
        Tensor of shape (batch, hidden) with one vector per sentence.
    """
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero for an all-padding row.
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / counts


def _sentence_similarity(first, second, tokenizer, model):
    """Return the cosine similarity between the embeddings of two sentences.

    The sentences are encoded as a batch of two independent inputs (not as a
    single sentence pair) so that each one gets its own embedding.
    """
    encoded = tokenizer(
        [first, second],
        padding=True,  # pad only to the longer of the two sentences
        truncation=True,
        return_tensors='pt',
    )
    with torch.no_grad():
        token_embeddings = model(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
        )[0]
    # NOTE(review): the sentence-transformers packaging of this checkpoint
    # may apply an extra projection after pooling that AutoModel does not
    # load; scores could differ from that library's output - confirm.
    sentence_vectors = _mean_pool(token_embeddings, encoded['attention_mask'])
    return torch.nn.functional.cosine_similarity(
        sentence_vectors[0], sentence_vectors[1], dim=0
    ).item()


# load the pre-trained model and tokenizer (cached across reruns)
tokenizer, model = _load_model_and_tokenizer()

# set the app title
st.title("Brazilian Portuguese Sentence Similarity Checker")

# get the input sentences from the user
sentence1 = st.text_input("Enter the first sentence:")
sentence2 = st.text_input("Enter the second sentence:")

# only score once both sentences contain something other than whitespace
if sentence1.strip() and sentence2.strip():
    similarity = _sentence_similarity(sentence1, sentence2, tokenizer, model)

    # display the predicted similarity to the user
    st.write("Similarity score between the sentences:", similarity)