Spaces:

brisklyapp
/

strings-similarity

Runtime error

File size: 1,270 Bytes

acf0ee9
9c7a582
 
6b02e3d
9c7a582
 
 
6b02e3d
4c9ba47
e922469
4c9ba47
 
 
 
 
 
 
e922469
9c7a582
4c9ba47
9c7a582
e922469
9c7a582
acf0ee9
9c7a582
 
e922469
9c7a582

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModel

# Hugging Face checkpoint used for both the tokenizer and the encoder.
MODEL_NAME = 'sentence-transformers/distiluse-base-multilingual-cased-v1'

# Load the pre-trained tokenizer and model.
# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so these loads are repeated on each rerun. If the installed Streamlit version
# provides a resource cache (e.g. st.cache_resource), consider wrapping them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)


def _mean_pool(token_embeddings, attention_mask):
    """Average token embeddings into one vector per sentence.

    Padded positions (attention mask == 0) are excluded from the average so
    that the shorter sentence in a batch is not diluted by padding tokens.

    Args:
        token_embeddings: Tensor indexed as (batch, token, hidden).
        attention_mask: Tensor indexed as (batch, token); non-zero marks a
            real token, zero marks padding.

    Returns:
        Tensor indexed as (batch, hidden).
    """
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero if a row has no real tokens.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / token_counts


# Set the app title.
st.title("Brazilian Portuguese Sentence Similarity Checker")

# Get the input sentences from the user.
sentence1 = st.text_input("Enter the first sentence:")
sentence2 = st.text_input("Enter the second sentence:")

# Only compute a score once both inputs are non-empty.
if sentence1 and sentence2:
    # Tokenize the sentences as a batch of two *separate* sequences. Passing
    # them as a (text, text_pair) pair would merge them into a single sequence
    # and leave only one row to index, which is what crashed previously.
    # padding=True pads only up to the longer of the two sentences.
    encoded = tokenizer(
        [sentence1, sentence2],
        padding=True,
        truncation=True,
        return_tensors='pt',
    )

    # Run the encoder without tracking gradients; element 0 of the output is
    # the per-token hidden states.
    with torch.no_grad():
        token_embeddings = model(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
        )[0]

    # Collapse per-token states into one vector per sentence.
    # NOTE(review): the sentence-transformers pipeline for this checkpoint may
    # apply additional layers after pooling that AutoModel does not load, so
    # scores can differ from SentenceTransformer(...).encode() - confirm if
    # exact parity matters.
    sentence_embeddings = _mean_pool(token_embeddings, encoded['attention_mask'])

    # Cosine similarity between the two 1-D sentence vectors (hence dim=0).
    similarity = torch.nn.functional.cosine_similarity(
        sentence_embeddings[0],
        sentence_embeddings[1],
        dim=0,
    ).item()

    # Display the predicted similarity to the user.
    st.write("Similarity score between the sentences:", similarity)