File size: 1,435 Bytes
2359bda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
"""
This example computes the score between a query and all possible
sentences in a corpus using a Cross-Encoder for semantic textual similarity (STS).
It output then the most similar sentences for the given query.
"""
from sentence_transformers.cross_encoder import CrossEncoder
import numpy as np
# Pre-trained cross encoder
model = CrossEncoder('cross-encoder/stsb-distilroberta-base')
# We want to compute the similarity between the query sentence
query = 'A man is eating pasta.'
# With all sentences in the corpus
corpus = ['A man is eating food.',
'A man is eating a piece of bread.',
'The girl is carrying a baby.',
'A man is riding a horse.',
'A woman is playing violin.',
'Two men pushed carts through the woods.',
'A man is riding a white horse on an enclosed ground.',
'A monkey is playing drums.',
'A cheetah is running behind its prey.'
]
# So we create the respective sentence combinations
sentence_combinations = [[query, corpus_sentence] for corpus_sentence in corpus]
# Compute the similarity scores for these combinations
similarity_scores = model.predict(sentence_combinations)
# Sort the scores in decreasing order
sim_scores_argsort = reversed(np.argsort(similarity_scores))
# Print the scores
print("Query:", query)
for idx in sim_scores_argsort:
print("{:.2f}\t{}".format(similarity_scores[idx], corpus[idx]))
|