Spaces:
Runtime error
Runtime error
File size: 3,215 Bytes
48d28d5 6a00324 745b80e 9e0ab0f a35fa20 48d28d5 98b274f 9e0ab0f 745b80e 96664a2 2ebfb5d 745b80e 562e111 745b80e 1e717cf e0ddd95 1e717cf 96b8bca bb29b0a 10520fd c1157cb 24890d4 ff8b6b5 4070c67 b5bcb4b f0dea8f 903cf88 41c8dfe dc2b889 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import gradio as gr
import sentence_transformers
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers.util import semantic_search
import pandas as pd
# Identifier of the sentence-embedding model used to encode query text.
_MODEL_NAME = 'gart-labor/eng-distilBERT-se-eclass'
# JSON-lines file holding the search corpus (one record per line).
_CORPUS_PATH = 'corpus.jsonl'

# Both objects are loaded once at import time and shared by every request.
model = SentenceTransformer(_MODEL_NAME)
corpus = pd.read_json(
    _CORPUS_PATH,
    lines=True,
    encoding='utf-8',
)
def predict(name, description):
    """Return the corpus entries most similar to a described property.

    The name and description are merged into one query string, embedded with
    the module-level ``model`` and compared against the vectors stored in the
    ``"embeddings"`` column of the module-level ``corpus``.

    Args:
        name: Name of the property to look up.
        description: Free-text description of the property.

    Returns:
        A ``pandas.DataFrame`` with one row per hit, best match first, and
        three unnamed columns: the value of corpus column 2 (treated as the
        preferred name), the value of corpus column 4 (treated as the IRDI)
        and the score reported by ``semantic_search``. At most five rows are
        returned; fewer when the corpus holds fewer than five entries.
    """
    # Positional corpus columns read for each hit.
    # NOTE(review): the corpus.jsonl schema is not visible here; confirm that
    # columns 2 and 4 really are the preferred name and the IRDI.
    name_col = 2
    irdi_col = 4
    top_k = 5

    text = 'Description: ' + description + '; Name: ' + name
    query_embedding = model.encode(text, convert_to_tensor=True)

    # Go through a plain nested list so torch infers a regular 2-D float
    # tensor instead of being handed a pandas Series of per-row lists.
    corpus_embeddings = torch.tensor(
        corpus["embeddings"].tolist(), dtype=torch.float32
    )

    # semantic_search returns one hit list per query; there is a single query.
    hits = sentence_transformers.util.semantic_search(
        query_embedding, corpus_embeddings, top_k=top_k
    )[0]

    # Iterate over whatever came back rather than indexing hits 0..4, which
    # raised IndexError whenever fewer than five hits were available.
    rows = []
    for hit in hits:
        corpus_id = hit['corpus_id']
        rows.append([
            corpus.iloc[corpus_id, name_col],
            corpus.iloc[corpus_id, irdi_col],
            hit['score'],
        ])
    return pd.DataFrame(rows)
# Gradio front end: two text inputs (name, description) are passed to
# predict(), whose result is shown in a fixed 5x3 table.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Name:", placeholder="Name of the Pump Property", lines=1),
        gr.Textbox(label="Description:", placeholder="Description of the Pump Property", lines=1),
    ],
    outputs=[
        gr.Dataframe(
            row_count=(5, "fixed"),
            col_count=(3, "fixed"),
            label="Predictions",
            headers=['ECLASS preferedName', 'ECLASS IRDI', 'simularity score'],
        ),
    ],
    # Each example is a [name, description] pair matching the two inputs.
    examples=[
        ['Device type', 'describing a set of common specific characteristics in products or goods'],
        ['Item type', 'the type of product, an item can be assigned to'],
        ['Nominal power', 'power being consumed by or dissipated within an electric component as a variable'],
        ['Power consumption', 'power that is typically taken from the auxiliary power supply when the device is operating normally'],
    ],
    title='ECLASS-Property-Search',
    description="This is a semantic search algorithm that mapps unknown pump properties to the ECLASS standard.",
)

# Start the web server for the interface.
interface.launch()