File size: 3,215 Bytes
48d28d5
 
 
6a00324
745b80e
9e0ab0f
 
a35fa20
48d28d5
98b274f
9e0ab0f
745b80e
 
 
96664a2
2ebfb5d
745b80e
 
 
 
 
 
 
562e111
 
 
 
 
745b80e
1e717cf
 
 
 
 
 
 
 
 
e0ddd95
1e717cf
 
 
 
 
 
96b8bca
bb29b0a
10520fd
c1157cb
24890d4
ff8b6b5
4070c67
b5bcb4b
f0dea8f
 
903cf88
41c8dfe
dc2b889
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr
import sentence_transformers
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers.util import semantic_search
import pandas as pd

# Name (or path) of the SentenceTransformer model used to embed queries.
_MODEL_ID = 'gart-labor/eng-distilBERT-se-eclass'
# JSON-lines file with the searchable corpus; predict() reads its
# "embeddings" column and looks up result fields by column position.
_CORPUS_FILE = 'corpus.jsonl'

# Both objects are created once at import time and shared by every request.
model = SentenceTransformer(_MODEL_ID)

corpus = pd.read_json(_CORPUS_FILE, lines=True, encoding='utf-8')

# Number of best matches requested from the semantic search.
_TOP_K = 5

# Lazily built tensor of all corpus embeddings (see _get_corpus_embeddings).
_corpus_embeddings_cache = None


def _get_corpus_embeddings():
    """Return the corpus embeddings as a float32 tensor, built on first use.

    The corpus is loaded once at module level, so converting its
    "embeddings" column on every request is repeated work. The tensor is
    created the first time it is needed and reused afterwards.
    """
    global _corpus_embeddings_cache
    if _corpus_embeddings_cache is None:
        _corpus_embeddings_cache = torch.tensor(
            corpus["embeddings"].tolist(), dtype=torch.float32
        )
    return _corpus_embeddings_cache


def predict(name, description):
    """Find the corpus entries that best match a property name/description.

    The two inputs are combined into one query string, embedded with the
    module-level ``model`` and compared against the corpus embeddings via
    ``sentence_transformers.util.semantic_search``.

    Args:
        name: Name of the property to look up.
        description: Free-text description of the property.

    Returns:
        A ``pandas.DataFrame`` with one row per hit (at most ``_TOP_K``), in
        the order returned by the search. Columns are positional:
        0 = corpus column 2 (treated as the preferred name),
        1 = corpus column 4 (treated as the IRDI),
        2 = the score reported by the search.
    """
    text = f'Description: {description}; Name: {name}'
    query_embedding = model.encode(text, convert_to_tensor=True)

    # semantic_search returns one hit list per query; there is one query.
    hits = sentence_transformers.util.semantic_search(
        query_embedding, _get_corpus_embeddings(), top_k=_TOP_K
    )[0]

    # Iterate over whatever came back instead of indexing hits[0..4], so a
    # corpus with fewer than _TOP_K entries does not raise IndexError.
    # NOTE(review): columns are addressed by position (2 and 4); confirm
    # against the corpus.jsonl schema before reordering its fields.
    rows = []
    for hit in hits:
        corpus_id = hit['corpus_id']
        rows.append(
            [corpus.iloc[corpus_id, 2], corpus.iloc[corpus_id, 4], hit['score']]
        )

    return pd.DataFrame(rows)

# Gradio UI: two single-line text inputs are passed to predict() and the
# returned rows are shown in a fixed 5x3 table.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Name:", placeholder="Name of the Pump Property", lines=1),
        gr.Textbox(label="Description:", placeholder="Description of the Pump Property", lines=1),
    ],
    outputs=[
        gr.Dataframe(
            row_count=(5, "fixed"),
            col_count=(3, "fixed"),
            label="Predictions",
            headers=['ECLASS preferredName', 'ECLASS IRDI', 'similarity score'],
        ),
    ],
    # Each example is [name, description], matching the order of `inputs`.
    examples=[
        ['Device type', 'describing a set of common specific characteristics in products or goods'],
        ['Item type', 'the type of product, an item can be assigned to'],
        ['Nominal power', 'power being consumed by or dissipated within an electric component as a variable'],
        ['Power consumption', 'power that is typically taken from the auxiliary power supply when the device is operating normally'],
    ],
    title='ECLASS-Property-Search',
    description="This is a semantic search algorithm that maps unknown pump properties to the ECLASS standard.",
)

interface.launch()