File size: 10,267 Bytes
efecb81
 
 
 
 
 
8b91e86
ef4dea0
7fbedaa
8b91e86
955e6e8
d67ec36
b5d5ceb
 
ef4dea0
6608d38
e5946dc
a9262b6
 
6608d38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cc15a6
6608d38
c9fad14
6608d38
 
 
 
 
13fca63
4cc15a6
13fca63
 
 
 
6608d38
 
 
 
 
 
 
 
d9d263e
6608d38
 
 
 
 
 
 
c9fad14
6608d38
4cc15a6
6608d38
 
4cc15a6
 
 
6608d38
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gradio as gr
import sentence_transformers
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers.util import semantic_search
import pandas as pd
import requests
from datasets import load_dataset
import os

# Load the ECLASS property corpora (one per language) as pandas DataFrames.
# predict() reads the 'irdi' and 'Embeddings' columns by name and the first
# three columns by position.
# The access token is an argument of load_dataset(), not of pd.DataFrame();
# passing it to the DataFrame constructor raises a TypeError at import time.
corpus_ger = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_ger', token=str(os.environ['hf_token']))['train'])
corpus_eng = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_eng')['train']) #, token=str(os.environ['hf_token'])
corpus_fr = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_fr')['train']) #, token=str(os.environ['hf_token'])

# Load one sentence-transformer model per supported language.
# NOTE(review): the German model reads the token from 'user_token' while the
# German corpus reads it from 'hf_token' - confirm both secrets are configured.
model_ger = SentenceTransformer('ECLASS-Standard/gbert-base-eclass', token=str(os.environ['user_token']))
model_eng = SentenceTransformer('ECLASS-Standard/mboth-distil-eng-quora-sentence') #, token=str(os.environ['hf_token'])
model_fr = SentenceTransformer('ECLASS-Standard/Sahajtomar-french_semantic') #, token=str(os.environ['hf_token'])

#Definition of search function
def predict(name, description, language, classCode='nofilter', top_k=10):
  """Return the ECLASS properties that best match a free-text property.

  Name and description are joined into a single query string, embedded with
  the sentence-transformer model of the selected language and compared with
  the embeddings stored in the corpus of that language.

  Args:
    name: Name of the property to look up.
    description: Description of the property (may be an empty string).
    language: One of 'german', 'english' or 'french'.
    classCode: ECLASS class code used to restrict the corpus through the
      filter API, or 'nofilter' to search the whole corpus.
    top_k: Maximum number of matches to return. Floats (as delivered by a
      numeric UI field) are truncated to int.

  Returns:
    A pandas DataFrame with the columns 'preferedName', 'irdi', 'score' and
    'definition', one row per hit in the order delivered by semantic_search.
    It has fewer than top_k rows when the (filtered) corpus is smaller, and
    no rows when top_k < 1 or the filter matches nothing.

  Raises:
    ValueError: If language is not one of the supported languages.
    requests.HTTPError: If the filter API answers with an error status.
  """
  # Validate before touching any model so that a bad value fails with a clear
  # message instead of an UnboundLocalError further down.
  supported_languages = ('german', 'english', 'french')
  if language not in supported_languages:
    raise ValueError(f"Unsupported language {language!r}; expected one of {supported_languages}")

  result_columns = ['preferedName', 'irdi', 'score', 'definition']

  # A numeric UI field may hand over a float such as 10.0.
  top_k = int(top_k)
  if top_k < 1:
    return pd.DataFrame(columns=result_columns)

  # Select model and corpus for the requested language
  resources = {
    'german': (model_ger, corpus_ger),
    'english': (model_eng, corpus_eng),
    'french': (model_fr, corpus_fr),
  }
  model, corpus = resources[language]

  # Restrict the ECLASS corpus to the properties of the given class
  if classCode == 'nofilter':
    corpus_filtered = corpus
  else:
    # Let requests build the query string so the class code is URL-encoded;
    # the timeout keeps an unresponsive API from blocking the request forever.
    response = requests.get(
      'https://bcon2-api.azurewebsites.net/api/eclass',
      params={'codedname': classCode},
      timeout=30,
    )
    response.raise_for_status()
    # Each line contributes the 20 characters in front of its last character.
    # NOTE(review): presumably the IRDI followed by one trailing character -
    # confirm against the API's response format.
    properties_filtered_list = [line[-21:-1] for line in response.text.split('\n')]
    # Keep only the listed properties and renumber the rows from 0
    corpus_filtered = corpus[corpus['irdi'].isin(properties_filtered_list)].reset_index(drop=True)

  # Nothing to search in: return an empty result instead of failing later on
  if corpus_filtered.empty:
    return pd.DataFrame(columns=result_columns)

  # Build the query embedding from name and description
  text = name + '; ' + description
  query_embedding = model.encode(text, convert_to_tensor=True)

  # Convert the corpus embeddings into a tensor
  corpus_embeddings = torch.Tensor(corpus_filtered["Embeddings"])

  # Run the semantic search
  output = sentence_transformers.util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)

  # Read out the hits. Iterate over what was actually returned: there can be
  # fewer than top_k hits when the corpus is small.
  preferedNames = []
  definitions = []
  irdis = []
  scores = []
  for hit in output[0]:
    corpus_id = hit.get('corpus_id')
    preferedNames.append(corpus_filtered.iloc[corpus_id, 1])
    definitions.append(corpus_filtered.iloc[corpus_id, 2])
    irdis.append(corpus_filtered.iloc[corpus_id, 0])
    scores.append(round(hit.get('score'), 2))

  predictions = pd.DataFrame({'preferedName' : preferedNames, 'irdi' : irdis,'score' : scores,'definition' : definitions})
  return predictions

#gradio user interface
# Gradio user interface: two text inputs, filter/top-k/language controls,
# a search button and a result table wired to predict().
with gr.Blocks() as demo:
  # header row: title/description on the left, logo on the right
  with gr.Row():
    with gr.Column(scale=15):
      gr.Markdown(""" # ECLASS-Search-Demo
        This is a semantic search application that maps unknown product properties to the ECLASS standard. It is created by ECLASS e.V. in collaboration with the GART-labortory of the cologne university of applied science.""")
    with gr.Column(scale=1):
      gr.Markdown("""<p align="right"><img src='' width=300p></p>""")


  with gr.Row():
    #inputs: free-text query
    name_tx = gr.Textbox(label="Name:",placeholder="Name of the Property", lines=1)
    description_tx = gr.Textbox(label="Description:", placeholder="Description of the Property", lines=1)

  with gr.Row():
    #inputs: search options
    classcode_tx = gr.Textbox(value='nofilter', label="Filter with ECLASS ClassCode", placeholder="for no filter type: 'nofilter'", lines=1)
    # precision=0 makes the component deliver an int; without it the value
    # reaches predict() as a float (10.0), which range() does not accept.
    top_k_nu = gr.Number(value=10, label="Number of Matches", precision=0)
    language_drop = gr.Dropdown(["german", "english","french"], value='german', label="Select language")

  #button
  search = gr.Button("search")

  #output: one row per match, same columns as the DataFrame returned by predict()
  prediction_df = gr.Dataframe(headers = ['preferedName', 'irdi', 'score', 'definition'])

  #run predict() when the "search" button is clicked; the order of inputs
  #matches predict's positional parameters
  search.click(fn=predict, inputs=[name_tx,description_tx,language_drop,classcode_tx,top_k_nu], outputs=prediction_df)

  #examples: each entry is [name, description, language, classCode, top_k]
  gr.Examples(
    examples=[
      ["LED grün", "","german","nofilter",10],
      ["Abmessungen", "","german","27272803",10],
      ["Kabel", "","german","27272803",10],
      ["Umgebungstemperatur", "","german","27272803",10],
      ["Reproduzierbarkeit", "","german","27272803",10],
      ["Repeat accuracy", "","english","27272803",10],
    ],
    inputs=[name_tx,description_tx,language_drop,classcode_tx,top_k_nu],
  )

# start the web server
demo.launch(debug=True)