Spaces:
Runtime error
Runtime error
File size: 10,267 Bytes
efecb81 8b91e86 ef4dea0 7fbedaa 8b91e86 955e6e8 d67ec36 b5d5ceb ef4dea0 6608d38 e5946dc a9262b6 6608d38 4cc15a6 6608d38 c9fad14 6608d38 13fca63 4cc15a6 13fca63 6608d38 d9d263e 6608d38 c9fad14 6608d38 4cc15a6 6608d38 4cc15a6 6608d38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
import sentence_transformers
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers.util import semantic_search
import pandas as pd
import requests
from datasets import load_dataset
import os
# Load the ECLASS property corpora (one DataFrame per language) from the
# Hugging Face Hub. Each corpus is expected to provide at least the columns
# 'irdi' and 'Embeddings', which predict() reads.
#
# The access token must be given to load_dataset(): pandas.DataFrame() has no
# `token` keyword and raises a TypeError at start-up when it receives one.
corpus_ger = pd.DataFrame(
    load_dataset(
        'ECLASS-Standard/eclass_properties_ger',
        token=str(os.environ['hf_token']),
    )['train']
)
# The English and French corpora are loaded without a token.
# NOTE(review): presumably these repositories are public — confirm.
corpus_eng = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_eng')['train'])
corpus_fr = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_fr')['train'])

# Load the sentence-embedding models, one per supported language.
# NOTE(review): the German model reads the `user_token` secret while the German
# corpus reads `hf_token` — confirm that both secrets are configured.
model_ger = SentenceTransformer('ECLASS-Standard/gbert-base-eclass', token=str(os.environ['user_token']))
model_eng = SentenceTransformer('ECLASS-Standard/mboth-distil-eng-quora-sentence')
model_fr = SentenceTransformer('ECLASS-Standard/Sahajtomar-french_semantic')
# Definition of the search function
def predict(name, description, language, classCode='nofilter', top_k=10):
    """Return the ECLASS properties that best match a free-text property.

    The name and description are concatenated, embedded with the model of the
    selected language and compared against the pre-computed corpus embeddings.

    Args:
        name: Name of the property to look up.
        description: Description of the property (may be an empty string).
        language: One of 'german', 'english' or 'french'.
        classCode: ECLASS class code used to restrict the corpus to the
            properties of that class, or 'nofilter' to search everything.
        top_k: Maximum number of matches to return. Accepts integral floats
            (the UI number field may deliver 10.0 instead of 10).

    Returns:
        A pandas DataFrame with the columns 'preferedName', 'irdi', 'score'
        and 'definition', one row per match, best match first. It holds fewer
        than ``top_k`` rows when the (filtered) corpus is smaller than that.

    Raises:
        ValueError: If ``language`` is not a supported language.
        requests.HTTPError: If the class-filter API answers with an error.
    """
    # Select model and corpus for the requested language.
    if language == 'german':
        model, corpus = model_ger, corpus_ger
    elif language == 'english':
        model, corpus = model_eng, corpus_eng
    elif language == 'french':
        model, corpus = model_fr, corpus_fr
    else:
        raise ValueError(
            f"Unsupported language {language!r}; expected 'german', 'english' or 'french'"
        )

    # range()/top-k selection need an int; the UI may hand over a float.
    top_k = int(top_k)

    # Restrict the corpus to the properties of the given ECLASS class.
    if classCode == 'nofilter':
        corpus_filtered = corpus
    else:
        # Let requests build the query string so user input is URL-encoded,
        # and never wait forever on the external filter API.
        response = requests.get(
            'https://bcon2-api.azurewebsites.net/api/eclass',
            params={'codedname': classCode},
            timeout=30,
        )
        response.raise_for_status()
        lines = response.text.split('\n')
        # Takes the 20 characters before the last character of every line.
        # NOTE(review): presumably each line ends with a 20-character IRDI
        # followed by one trailing character — confirm against the API.
        properties_filtered_list = [line[-21:-1] for line in lines]
        # Filter into a fresh frame with a clean 0..n-1 index so that the
        # positional corpus ids returned by the search line up with the rows.
        corpus_filtered = corpus[corpus['irdi'].isin(properties_filtered_list)].reset_index(drop=True)

    if top_k <= 0 or corpus_filtered.empty:
        # Nothing to search (or nothing requested): return an empty result.
        hits = []
    else:
        text = name + '; ' + description  # concatenate name and description
        query_embedding = model.encode(text, convert_to_tensor=True)
        # Convert the stored corpus embeddings into a tensor.
        corpus_embeddings = torch.Tensor(corpus_filtered["Embeddings"])
        # One query was passed, so the first (only) result list is used.
        hits = semantic_search(query_embedding, corpus_embeddings, top_k=top_k)[0]

    # Read the matches; iterating the hits (not range(top_k)) keeps this safe
    # when fewer than top_k rows are available.
    # NOTE(review): columns are read by position (0, 1, 2) and labelled irdi,
    # preferedName, definition — verify against the dataset schema.
    corpus_ids = [hit.get('corpus_id') for hit in hits]
    predictions = pd.DataFrame({
        'preferedName': [corpus_filtered.iloc[idx, 1] for idx in corpus_ids],
        'irdi': [corpus_filtered.iloc[idx, 0] for idx in corpus_ids],
        'score': [round(hit.get('score'), 2) for hit in hits],
        'definition': [corpus_filtered.iloc[idx, 2] for idx in corpus_ids],
    })
    return predictions
# Gradio user interface: header, input fields, search button, result table
# and a set of clickable example queries.
with gr.Blocks() as demo:
    # Header row: title/description on the left, logo on the right.
    with gr.Row():
        with gr.Column(scale=15):
            gr.Markdown(
                " # ECLASS-Search-Demo\n"
                "This is a semantic search application that maps unknown product properties to the ECLASS standard. It is created by ECLASS e.V. in collaboration with the GART-labortory of the cologne university of applied science."
            )
        with gr.Column(scale=1):
            gr.Markdown("""<p align="right"><img src='' width=300p></p>""")
    # Inputs: the property to look up.
    with gr.Row():
        name_tx = gr.Textbox(label="Name:", placeholder="Name of the Property", lines=1)
        description_tx = gr.Textbox(label="Description:", placeholder="Description of the Property", lines=1)
    # Inputs: search options.
    with gr.Row():
        classcode_tx = gr.Textbox(
            value='nofilter',
            label="Filter with ECLASS ClassCode",
            placeholder="for no filter type: 'nofilter'",
            lines=1,
        )
        # precision=0 makes the component deliver an int; without it the value
        # arrives as a float, which predict() cannot use as a count.
        top_k_nu = gr.Number(value=10, label="Number of Matches", precision=0)
        language_drop = gr.Dropdown(["german", "english", "french"], value='german', label="Select language")
    # Button
    search = gr.Button("search")
    # Output
    prediction_df = gr.Dataframe(headers=['preferedName', 'irdi', 'score', 'definition'])
    # Run predict() when the "search" button is clicked.
    search.click(
        fn=predict,
        inputs=[name_tx, description_tx, language_drop, classcode_tx, top_k_nu],
        outputs=prediction_df,
    )
    # Examples
    gr.Examples(
        examples=[
            ["LED grün", "", "german", "nofilter", 10],
            ["Abmessungen", "", "german", "27272803", 10],
            ["Kabel", "", "german", "27272803", 10],
            ["Umgebungstemperatur", "", "german", "27272803", 10],
            ["Reproduzierbarkeit", "", "german", "27272803", 10],
            ["Repeat accuracy", "", "english", "27272803", 10],
        ],
        inputs=[name_tx, description_tx, language_drop, classcode_tx, top_k_nu],
    )
demo.launch(debug=True)