Spaces:
Sleeping
Sleeping
import gradio as gr | |
import sentence_transformers | |
from sentence_transformers import SentenceTransformer | |
import torch | |
from sentence_transformers.util import semantic_search | |
import pandas as pd | |
import requests | |
from datasets import load_dataset | |
import os | |
# Hugging Face access token, read once from the environment. A missing
# 'private_token' variable raises KeyError while the module is loading.
_hf_token = str(os.environ['private_token'])


def _load_corpus(repo_id):
    """Return the 'train' split of the dataset ``repo_id`` as a DataFrame."""
    return pd.DataFrame(load_dataset(repo_id, token=_hf_token)['train'])


# Property corpora (including their stored embeddings), one per language
corpus_ger = _load_corpus('ECLASS-Standard/eclass_properties_ger')
corpus_eng = _load_corpus('ECLASS-Standard/eclass_properties_eng')
corpus_fr = _load_corpus('ECLASS-Standard/eclass_properties_fr')

# Sentence-embedding models, one per language
model_ger = SentenceTransformer('ECLASS-Standard/gbert-base-eclass', token=_hf_token)
model_eng = SentenceTransformer('ECLASS-Standard/mboth-distil-eng-quora-sentence', token=_hf_token)
model_fr = SentenceTransformer('ECLASS-Standard/Sahajtomar-french_semantic', token=_hf_token)
# Definition of the search function
def predict(name, description, language, classCode='nofilter', top_k=10):
    """Return the ECLASS properties that best match a free-text property.

    Args:
        name: Name of the property to look up.
        description: Description of the property; it is appended to ``name``
            (separated by '; ') to form the query text.
        language: 'german', 'english' or 'french'. Selects the model and the
            corpus used for the search.
        classCode: ECLASS class code used to restrict the corpus through the
            filter web API, or 'nofilter' to search the whole corpus.
        top_k: Maximum number of matches to return. Converted with ``int()``
            so that a float coming from a UI number field is accepted.

    Returns:
        A pandas DataFrame with the columns 'preferedName', 'irdi', 'score'
        and 'definition', one row per hit, best hit first. It has fewer than
        ``top_k`` rows when the (filtered) corpus is smaller than ``top_k``
        and is empty when nothing can be searched.

    Raises:
        ValueError: If ``language`` is not one of the supported languages.
        requests.RequestException: If the filter API cannot be reached in
            time or answers with an HTTP error status.
    """
    # Select model and corpus for the requested language. Unknown languages
    # are rejected explicitly instead of failing later on an unbound name.
    if language == 'german':
        model, corpus = model_ger, corpus_ger
    elif language == 'english':
        model, corpus = model_eng, corpus_eng
    elif language == 'french':
        model, corpus = model_fr, corpus_fr
    else:
        raise ValueError(
            f"Unsupported language {language!r}; "
            "expected 'german', 'english' or 'french'"
        )

    # A UI number field may deliver a float; range()/top-k need an int.
    top_k = int(top_k)

    # Restrict the ECLASS corpus to the properties of one class, if requested
    if classCode == 'nofilter':
        corpus_filtered = corpus
    else:
        # HTTP request to the filter API. `params` URL-encodes the class code
        # and the timeout keeps a hanging server from blocking the request.
        response = requests.get(
            'https://bcon2-api.azurewebsites.net/api/eclass',
            params={'codedname': classCode},
            timeout=30,
        )
        response.raise_for_status()
        lines = response.text.split('\n')
        # Each line is cut to the 20 characters before its last character.
        # NOTE(review): presumably this is the IRDI followed by one trailing
        # delimiter character - confirm against the API response format.
        properties_filtered_list = [line[-21:-1] for line in lines]
        # Keep only corpus rows whose IRDI is in the list, then renumber the
        # rows on a fresh frame (no in-place change of a slice of the corpus).
        corpus_filtered = corpus[corpus['irdi'].isin(properties_filtered_list)]
        corpus_filtered = corpus_filtered.reset_index(drop=True)

    # Nothing to search for or nothing to search in: return an empty result
    # with the usual columns.
    if top_k < 1 or corpus_filtered.empty:
        return pd.DataFrame({'preferedName': [], 'irdi': [], 'score': [], 'definition': []})

    # Build the query embedding from name and description
    text = name + '; ' + description
    query_embedding = model.encode(text, convert_to_tensor=True)

    # Convert the stored corpus embeddings into a tensor
    corpus_embeddings = torch.Tensor(corpus_filtered["Embeddings"])

    # Run the semantic search; output[0] holds the hits for the single query
    output = sentence_transformers.util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)
    hits = output[0]

    # Read the hits actually returned (there may be fewer than top_k).
    # Columns are addressed by position: 0 is used as irdi, 1 as preferedName
    # and 2 as definition.
    rows = [hit['corpus_id'] for hit in hits]
    predictions = pd.DataFrame({
        'preferedName': [corpus_filtered.iloc[row, 1] for row in rows],
        'irdi': [corpus_filtered.iloc[row, 0] for row in rows],
        'score': [round(hit['score'], 2) for hit in hits],
        'definition': [corpus_filtered.iloc[row, 2] for row in rows],
    })
    return predictions
# Gradio user interface
with gr.Blocks() as demo:
    # Header row: introduction text on the left, logo on the right
    with gr.Row():
        with gr.Column(scale=15):
            gr.Markdown(""" # ECLASS-Search-Demo
This is a semantic search application that maps unknown product properties to the ECLASS standard. It is created by ECLASS e.V. in collaboration with the GART-labortory of the cologne university of applied science. You can find further information in our [paper](https://www.researchgate.net/publication/382002596_Erstellung_eines_semantischen_Suchalgorithmus_zur_Abbildung_proprietarer_Merkmale_auf_den_ECLASS-Standard)""")
        with gr.Column(scale=1):
            # Logo embedded as a base64 PNG data URI
            gr.Markdown("""<p align="right"><img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcAAAABwCAMAAAC+RlCAAAAA3lBMVEX///8AAFUAudcAAFMAAEwAAEkAAFAAAE+vr8YAt9YAAEr8/P0AAF2yssr6+v0QEFhZWYPCws9x0eWD0+UGBlja2ucAAFgAAEYAAEMAAEG9vcnPz9x2dpNOTnfp6fDy8vSBgaC66PKOjqhmZpELC1hHR34eHmVOxd3g4OvKytvBwdKAgKG6us9FRXc0NG2dnbd1dZo/P3okJGxTU4MaGmV3d5xra5QlJWSZmbUyMmksLG8+PnE9PXlaWosnJ2NNTYFxcZ5FRXEmJm3m+fvL7/ampsAzM3JhYYQcHGmZma/PpHebAAAPWUlEQVR4nO1d7V/iuhKGTZp2iyKeY4Hdnm25gBzA4stF0RVX73oWz/r//0MXRGkyybRJrVv2Z58PfsA2L/M0yWRmMqlUSpQoUaJEiRIlSpQoUaJEiRIlSpQoUaJEiRIlfh0iJyf4r2pGPfBG42jXiaLxaBjk1LdfgvrQH0eRs7tsuBfUMhXB9z00fPXGojnBbWRrfd3/fHTWtO24JNu2z3sn+0P0Ff8zh3E9U7VrBHxJnz2zl2vLlt9UxZY3D7smxQz3T76diyWw65kz0qbxwq3mBisy6/8Kwbj3tUkpI6AsYlN7MOl6yo6cMItHIzuD0bnLl0SODFq+v245bDijtD+f+RqTSM3rTgbLfir6Tm6nDv798vgCG/AK0K5+99c98E8HLan9vCzY5EpB4UBstJ158q4TsSRCx5pvDo9urYSW2/S4k0Jh6EyZ/OHGfbeqvbHGlJYrgR3N3j/DmybI4BnMap5I3dgTn2ntm9UbY7cFqrOnWu/VTylNaTmh9lHSzBBdtlJET+z2JP3TLJDAblurbmLdw24AAq3MBHZsqTqdxWdcpVry6KPjOZy20j7dFZh1kbY8FEbg8ExLCE/dcMHcnBeBtb4kRTt9Ha8dWTrCr65GIbKmjg7kL0cNep6yFBZFoH+r24UVLFEQeRHowxl0SWCqGlPraX95S5H0VMtY1NSXOmsmT6MFERj0zeq1hKLzIrArU0F20l66MOBv2bYLuQS/qjmCn8D6iWOwIAKPTcbfU9kO93ZOBNYbis63UzZxUdus4a2FVK3hx2vfJK3LxRDYtUzLJk1OK8+JwEA1EOhV8jtNk9HzBDiAzIbwEm7StF4IgUNjISwL5+ainAhcqIwY7C5R7zOWftVugM7LC28aSMKsUAiBR8ZSWMKKh2BOBKpm0KXAkzbgQ1lvXYEQwhgjyHcpaiHIJ5BUgj3DW1QEgcGlsp1PRqXmkxFN9X/3ZFNAPgQizbCkRYvDiWLuZ7R6+XV+eDef3zYly1pVnDuWtR4oaqWsvyrhcP51j6ksBC18VpiaahMJcB/1JDdWTF0ry9FiPBr5/mi86O1YcrPYZKOR50PgvnooJH3ulYlEkG09LF4Mn6Ef3VGpVNLnS4ikGZTQ6tH4ZaEcjjpNV6LQcioYxi2b5QRa1fQCzSR2iNUYCY+MerKlgm6UsXwIlJuxbss93o3hALaq1QPrU3AkqamM79sprNUewCHvnEvPdPBu+LNGTrjQ9eIdS+b3A9nmNDqHH3tr81A+BO4hS5aLFzeGbYI2ohUc+JCwBYJrFnuQxRacgYfIQ5JmVc8JuoILpbVH6U7w4NdON4tgLgSOOCGRA76eU/SdCEz+9jfVU1BH4700Afx6qWqb7kmbldf4PPPGsA/5U2+9HCCt2LCYC4GPnJgHN5zAyFfUi/MIqOkr9fvgVhQ/v5EYnoP/dZQVdUBFbUM//ZvCOxAbV22qBVZ7AH396+U/uRD4EI9A8vV/wqqDWh+B7Y0hziewzrGz+F9wH8LUdQVA1WkZxgq8KSCBdg95EIgrXwKDT3EB5PsuP9q5DUtyiyjy4FjcbJD/cp0XCSQDdQn1
a5Hn1kj9XCGABFJs4xWJcsiXQN4Mw6Ye/8WzB+wlQCAWQwKMLUkEXqtLqE1FNWarCUTlD7w9+RLIbyLs07qg+1NMnwYEtpC5NhCnUHLPdV4kkJ9dBYBZeKsJRNX2obijypXAcIcT5HIqFBQmiu2bdQkEkTb4CGSHSE0lgSkY8aN7SYQgVxvbSMApFKnYgMAbpKaSwBRcCALyKneizJG3NJUY4KfiFzpNJQa0Dx3rhUCbQF5RzJnAcM5v/C4DsPHCPnjNbUR4yYufuNyABgSinqJHwZTI6DbvA3HTFeHdEqS92e+/nkCPlzD7Uq84AjVYhCvcyB8g4ncuVx6mZ+xNOJ1oCAx4WAxOOO/HJTRvkzwkvxz6BI5v9vob7PVyNGZf8UyslrxImLLYRG26gsYhNkHKD/wYgq0MWmmqFFvdPK6I7Torok/g6sjLBtyvrydQcAutxpsvyhXZSIyhr4hODE0kwty9ArG3anTpwIRANV5NoCeYY1f78VBslKs2z/pNQGCVtRpm+oUcBuAeLLZrhKVhCwgUV7zWSn73onrfUL5X/yq7oGxyfGUwDK8UoYx00Nv/jTjcAgIFQxU5COFPS/VeHYw5U3nxidv8cqVLwFBZgk2PT/1t0jSTsAUECmSx6UpjATsEpFE+EkLEXNJztAioqUOplhxaZ90sPod//vwjC/6ToapnFE9gJO4ZOqvf9kUFE3OR4DFEtmX3Io3zYHhkMKHt6xO9U4Exfnz4mA1/G1YUo3gCxV27+6QGeqJ+Qi7VGwkvKQjMpmyWfl44KY6MUPdhYcThHx8/ZMPHHybVCDIonEDRK752lgZAPyHI/uwqOS7Xtm8vUhazIRY/ugaje1NHX6X5OyN/Hz5mnkQLJxC46+j6V+CBw4wxtbuUQExC7UlnmDQOpagniUN3MBtrqjTvkUBRk39xyQEthvxE3g6loD+ZAGpNTxIISD8aQuzW+ZGWSvMOCQR64MtQGwJePmHTWHCtcTKAWdUuTuFVO/10yFKlmWrYCN4hgcDZYz8vdnUwh7qoiStURB0rQMkVOpFGRCcinlnpFL5DAh1hAiN7L7+DUD401mqJ6NbVOVNC56gfNuix1AQPq0ZU0zJ/vEMCe8LHHx+FgAeumwmrWOjMic5EWsUPNfizgeooDITVS96XvD8CQWx0nBmmvgNC+ZJzxvhHS30zTfykjTNYCZzj1GwlS/EcJjL4/gj0hZGz8sa/4C+Rj/R8B1HPSp1K24neIq+7k5DpaA16nLSjeH8EigcsebfDLojHTRTcGuHJXdogoimJS8YXO1byV0DVrpE13h+BokucP3lXB4sg1dmI1YdHczdpELHbNLNK4DSqiV8BFjy1wrsj0AfC4aX7oGeMgQj3G3sJ+ghVZBqB8LrHLGFBTYhp+jurLfTDb2oLPQEzaA39Hxp0pkDgXLvoIGI6ls3aqMfQrwAJEFjhR0ZnxMc/tDsHUSyB10ozzBpgdBL16TEEXrePpK9LmgF51J1pW/0VkHNcE/3nz0z4Xf2BoeiNa4muG3D60jVLgFqLesoUBfhhGQlB51Kp0fyegb0oXkEgMMNcix5YcLZWfQA3CUH3UrG/bxucrw3HU8VHYJyK9S1RKIHimQO7Ewr4DDwSB+Ynm4MjWSVNMQlARJeSPoOe1igCRRIIrS12WwDMf2KbSX6NSHLY4nZxNcIpHMar4PGtgT6BQeeQQyyF7ASOzPJcgdRF3s4nDjeYcunDFAVx/rX6rM2VYKN2NpjSkhxvUdihPoHXLp+HJjYrZicQJg9IATkQ3v4pTG1iCkweC+CwjVewhfABkR2MlhCegUlIXvPLoU1gCE4nbTbEmQkM5UxLyWjzWmpN1GDxtHjhPQjU3xAIkgu10PglGB8w+B0JzP984NA0M5ywhYMEnqHrEjCLxwT2dI/9gbm+JHCNK9NbMhifMlmfQHgQ1JzAQKyrJHCNqekIrDLOCvkrCQxLAlUFSrnqUmHtxq+XBK5RHIGfzXTQp0q5
HXRJ4BpSnpjPyIO554np6YSCieA3YIBAUhL43D3MWJ97piZ1vuRkcF5dSOAA9dJ1NAnEE7nCZE/bTCC7Qx7MO1faGAxAWwloRYkjYwCBVYISeKq5jcBt1DDd2jYRKKWbRO4rggnfXk0g+CD2/lLiO8jSM9+8XwMGFvS6vaCPbeRhHkPUZQw38tdbRGAAU+Ui4V8nMLvqa/OFijoolht7H4zT5mYOrYF2Myz014GmtI0tFJryDpBFMARpRZMCrGpvCHWNMNNG1VIZ/X2Y3j3+jrMRCPbGaJolsNfgEqdJOYTVXgaYzIfbi0h5SubKEiqngGiG344aDXbeDM0zZWj5jbSdVoTejaWsw7EYshEIIl7a2KQEsm5xCQUbUjJy1UfgSVdbxdf5SEm3bWVGmgt4fClePiAWbfKGYG3VFKG498O6E0621kdT+Yq3Fpbohy48FHGOHHAoCcvUqfAlbFq2kBpu9aA5OrxSXE63+a+0fFTtc+njHT/I9aDqqrlxyQhUdZWE6sIGZt9+6/q+7/n+fvfbrSLGjh1j90Ys1ykcey8TbwhahiqAHpwoNxL2ZFOq3Z9e+cNgxXEtHHrON0VIBb/aHkryJvbx6b4X1Fe9qweefzRnMid4XGGeN7cooLzMRUo39dwRarl2dflXfXMLdyuVRGACyMs5P3DjBn5bLvQ5cTrWvSpkidKD+ffGv40v84GtDPHlq4KL4LrnVvX2+5d///3y/bhpqWILkbQ1RRGY7e4kbs0yIXCj7AMN3sYTShyBhLuxV9dRN/zp3iP05iMhpaR07cDLQ2unNWJpSAiqKYRA5b1vaSVx85AZgbsqyaHWg4ocdxHrILWHDOJyhfVrYX55WZWh63VBBGboBTnnVoEsBPrAupVw7CvcwfbhFT85w4QKVNxv1qSApVQQkpB4pBgCK98Me0EIP4lkIRBM21imzyfMoL0k1pBV15clwoZX6wQDU5HjsU/FERjem9Ur7rqzECimeSTIBnoNeDMk4TjomDFIdqTR4xldoZsW1FsQgZXg3sS10xL7kIFAkIgJux/8uXHAai3swmYmDDLVEbWRwSXWy74nH24qisBKcKO9DrIWMHhkIBDojzT5rMFPYEYXjJ5drQQH67qvlauXf68dm8NayXf6FkdgpXLFtBZCZklZcTMQKF7oRhR3vgkcAV/ApfC4P9FKM1KlpIu4fOud9OP1KzDrLu2AVIEEVoJZM/WMP7PmjiQF831gHWwCU44ajIB4wYCtR/PUBBOEVk8TvpLhLPlQ7rrvk/T0h0USuFyZTi/VZpfnHtDqXJWF0yg26dNqFgNGsNTTWg8g9AHqgaEzUd6X+wxi08vTlLGT2vfmJNI4EHFqfKO7EaSeQwRRb4dSCk0QhNnLXx9O1IK+alFtuOsjYgcu95uVsq4sZ8lzoRCmIMNbPDBKbanlq4Y3ZzoplINxr6ns+/JH+2yhd7o0PPxkvR1aSRcKbzB0Hr/9ZJ/ardbTO61W22redBYJJ4NGji6iZy2iNuZ+0/Fu81VgL9RGi87Zjrts+HPL221y3njcNUj4Odx9bPxkba7v7s5hZ2Fy085o983gmBxRrgXeaBztRiN/+Lskj35GfeiPlg0fj7yscQ9c37foGFmJEiVKlChRokSJEiVKlChRokSJEiVKlCiRHf8HRF6XA7CpR0kAAAAASUVORK5CYII=' width=300p></p>""")
    with gr.Row():
        # Inputs: free-text name and description of the property
        name_tx = gr.Textbox(label="Name:",placeholder="Name of the Property", lines=1)
        description_tx = gr.Textbox(label="Description:", placeholder="Description of the Property", lines=1)
    with gr.Row():
        # Inputs: class-code filter, number of matches and language
        classcode_tx = gr.Textbox(value='nofilter', label="Filter with ECLASS ClassCode", placeholder="for no filter type: 'nofilter'", lines=1)
        # precision=0 makes Gradio round the entry and pass it to the callback
        # as an int; predict uses this value as a range() bound and as top_k.
        top_k_nu = gr.Number(value=10, label="Number of Matches", precision=0)
        language_drop = gr.Dropdown(["german", "english","french"], value='german', label="Select language")
    # Button
    search = gr.Button("search")
    # Output table
    prediction_df = gr.Dataframe(headers = ['preferedName', 'irdi', 'score', 'definition'])
    # Wire the "search" button to the search function
    search.click(fn=predict, inputs=[name_tx,description_tx,language_drop,classcode_tx,top_k_nu], outputs=prediction_df)
    # Examples that fill the input fields
    gr.Examples(examples=[["LED grün", "","german","nofilter",10],["Abmessungen", "","german","27272803",10],["Kabel", "","german","27272803",10],["Umgebungstemperatur", "","german","27272803",10],["Reproduzierbarkeit", "","german","27272803",10],["Repeat accuracy", "","english","27272803",10]],inputs=[name_tx,description_tx,language_drop,classcode_tx,top_k_nu])
demo.launch(debug=True)