Spaces:
Sleeping
Sleeping
import gradio as gr | |
import sentence_transformers | |
from sentence_transformers import SentenceTransformer | |
import torch | |
from sentence_transformers.util import semantic_search | |
import pandas as pd | |
import requests | |
from datasets import load_dataset | |
import os | |
# Access token for the ECLASS-Standard repositories, read once from the
# environment (raises KeyError when 'private_token' is not set).
_private_token = str(os.environ['private_token'])


def _load_corpus(repo_id):
    """Load the 'train' split of the dataset *repo_id* into a DataFrame."""
    dataset = load_dataset(repo_id, token=_private_token)
    return pd.DataFrame(dataset['train'])


def _load_model(repo_id):
    """Load the sentence-transformer model stored under *repo_id*."""
    return SentenceTransformer(repo_id, token=_private_token)


# Corpora with pre-computed embeddings, one per supported language.
corpus_ger = _load_corpus('ECLASS-Standard/eclass_properties_ger')
corpus_eng = _load_corpus('ECLASS-Standard/eclass_properties_eng')
corpus_fr = _load_corpus('ECLASS-Standard/eclass_properties_fr')

# Embedding models, one per supported language.
model_ger = _load_model('ECLASS-Standard/gbert-base-eclass')
model_eng = _load_model('ECLASS-Standard/mboth-distil-eng-quora-sentence')
model_fr = _load_model('ECLASS-Standard/Sahajtomar-french_semantic')
#Definition of search function | |
def predict(name, description, language, classCode='nofilter', top_k=10):
    """Find the ECLASS properties most similar to a free-text property.

    The name and description are concatenated into one query string, embedded
    with the model of the chosen language and compared against the stored
    corpus embeddings of that language via ``semantic_search``.

    Args:
        name: Name of the property to look up.
        description: Description of the property; may be empty.
        language: ``'german'``, ``'english'`` or ``'french'``.
        classCode: ECLASS class code used to restrict the candidate
            properties through the filter API, or ``'nofilter'`` to search
            the complete corpus.
        top_k: Maximum number of matches to return. Floats (as delivered by
            a numeric UI widget) are truncated to an integer; ``None`` falls
            back to 10.

    Returns:
        A ``pandas.DataFrame`` with the columns ``preferedName``, ``irdi``,
        ``score`` (rounded to two decimals) and ``definition``; one row per
        hit, in the order returned by ``semantic_search``. The frame is empty
        when ``top_k`` is below 1 or the class filter matches no property.

    Raises:
        ValueError: If ``language`` is not one of the supported languages.
        requests.RequestException: If the filter API cannot be reached in
            time or answers with an HTTP error status.
    """
    columns = ['preferedName', 'irdi', 'score', 'definition']

    # Pick model and corpus for the requested language.
    resources = {
        'german': (model_ger, corpus_ger),
        'english': (model_eng, corpus_eng),
        'french': (model_fr, corpus_fr),
    }
    if language not in resources:
        raise ValueError(
            f"Unsupported language {language!r}; "
            f"expected one of {sorted(resources)}"
        )
    model, corpus = resources[language]

    # range()/top-k selection need an integer; UI number fields may send 10.0.
    top_k = 10 if top_k is None else int(top_k)
    if top_k < 1:
        return pd.DataFrame(columns=columns)

    # Restrict the corpus to the properties of the requested class, if any.
    if classCode == 'nofilter':
        corpus_filtered = corpus
    else:
        # Let requests URL-encode the class code instead of pasting it into
        # the URL, and never wait indefinitely for the filter API.
        response = requests.get(
            'https://bcon2-api.azurewebsites.net/api/eclass',
            params={'codedname': classCode},
            timeout=30,
        )
        response.raise_for_status()
        # Each response line is expected to end with a 20-character IRDI
        # followed by one trailing character (NOTE(review): presumably a
        # carriage return or delimiter -- confirm against the API output).
        allowed_irdis = {line[-21:-1] for line in response.text.split('\n')}
        # reset_index returns a new frame, so the shared corpus is never
        # modified and positional ids line up with the embedding rows.
        corpus_filtered = corpus[corpus['irdi'].isin(allowed_irdis)].reset_index(drop=True)

    if corpus_filtered.empty:
        return pd.DataFrame(columns=columns)

    # Embed the query text.
    text = (name or '') + '; ' + (description or '')
    query_embedding = model.encode(text, convert_to_tensor=True)

    # Stack the stored embedding vectors into one float tensor on the same
    # device as the query embedding.
    corpus_embeddings = torch.tensor(
        corpus_filtered['Embeddings'].tolist(),
        dtype=torch.float32,
        device=query_embedding.device,
    )

    # One query -> one hit list; it may hold fewer than top_k entries when
    # the filtered corpus is small, so iterate the hits, not range(top_k).
    hits = semantic_search(query_embedding, corpus_embeddings, top_k=top_k)[0]
    matched = corpus_filtered.iloc[[hit['corpus_id'] for hit in hits]]

    # Columns are addressed by position as before: 0 -> irdi,
    # 1 -> preferedName, 2 -> definition (assumes the corpus column order).
    return pd.DataFrame({
        'preferedName': matched.iloc[:, 1].tolist(),
        'irdi': matched.iloc[:, 0].tolist(),
        'score': [round(hit['score'], 2) for hit in hits],
        'definition': matched.iloc[:, 2].tolist(),
    }, columns=columns)
#gradio user interface | |
# Layout: header row (title text + logo), two rows of inputs, the search
# button, the result table and a set of clickable example queries.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=15):
            gr.Markdown(""" # ECLASS-Search-Demo
This is a semantic search application that maps unknown product properties to the ECLASS standard. It is created by ECLASS e.V. in collaboration with the GART-labortory of the cologne university of applied science. You can find further information in our [paper](https://www.researchgate.net/publication/382002596_Erstellung_eines_semantischen_Suchalgorithmus_zur_Abbildung_proprietarer_Merkmale_auf_den_ECLASS-Standard)""")
        with gr.Column(scale=1):
            # Logo placeholder; the image source is empty in this file.
            gr.Markdown("""<p align="right"><img src='' width=300p></p>""")
    with gr.Row():
        # Free-text inputs describing the property to look up.
        name_tx = gr.Textbox(label="Name:", placeholder="Name of the Property", lines=1)
        description_tx = gr.Textbox(label="Description:", placeholder="Description of the Property", lines=1)
    with gr.Row():
        # Search options.
        classcode_tx = gr.Textbox(
            value='nofilter',
            label="Filter with ECLASS ClassCode",
            placeholder="for no filter type: 'nofilter'",
            lines=1,
        )
        # precision=0 makes the component deliver an int; predict() uses the
        # value as a count and cannot work with a float such as 10.0.
        top_k_nu = gr.Number(value=10, label="Number of Matches", precision=0)
        language_drop = gr.Dropdown(
            ["german", "english", "french"],
            value='german',
            label="Select language",
        )
    # Button that triggers the search.
    search = gr.Button("search")
    # Result table.
    prediction_df = gr.Dataframe(headers=['preferedName', 'irdi', 'score', 'definition'])

    # The order of the inputs must match the parameter order of predict().
    search_inputs = [name_tx, description_tx, language_drop, classcode_tx, top_k_nu]
    search.click(fn=predict, inputs=search_inputs, outputs=prediction_df)

    # Example queries that pre-fill the input widgets.
    gr.Examples(
        examples=[
            ["LED grün", "", "german", "nofilter", 10],
            ["Abmessungen", "", "german", "27272803", 10],
            ["Kabel", "", "german", "27272803", 10],
            ["Umgebungstemperatur", "", "german", "27272803", 10],
            ["Reproduzierbarkeit", "", "german", "27272803", 10],
            ["Repeat accuracy", "", "english", "27272803", 10],
        ],
        inputs=search_inputs,
    )

demo.launch(debug=True)