Spaces:
Running
Running
import gradio as gr | |
import sentence_transformers | |
from sentence_transformers import SentenceTransformer | |
import torch | |
from sentence_transformers.util import semantic_search | |
import pandas as pd | |
import requests | |
# One pre-embedded corpus (JSON file) and one sentence encoder per supported
# language. Everything is loaded once at start-up and shared by all requests.

# German
corpus_ger = pd.read_json('corpus_embed_ger_model5.json')
model_ger = SentenceTransformer('JoBeer/model5')

# English
corpus_eng = pd.read_json('corpus_embed_eng_mboth.json')
model_eng = SentenceTransformer('mboth/distil-eng-quora-sentence')

# French
corpus_fr = pd.read_json('corpus_embed_fr_Sahajtomar.json')
model_fr = SentenceTransformer('Sahajtomar/french_semantic')
# Definition of the search function
def predict(name, description, language, classCode='nofilter', top_k=10):
    """Return the corpus entries most similar to a free-text property.

    The query text is ``name + '; ' + description``. It is encoded with the
    model of the selected language and compared against the stored
    embeddings of that language's corpus.

    Args:
        name: Name of the property to look up.
        description: Description of the property to look up.
        language: One of ``'german'``, ``'english'`` or ``'french'``.
        classCode: ``'nofilter'`` to search the whole corpus; any other value
            is sent to the filter API and only corpus rows whose ``irdi`` is
            in the API response are searched.
        top_k: Maximum number of matches to return. Floats such as ``10.0``
            (as delivered by a numeric UI field) are truncated to int;
            ``None`` falls back to 10.

    Returns:
        A ``pandas.DataFrame`` with the columns ``preferedName``, ``irdi``,
        ``score`` and ``definition``. It holds at most ``top_k`` rows and is
        empty when nothing is left to search.

    Raises:
        ValueError: If ``language`` is not supported.
        requests.RequestException: If the filter API request fails.
    """
    columns = ['preferedName', 'irdi', 'score', 'definition']

    # Pick the encoder and the pre-embedded corpus for the chosen language.
    resources = {
        'german': (model_ger, corpus_ger),
        'english': (model_eng, corpus_eng),
        'french': (model_fr, corpus_fr),
    }
    if language not in resources:
        raise ValueError(f"Unsupported language: {language!r}")
    model, corpus = resources[language]

    # A numeric UI field may hand over a float; the search needs an int.
    top_k = 10 if top_k is None else int(top_k)
    if top_k < 1:
        return pd.DataFrame(columns=columns)

    # Concatenate name and description and embed the query.
    text = name + '; ' + description
    query_embedding = model.encode(text, convert_to_tensor=True)

    # Optionally restrict the corpus to the properties of one class.
    if classCode == 'nofilter':
        corpus_filtered = corpus
    else:
        # Let requests build the query string so the class code is escaped.
        response = requests.get(
            'https://bcon2-api.azurewebsites.net/api/eclass',
            params={'codedname': classCode},
            timeout=30,
        )
        response.raise_for_status()
        # NOTE(review): each response line is assumed to end with a
        # 20-character IRDI followed by one trailing character - confirm
        # against the API's actual response format.
        properties_filtered_list = [line[-21:-1] for line in response.text.split('\n')]
        corpus_filtered = corpus[corpus['irdi'].isin(properties_filtered_list)]

    if corpus_filtered.empty:
        return pd.DataFrame(columns=columns)

    # Convert the stored embeddings to one float tensor. Going through a
    # plain list makes this independent of the frame's index labels.
    corpus_embeddings = torch.tensor(
        corpus_filtered["Embeddings"].tolist(), dtype=torch.float32
    )

    # Run the semantic search; a single query yields a single hit list.
    hits = sentence_transformers.util.semantic_search(
        query_embedding, corpus_embeddings, top_k=top_k
    )[0]

    # 'corpus_id' is a row position in corpus_embeddings, so rows are taken
    # positionally. Iterating over the hits (not range(top_k)) copes with a
    # corpus that has fewer rows than top_k.
    matched = corpus_filtered.iloc[[hit['corpus_id'] for hit in hits]]

    # Column positions 0 / 1 / 2 are read as irdi / preferred name /
    # definition, as in the original positional access.
    predictions = pd.DataFrame({
        'preferedName': matched.iloc[:, 1].tolist(),
        'irdi': matched.iloc[:, 0].tolist(),
        'score': [hit['score'] for hit in hits],
        'definition': matched.iloc[:, 2].tolist(),
    })
    return predictions
# Gradio user interface
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("""# ECLASS-Search-Demo
This is a semantic search application that maps unknown product properties to the ECLASS standard. It is created by ECLASS e.V. in collaboration with the GART-labortory of the cologne university of applied science.""") #This demo provides a semantic search application for ECLASS features.
    with gr.Row():
        # Inputs: free-text name and description of the property
        name_tx = gr.Textbox(label="Name:", placeholder="Name of the Property", lines=1)
        description_tx = gr.Textbox(label="Description:", placeholder="Description of the Property", lines=1)
    with gr.Row():
        # Inputs: optional class filter, number of matches, language
        classcode_tx = gr.Textbox(value='nofilter', label="Filter with ECLASS ClassCode", placeholder="type nofilter", lines=1)
        # precision=0 makes the component deliver an int; without it the
        # value arrives as a float, which predict() uses as a count.
        top_k_nu = gr.Number(value=10, label="Number of Matches", precision=0)
        language_drop = gr.Dropdown(["german", "english", "french"], value='german', label="Select language")
    # Button
    search = gr.Button("search")
    # Output
    prediction_df = gr.Dataframe(headers=['preferedName', 'irdi', 'score', 'definition'])
    # Wire the search function to the "search" button. The input order must
    # match predict(name, description, language, classCode, top_k).
    search.click(fn=predict, inputs=[name_tx, description_tx, language_drop, classcode_tx, top_k_nu], outputs=prediction_df)
demo.launch(debug=True)