"""Gradio demo that tags each sentence of an abstract with its nearest rhetoric functions."""

import csv
import html

import gradio as gr
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentencex import segment
from usearch.index import BatchMatches, Index, Matches, MetricKind, search

# Markup prepended to every rendered result (currently empty).
HTML_Output = """"""

# Embedding model plus the list of rhetoric-function labels it is matched
# against. The label embeddings are computed once at start-up and reused for
# every request.
model = SentenceTransformer("Corran/SciGenNomicEmbed", trust_remote_code=True)
rf = list(load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"])
rf_emb = model.encode(rf)


def get_matches(inputs, top_k=3):
    """Look up the closest rhetoric functions for each input sentence.

    Args:
        inputs: Iterable of sentence strings.
        top_k: How many rhetoric functions to retrieve per sentence.

    Returns:
        A list with one entry per sentence. Each entry is a list of
        ``(rhetoric_function, distance)`` tuples, where ``distance`` is the
        squared-L2 distance rounded to two decimals and converted to ``str``.
    """
    paragraph_matches = []
    for sentence in inputs:
        # Each sentence is encoded and searched on its own so that iterating
        # the search result yields the individual matches for that sentence.
        embedding = model.encode(sentence, batch_size=128)
        hits = search(rf_emb, embedding, top_k, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[hit.key], str(round(hit.distance, 2))) for hit in hits]
        )
    return paragraph_matches


def return_rf_scores(abstract):
    """Render an abstract as HTML with a rhetoric-function tooltip per sentence.

    Args:
        abstract: Free text entered by the user.

    Returns:
        An HTML string: ``HTML_Output`` followed by one ``<span>`` per
        sentence. The span's ``title`` lists the matched rhetoric functions
        and their distances, one per line.
    """
    # Nothing to segment or embed for empty / whitespace-only input.
    if not abstract or not abstract.strip():
        return HTML_Output

    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        tooltip = "\n".join(
            f"{label} : {distance}" for label, distance in sentence_matches
        )
        # Both the sentence and the labels are untrusted text headed for an
        # HTML component, so escape them. Line breaks inside the attribute
        # are written as character references so they survive HTML parsing.
        title = html.escape(tooltip, quote=True).replace("\n", "&#10;")
        # NOTE(review): minimal markup using the native `title` tooltip —
        # confirm against the intended styling before shipping.
        parts.append(f'<span title="{title}">{html.escape(sentence)}</span>\n')
    return "".join(parts)


# Example inputs shown under the text box; one tab-separated row per example.
# Blank lines in the file are skipped so they do not become empty examples.
with open("examples.tsv", "r", encoding="utf-8", newline="") as ex:
    examples = [row for row in csv.reader(ex, delimiter="\t", quotechar='"') if row]

demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
    examples=examples,
)
demo.launch()