Spaces:
Runtime error
Runtime error
import gradio as gr | |
from functools import partial | |
from transformers import pipeline, pipelines | |
from sentence_transformers import SentenceTransformer, util | |
import json | |
###################### | |
##### INFERENCE ###### | |
###################### | |
class SentenceSimilarity: | |
def __init__(self, model: str, corpus_path: str): | |
f = open(corpus_path) | |
data = json.load(f) | |
self.id, self.url, self.title, self.text = ( | |
data["id"], | |
data["url"], | |
data["title"], | |
data["text"], | |
) | |
self.model = SentenceTransformer(model) | |
self.corpus_embeddings = self.model.encode(self.text) | |
def __call__(self, query: str, corpus: list[str], top_k: int = 5): | |
query_embedding = self.model.encode(query) | |
output = util.semantic_search( | |
query_embedding, self.corpus_embeddings, top_k=top_k | |
) | |
return output[0] | |
# Sentence Similarity | |
def sentence_similarity( | |
query: str, | |
texts: list[str], | |
titles: list[str], | |
urls: list[str], | |
pipe: SentenceSimilarity, | |
top_k: int, | |
) -> list[str]: | |
answer = pipe(query=query, corpus=texts, top_k=top_k) | |
output = [ | |
f""" | |
Cosine Similarity Score: {round(ans['score'], 3)} | |
## [{titles[ans['corpus_id']]} 🔗]({urls[ans['corpus_id']]}) | |
{texts[ans['corpus_id']]} | |
""" | |
for ans in answer | |
] | |
return output | |
# Text Analysis | |
def cls_inference(input: list[str], pipe: pipeline) -> dict: | |
results = pipe(input, top_k=None) | |
return {x["label"]: x["score"] for x in results} | |
# POSP | |
def tagging(text: str, pipe: pipeline): | |
output = pipe(text) | |
return {"text": text, "entities": output} | |
# Text Analysis | |
def text_analysis(text, pipes: list[pipeline]): | |
outputs = [] | |
for pipe in pipes: | |
if isinstance(pipe, pipelines.token_classification.TokenClassificationPipeline): | |
outputs.append(tagging(text, pipe)) | |
else: | |
outputs.append(cls_inference(text, pipe)) | |
return outputs | |
###################### | |
##### INTERFACE ###### | |
###################### | |
def text_interface( | |
pipe: pipeline, examples: list[str], output_label: str, title: str, desc: str | |
): | |
return gr.Interface( | |
fn=partial(cls_inference, pipe=pipe), | |
inputs=[ | |
gr.Textbox(lines=5, label="Input Text"), | |
], | |
title=title, | |
description=desc, | |
outputs=[gr.Label(label=output_label)], | |
examples=examples, | |
allow_flagging="never", | |
) | |
def search_interface( | |
pipe: SentenceSimilarity, | |
examples: list[str], | |
output_label: str, | |
title: str, | |
desc: str, | |
top_k: int, | |
): | |
with gr.Blocks() as sentence_similarity_interface: | |
gr.Markdown(title) | |
gr.Markdown(desc) | |
with gr.Row(): | |
# input on the left | |
with gr.Column(): | |
input_text = gr.Textbox(lines=5, label="Query") | |
# display documents | |
df = gr.DataFrame( | |
[ | |
[id, f"<a href='{url}' target='_blank'>{title} 🔗</a>"] | |
for id, title, url in zip(pipe.id, pipe.title, pipe.url) | |
], | |
headers=["ID", "Title"], | |
wrap=True, | |
datatype=["markdown", "html"], | |
interactive=False, | |
height=300, | |
) | |
button = gr.Button("Search...") | |
with gr.Column(): | |
# outputs top_k results in accordion format | |
outputs = [] | |
for i in range(top_k): | |
# open the first accordion | |
with gr.Accordion(label=f"Document {i + 1}", open=i == 0) as a: | |
output = gr.Markdown() | |
outputs.append(output) | |
gr.Examples(examples, inputs=[input_text], outputs=outputs) | |
button.click( | |
fn=partial( | |
sentence_similarity, | |
pipe=pipe, | |
texts=pipe.text, | |
titles=pipe.title, | |
urls=pipe.url, | |
top_k=top_k, | |
), | |
inputs=[input_text], | |
outputs=outputs, | |
) | |
return sentence_similarity_interface | |
def token_classification_interface( | |
pipe: pipeline, examples: list[str], output_label: str, title: str, desc: str | |
): | |
return gr.Interface( | |
fn=partial(tagging, pipe=pipe), | |
inputs=[ | |
gr.Textbox(placeholder="Masukan kalimat di sini...", label="Input Text"), | |
], | |
outputs=[gr.HighlightedText(label=output_label)], | |
title=title, | |
examples=examples, | |
description=desc, | |
allow_flagging="never", | |
) | |
def text_analysis_interface( | |
pipe: list, examples: list[str], output_label: str, title: str, desc: str | |
): | |
with gr.Blocks() as text_analysis_interface: | |
gr.Markdown(title) | |
gr.Markdown(desc) | |
input_text = gr.Textbox(lines=5, label="Input Text") | |
with gr.Row(): | |
outputs = [ | |
( | |
gr.HighlightedText(label=label) | |
if isinstance( | |
p, pipelines.token_classification.TokenClassificationPipeline | |
) | |
else gr.Label(label=label) | |
) | |
for label, p in zip(output_label, pipe) | |
] | |
btn = gr.Button("Analyze") | |
btn.click( | |
fn=partial(text_analysis, pipes=pipe), | |
inputs=[input_text], | |
outputs=outputs, | |
) | |
gr.Examples( | |
examples=examples, | |
inputs=input_text, | |
outputs=outputs, | |
) | |
return text_analysis_interface | |
# Summary | |
# summary_interface = gr.Interface.from_pipeline( | |
# pipes["summarization"], | |
# title="Summarization", | |
# examples=details["summarization"]["examples"], | |
# description=details["summarization"]["description"], | |
# allow_flagging="never", | |
# ) | |