# Spaces: Running
import json
from collections import defaultdict, Counter

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.figure import Figure
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
# Identifier of the pretrained token-classification checkpoint; shared by the
# tokenizer and the model so the two can never drift apart.
MODEL_NAME = "d4data/biomedical-ner-all"

# Loaded once at module import so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

# Agg is matplotlib's non-interactive backend (no GUI window is needed).
plt.switch_backend("Agg")

# Example notes: maps each entry's "text" to its "label".
# The encoding is pinned to UTF-8 so the file decodes the same way regardless
# of the platform's default locale encoding.
with open("examples.json", "r", encoding="utf-8") as f:
    content = json.load(f)
examples = {x["text"]: x["label"] for x in content}

# NER pipeline using the "simple" aggregation strategy; its results are read
# through their "entity_group" key elsewhere in this file.
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
def plot_to_figure(grouped):
    """Render per-category counts as a bar chart.

    Args:
        grouped: Mapping of category label to count (for example a
            ``Counter``). Keys become the x positions and values the bar
            heights.

    Returns:
        A matplotlib ``Figure`` containing the bar chart.
    """
    # Build the figure directly instead of through pyplot. pyplot keeps a
    # global reference to every figure it creates (none was ever closed here,
    # so they accumulated call after call) and draws on a shared "current
    # figure", which concurrent callers could interleave.
    fig = Figure()
    ax = fig.subplots()
    ax.bar(x=list(grouped.keys()), height=list(grouped.values()))
    ax.margins(0.2)
    # Extra bottom margin leaves room for the vertically rotated tick labels.
    fig.subplots_adjust(bottom=0.4)
    ax.tick_params(axis="x", labelrotation=90)
    return fig
def run_ner(text):
    """Run the NER pipeline on ``text`` and prepare the three UI outputs.

    Args:
        text: The note text to analyse.

    Returns:
        A 3-tuple ``(ner_content, rows, figure)``:
        ``ner_content`` is a dict with the original ``"text"`` and an
        ``"entities"`` list (one dict per detected span with ``entity``,
        ``word``, ``score``, ``start`` and ``end``); ``rows`` is a list of
        ``[entity_group, count]`` pairs; ``figure`` is the bar chart produced
        by ``plot_to_figure`` from the same counts.
    """
    spans = pipe(text)

    # Re-key each pipeline result: "entity_group" is exposed as "entity".
    entities = []
    for span in spans:
        entities.append(
            {
                "entity": span["entity_group"],
                "word": span["word"],
                "score": span["score"],
                "start": span["start"],
                "end": span["end"],
            }
        )
    ner_content = {"text": text, "entities": entities}

    # Tally how many spans were found per entity group.
    counts = Counter()
    for span in spans:
        counts[span["entity_group"]] += 1

    rows = [list(pair) for pair in counts.items()]
    figure = plot_to_figure(counts)
    return ner_content, rows, figure
# UI layout: one input textbox, a collapsible list of example notes, and three
# tabs showing the same NER result as highlighted text, a bar chart and a table.
with gr.Blocks() as demo:
    note = gr.Textbox(label="Note text")
    with gr.Accordion("Examples", open=False):
        # Deliberately not assigned: binding the component to `examples`
        # would overwrite the module-level dict of example notes.
        gr.Examples(examples=list(examples), inputs=note)
    with gr.Tab("NER"):
        highlight = gr.HighlightedText(label="NER", combine_adjacent=True)
    with gr.Tab("Bar"):
        plot = gr.Plot(label="Bar")
    with gr.Tab("Table"):
        table = gr.Dataframe(headers=["Entity", "Count"])
    # On the textbox's submit event, run NER and fan the three return values
    # out to the matching components (order must match run_ner's return).
    note.submit(run_ner, [note], [highlight, table, plot])

demo.launch()