"""Gradio demo: biomedical named-entity recognition over free-text notes.

Loads the d4data/biomedical-ner-all token-classification model, exposes a
single-textbox Gradio interface, and renders the recognized entities as
highlighted text, a per-entity-group count table, an optional example
label, and a bar chart of the counts.
"""

import json
from collections import defaultdict

import gradio as gr
import pandas as pd
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Pretrained biomedical NER model and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")

# Map example note text -> human-assigned label; feeds the "Rating" output
# and the example picker below. Assumes examples.json is a list of
# {"text": ..., "label": ...} objects — matches the comprehension here.
EXAMPLE_MAP = {}
with open("examples.json", "r") as f:
    example_json = json.load(f)
EXAMPLE_MAP = {x["text"]: x["label"] for x in example_json}

# "simple" aggregation merges word-piece tokens back into whole-word spans.
pipe = pipeline(
    "ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple"
)


def group_by_entity(raw):
    """Count occurrences of each entity group in the raw pipeline output.

    Returns a dict mapping entity-group name -> count, plus a "total" key
    holding the sum of all per-group counts.
    """
    out = defaultdict(int)
    for ent in raw:
        out[ent["entity_group"]] += 1
    out["total"] = sum(out.values())
    return out


def ner(text):
    """Run NER on *text*.

    Returns a 4-tuple matching the interface outputs:
    (HighlightedText payload, entity-count dict, example label or None,
    matplotlib Figure with a bar chart of the counts).
    """
    raw = pipe(text)
    ner_content = {
        "text": text,
        "entities": [
            {
                "entity": x["entity_group"],
                "word": x["word"],
                # Cast numpy float32 -> builtin float so the payload is
                # JSON-serializable when gradio ships it to the browser.
                "score": float(x["score"]),
                "start": x["start"],
                "end": x["end"],
            }
            for x in raw
        ],
    }
    grouped = group_by_entity(raw)
    # list(...) around the dict views for portability across pandas versions.
    df = pd.DataFrame(
        {"Entity": list(grouped.keys()), "Count": list(grouped.values())}
    )
    # BUG FIX: df.hist() returns an ndarray of Axes, which the "plot" output
    # cannot render (and it would histogram the count values, not chart the
    # per-entity counts). Build a bar chart and hand back its Figure instead.
    fig = df.plot.bar(x="Entity", y="Count", legend=False).get_figure()
    label = EXAMPLE_MAP.get(text)
    return (ner_content, grouped, label, fig)


interface = gr.Interface(
    ner,
    inputs=gr.Textbox(label="Note text", value=""),
    outputs=[
        gr.HighlightedText(label="NER", combine_adjacent=True),
        gr.JSON(label="Entity Counts"),
        gr.Label(label="Rating"),
        "plot",
    ],
    examples=list(EXAMPLE_MAP.keys()),
)

# Guard the launch so importing this module (e.g. for testing or by a
# hosting runtime) does not start the server as an import side effect.
if __name__ == "__main__":
    interface.launch()