Spaces:
Runtime error
Runtime error
Upload 4 files
Browse files- README.txt +13 -0
- app.py +81 -0
- requirements.txt +71 -0
- test.json +12 -0
README.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: ⚕️MedNER - Biomed Entity Recognizer
|
3 |
+
emoji: 👩⚕️🩺⚕️🙋
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: red
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.8
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import json
|
4 |
+
from collections import defaultdict
|
5 |
+
|
6 |
+
# Create tokenizer for biomed model
|
7 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
|
8 |
+
# Tokenizer and model are loaded from the same checkpoint id, so bind it once.
MODEL_ID = "d4data/biomedical-ner-all"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Token-classification pipeline; "simple" aggregation makes each result carry
# an "entity_group" key, which the functions below read.
pipe = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
|
11 |
+
|
12 |
+
# Matplotlib for entity graph
|
13 |
+
import matplotlib.pyplot as plt
|
14 |
+
plt.switch_backend("Agg")
|
15 |
+
|
16 |
+
# Load examples from JSON
|
17 |
+
# Maps each example text to its label. The examples file is optional: when it
# is absent the mapping stays empty instead of aborting the import.
EXAMPLES = {}
try:
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open("examples.json", "r", encoding="utf-8") as f:
        example_json = json.load(f)
except FileNotFoundError:
    example_json = []
# Malformed JSON or entries without "text"/"label" still raise, so a broken
# examples file is not silently ignored.
EXAMPLES = {x["text"]: x["label"] for x in example_json}
|
21 |
+
|
22 |
+
def group_by_entity(raw):
    """Tally how many items fall under each entity group.

    Args:
        raw: Iterable of mappings, each providing an "entity_group" key.

    Returns:
        A ``defaultdict(int)`` mapping entity-group name to occurrence count.
    """
    tally = defaultdict(int)
    for group in (item["entity_group"] for item in raw):
        tally[group] = tally[group] + 1
    return tally
|
28 |
+
|
29 |
+
|
30 |
+
def plot_to_figure(grouped):
    """Render per-entity-group counts as a bar chart.

    Args:
        grouped: Mapping of entity-group name to count.

    Returns:
        The matplotlib Figure containing the bar chart. The figure has already
        been closed in pyplot, but the object itself is still usable.
    """
    # Draw on explicit figure/axes objects rather than pyplot's implicit
    # "current figure", so the drawing calls always target this figure.
    fig, ax = plt.subplots()
    ax.bar(x=list(grouped.keys()), height=list(grouped.values()))
    ax.margins(0.2)
    # Extra bottom margin; presumably room for the vertical tick labels.
    fig.subplots_adjust(bottom=0.4)
    ax.tick_params(axis="x", labelrotation=90)
    # pyplot keeps every figure it creates alive until it is closed. Close it
    # here so repeated calls do not accumulate figures.
    plt.close(fig)
    return fig
|
37 |
+
|
38 |
+
|
39 |
+
def ner(text):
    """Run the NER pipeline on ``text`` and assemble the interface outputs.

    Args:
        text: The note text to analyse.

    Returns:
        A 4-tuple ``(ner_content, meta, label, figure)``:
        the text with its entity spans, summary counts per entity group,
        the label stored in ``EXAMPLES`` for this exact text (``"Unknown"``
        when there is none), and the bar chart of counts per group.
    """
    predictions = pipe(text)

    # Re-key each prediction: "entity_group" becomes "entity"; the remaining
    # fields are copied through unchanged.
    spans = []
    for pred in predictions:
        spans.append(
            {
                "entity": pred["entity_group"],
                "word": pred["word"],
                "score": pred["score"],
                "start": pred["start"],
                "end": pred["end"],
            }
        )
    highlighted = {"text": text, "entities": spans}

    counts = group_by_entity(predictions)
    chart = plot_to_figure(counts)
    rating = EXAMPLES.get(text, "Unknown")

    summary = {
        "entity_counts": counts,
        # Number of distinct entity groups found.
        "entities": len(counts),
        # Total number of entity spans across all groups.
        "counts": sum(counts.values()),
    }

    return (highlighted, summary, rating, chart)
|
66 |
+
|
67 |
+
|
68 |
+
# Build the components first so the Interface call below is plain wiring.
# The output order must match the tuple returned by ner().
note_input = gr.Textbox(label="Note text", value="")
result_outputs = [
    gr.HighlightedText(label="NER", combine_adjacent=True),
    gr.JSON(label="Entity Counts"),
    gr.Label(label="Rating"),
    gr.Plot(label="Bar"),
]
example_texts = list(EXAMPLES.keys())

interface = gr.Interface(
    fn=ner,
    inputs=note_input,
    outputs=result_outputs,
    examples=example_texts,
    allow_flagging="never",
)

interface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.8.3
|
2 |
+
aiosignal==1.2.0
|
3 |
+
anyio==3.6.1
|
4 |
+
async-timeout==4.0.2
|
5 |
+
attrs==22.1.0
|
6 |
+
bcrypt==4.0.1
|
7 |
+
black==22.10.0
|
8 |
+
certifi==2022.9.24
|
9 |
+
cffi==1.15.1
|
10 |
+
charset-normalizer==2.1.1
|
11 |
+
click==8.1.3
|
12 |
+
contourpy==1.0.5
|
13 |
+
cryptography==38.0.1
|
14 |
+
cycler==0.11.0
|
15 |
+
fastapi==0.85.0
|
16 |
+
ffmpy==0.3.0
|
17 |
+
filelock==3.8.0
|
18 |
+
fonttools==4.37.4
|
19 |
+
frozenlist==1.3.1
|
20 |
+
fsspec==2022.8.2
|
21 |
+
gradio==3.4.1
|
22 |
+
h11==0.12.0
|
23 |
+
httpcore==0.15.0
|
24 |
+
httpx==0.23.0
|
25 |
+
huggingface-hub==0.10.0
|
26 |
+
idna==3.4
|
27 |
+
Jinja2==3.1.2
|
28 |
+
kiwisolver==1.4.4
|
29 |
+
linkify-it-py==1.0.3
|
30 |
+
markdown-it-py==2.1.0
|
31 |
+
MarkupSafe==2.1.1
|
32 |
+
matplotlib==3.6.1
|
33 |
+
mdit-py-plugins==0.3.1
|
34 |
+
mdurl==0.1.2
|
35 |
+
multidict==6.0.2
|
36 |
+
mypy-extensions==0.4.3
|
37 |
+
numpy==1.23.3
|
38 |
+
orjson==3.8.0
|
39 |
+
packaging==21.3
|
40 |
+
pandas==1.5.0
|
41 |
+
paramiko==2.11.0
|
42 |
+
pathspec==0.10.1
|
43 |
+
Pillow==9.2.0
|
44 |
+
platformdirs==2.5.2
|
45 |
+
pycparser==2.21
|
46 |
+
pycryptodome==3.15.0
|
47 |
+
pydantic==1.10.2
|
48 |
+
pydub==0.25.1
|
49 |
+
PyNaCl==1.5.0
|
50 |
+
pyparsing==3.0.9
|
51 |
+
python-dateutil==2.8.2
|
52 |
+
python-multipart==0.0.5
|
53 |
+
pytz==2022.4
|
54 |
+
PyYAML==6.0
|
55 |
+
regex==2022.9.13
|
56 |
+
requests==2.28.1
|
57 |
+
rfc3986==1.5.0
|
58 |
+
six==1.16.0
|
59 |
+
sniffio==1.3.0
|
60 |
+
starlette==0.20.4
|
61 |
+
tokenizers==0.12.1
|
62 |
+
tomli==2.0.1
|
63 |
+
torch==1.12.1
|
64 |
+
tqdm==4.64.1
|
65 |
+
transformers==4.22.2
|
66 |
+
typing_extensions==4.4.0
|
67 |
+
uc-micro-py==1.0.1
|
68 |
+
urllib3==1.26.12
|
69 |
+
uvicorn==0.18.3
|
70 |
+
websockets==10.3
|
71 |
+
yarl==1.8.1
|
test.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"type": "service_account",
|
3 |
+
"project_id": "clinical-nlp-b9117",
|
4 |
+
"private_key_id": "6972d02311e8ee0c5b582551fbcf9c99b9169b58",
|
5 |
+
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCmrSoB92G/ihxL\nzIk7Y8RUNc6Iezr6pZ+eSz2RGxEz2qPMfWjNeOJEAlACYJp4aUwyX5IHGb8Eh/oj\nkr7nVsgvuDyrTWpCAv16AuRycKgxvqj0+uDaVrF0vLgTumy62x5QM7i+n2YTDXoP\nXHMHX7yXZ6zc9Ibmm065f2kgWyjmIZDt+flTBYeBS203ZIzMBHhN1e1jdtzR36z/\n1MBmLjpRKvmuHF2SnraVjoRh7Xe6R99K8DxRQ61TJt9xLukvLBYelnqf2/cK8bZM\n5p2pErR4FE7ki3MX7HWdMJQSe+Uj10hurjNBdHcCaNUou5EL5+NRgqLow0tfatWC\n+Jpiw3K9AgMBAAECggEAGpT7YhzmBfos0RnpuQMMSLHcIoAkw9yuPDybsQy0DaUN\nAovtrvdcfqQvxnFJsXJ5qH79dwxwHnThO9MnhxWcD6A+bMOH8scvTcowTOASsvxJ\nTejE+41f99IxOVQ+Cv7vMrNM/3nEeb1ofhKsdbybAzqRoxuMeDLEt2jOh06Ck1D8\n/YV8kavGYR/VNxO2l7C5DZJYXgcm18ZrTFEXZes8bydZesoHl+JRVO1utjR2IhAj\nnYqqNaf5RXruEzXWxP0+jjEgg4NLFfqVnQTZFrLwokwc8NEMXf3dZJ0k0cHHmxOB\n6BHuPZhMOZ56U74PyWgCmbPp9g/SLt3iInpZ4ahmAQKBgQDhQwdbUEQ1q+KSMsMm\ndJl+ghX/Ff3uaZ7LjdBiOgtmTaIVbuf/bw0V9x8GbRGdJJyp546R5vhUE0zKzkMt\nTNdDNrWk3Zh4tCRHvPEHiqmDn91pWFeDDQf/OjKz+SFV31mQ050BOatZ8dBEy+md\nvHG8yLTB7oJvSpviim4ty15wIQKBgQC9a5jsBFB0fltHNJ0lZp7I2hF+aOqOngJM\nqEipPjJABJ4izGTOK/KW8CyWEP82nb6p7u9v0f4sV8CFWXG178DMv1NlRYzom3CQ\nkXdx+nRgO4oX4eEfYuoP2PxF0hCOwbh55NgFdwTt/dExX6bau4d9yQMV7o0TXpRW\nZzygOOTfHQKBgQC7ayhwyfymZydwmjmSAks/XX5tqN+IgGo1U/1/7GlVqdvkV01B\nUiUiFGTE1PRluXN7TYRqUjBky1YGGsz7oMYtTxScYh6ctszEvygPLUhSki0GnBDb\noXj42nQbF3mr19POUrJ7tX6irDWrN7lcmtBK0PbLr+ToMbw3JRP8mAsv4QKBgEac\nC18/pHYofAIpHMNKY7pff9HtbjJHuHe2648bPkQa9I/oPVOVklKtqREvuNM1LlPO\nW7cFQohpFb0fwIGfo/EvCPlhWcuD1gwuDaaRRDxzNWD9tJusla/epPup+L4efJQD\nuHshCNdmnEqZa2tyKGm9Osc8K56izQ0AYtsfGkIJAoGAMtaXTA96OXUvpEm4waQX\nOTbuEZQEdntnYWHacNrGlvwnNmvNC9hXwB38ijxXHEn0j1QUcV3w5QXFupwzjpZ2\nlIp9vTq1mOTVhHzmQmOb9DKKAE/2pi2HnekItncoQCBtgJ7k6tIk1KEfvXuQS/oM\nh8qPMwuMcQ/vKGhl3xLYo9M=\n-----END PRIVATE KEY-----\n",
|
6 |
+
"client_email": "firebase-adminsdk-qaxaj@clinical-nlp-b9117.iam.gserviceaccount.com",
|
7 |
+
"client_id": "117623958723912081118",
|
8 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
9 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
10 |
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
11 |
+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-qaxaj%40clinical-nlp-b9117.iam.gserviceaccount.com"
|
12 |
+
}
|