Robo2000 committed on
Commit
2c4b317
·
1 Parent(s): 362f53f

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.txt +13 -0
  2. app.py +81 -0
  3. requirements.txt +71 -0
  4. test.json +12 -0
README.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ⚕️MedNER - Biomed Entity Recognizer
3
+ emoji: 👩‍⚕️🩺⚕️🙋
4
+ colorFrom: purple
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.8
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import json
4
+ from collections import defaultdict
5
+
6
+ # Create tokenizer for biomed model
7
+ from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
8
# Single source of truth for the checkpoint shared by tokenizer and model.
MODEL_NAME = "d4data/biomedical-ner-all"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

# Token-classification pipeline used by `ner`; the rest of this file reads
# the "entity_group", "word", "score", "start" and "end" keys of its results.
pipe = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
11
+
12
+ # Matplotlib for entity graph
13
+ import matplotlib.pyplot as plt
14
+ plt.switch_backend("Agg")
15
+
16
+ # Load examples from JSON
17
def load_examples(path="examples.json"):
    """Load the example notes and their labels from a JSON file.

    The file is expected to hold a JSON array of objects, each with a
    "text" and a "label" key.

    Args:
        path: location of the JSON file to read.

    Returns:
        A dict mapping each example's text to its label. If the file does
        not exist, a warning is emitted and an empty dict is returned so the
        app can still start without examples.
    """
    import warnings

    try:
        with open(path, "r", encoding="utf-8") as f:
            records = json.load(f)
    except FileNotFoundError:
        # A missing examples file should not prevent the app from starting;
        # it only means there are no canned examples or known labels.
        warnings.warn(f"Examples file {path!r} not found; starting without examples.")
        return {}
    return {record["text"]: record["label"] for record in records}


# Mapping of example text -> label, used for the examples list and for the
# label lookup in `ner`.
EXAMPLES = load_examples("examples.json")
21
+
22
def group_by_entity(raw):
    """Count how many recognized entities fall into each entity group.

    Args:
        raw: iterable of mappings, each providing an "entity_group" key
            (the shape of the items returned by the NER pipeline call in
            this file).

    Returns:
        A plain dict mapping entity-group name to its number of
        occurrences, in first-seen order. A plain dict is returned (rather
        than a defaultdict) so that looking up an absent group later cannot
        silently add a zero-count entry to the result.
    """
    counts = {}
    for ent in raw:
        group = ent["entity_group"]
        counts[group] = counts.get(group, 0) + 1
    return counts
28
+
29
+
30
def plot_to_figure(grouped):
    """Render per-group entity counts as a vertical bar chart.

    Args:
        grouped: mapping of entity-group name to count.

    Returns:
        The matplotlib Figure containing the bar chart.
    """
    fig, ax = plt.subplots()
    ax.bar(list(grouped.keys()), list(grouped.values()))
    ax.margins(0.2)
    # Reserve space below the axes for the vertically rotated tick labels.
    fig.subplots_adjust(bottom=0.4)
    ax.tick_params(axis="x", labelrotation=90)
    # Unregister the figure from pyplot's global registry so repeated calls
    # do not accumulate open figures; the Figure object itself stays usable
    # (e.g. for savefig) after being closed.
    plt.close(fig)
    return fig
37
+
38
+
39
def ner(text):
    """Run the NER pipeline on *text* and assemble the interface outputs.

    Args:
        text: the note text to analyse.

    Returns:
        A 4-tuple of:
          * a dict with the input "text" and its list of "entities"
            (entity group, word, score, start and end offsets),
          * a dict with per-group counts, the number of distinct groups
            and the total number of entities,
          * the label stored in EXAMPLES for this exact text, or "Unknown",
          * the bar-chart figure of the per-group counts.
    """
    predictions = pipe(text)

    entities = []
    for pred in predictions:
        entities.append(
            {
                "entity": pred["entity_group"],
                "word": pred["word"],
                "score": pred["score"],
                "start": pred["start"],
                "end": pred["end"],
            }
        )
    highlighted = {"text": text, "entities": entities}

    counts = group_by_entity(predictions)
    chart = plot_to_figure(counts)
    # Only texts that match an example verbatim have a known label.
    rating = EXAMPLES.get(text, "Unknown")

    summary = {
        "entity_counts": counts,
        "entities": len(counts),
        "counts": sum(counts.values()),
    }

    return (highlighted, summary, rating, chart)
66
+
67
+
68
# Output widgets, listed in the same order as the tuple returned by `ner`.
output_components = [
    gr.HighlightedText(label="NER", combine_adjacent=True),
    gr.JSON(label="Entity Counts"),
    gr.Label(label="Rating"),
    gr.Plot(label="Bar"),
]

# Free-text input; the example texts are the keys of EXAMPLES.
note_input = gr.Textbox(label="Note text", value="")

interface = gr.Interface(
    ner,
    inputs=note_input,
    outputs=output_components,
    examples=list(EXAMPLES),
    allow_flagging="never",
)

interface.launch()
requirements.txt ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.3
2
+ aiosignal==1.2.0
3
+ anyio==3.6.1
4
+ async-timeout==4.0.2
5
+ attrs==22.1.0
6
+ bcrypt==4.0.1
7
+ black==22.10.0
8
+ certifi==2022.9.24
9
+ cffi==1.15.1
10
+ charset-normalizer==2.1.1
11
+ click==8.1.3
12
+ contourpy==1.0.5
13
+ cryptography==38.0.1
14
+ cycler==0.11.0
15
+ fastapi==0.85.0
16
+ ffmpy==0.3.0
17
+ filelock==3.8.0
18
+ fonttools==4.37.4
19
+ frozenlist==1.3.1
20
+ fsspec==2022.8.2
21
+ gradio==3.4.1
22
+ h11==0.12.0
23
+ httpcore==0.15.0
24
+ httpx==0.23.0
25
+ huggingface-hub==0.10.0
26
+ idna==3.4
27
+ Jinja2==3.1.2
28
+ kiwisolver==1.4.4
29
+ linkify-it-py==1.0.3
30
+ markdown-it-py==2.1.0
31
+ MarkupSafe==2.1.1
32
+ matplotlib==3.6.1
33
+ mdit-py-plugins==0.3.1
34
+ mdurl==0.1.2
35
+ multidict==6.0.2
36
+ mypy-extensions==0.4.3
37
+ numpy==1.23.3
38
+ orjson==3.8.0
39
+ packaging==21.3
40
+ pandas==1.5.0
41
+ paramiko==2.11.0
42
+ pathspec==0.10.1
43
+ Pillow==9.2.0
44
+ platformdirs==2.5.2
45
+ pycparser==2.21
46
+ pycryptodome==3.15.0
47
+ pydantic==1.10.2
48
+ pydub==0.25.1
49
+ PyNaCl==1.5.0
50
+ pyparsing==3.0.9
51
+ python-dateutil==2.8.2
52
+ python-multipart==0.0.5
53
+ pytz==2022.4
54
+ PyYAML==6.0
55
+ regex==2022.9.13
56
+ requests==2.28.1
57
+ rfc3986==1.5.0
58
+ six==1.16.0
59
+ sniffio==1.3.0
60
+ starlette==0.20.4
61
+ tokenizers==0.12.1
62
+ tomli==2.0.1
63
+ torch==1.12.1
64
+ tqdm==4.64.1
65
+ transformers==4.22.2
66
+ typing_extensions==4.4.0
67
+ uc-micro-py==1.0.1
68
+ urllib3==1.26.12
69
+ uvicorn==0.18.3
70
+ websockets==10.3
71
+ yarl==1.8.1
test.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "clinical-nlp-b9117",
4
+ "private_key_id": "6972d02311e8ee0c5b582551fbcf9c99b9169b58",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCmrSoB92G/ihxL\nzIk7Y8RUNc6Iezr6pZ+eSz2RGxEz2qPMfWjNeOJEAlACYJp4aUwyX5IHGb8Eh/oj\nkr7nVsgvuDyrTWpCAv16AuRycKgxvqj0+uDaVrF0vLgTumy62x5QM7i+n2YTDXoP\nXHMHX7yXZ6zc9Ibmm065f2kgWyjmIZDt+flTBYeBS203ZIzMBHhN1e1jdtzR36z/\n1MBmLjpRKvmuHF2SnraVjoRh7Xe6R99K8DxRQ61TJt9xLukvLBYelnqf2/cK8bZM\n5p2pErR4FE7ki3MX7HWdMJQSe+Uj10hurjNBdHcCaNUou5EL5+NRgqLow0tfatWC\n+Jpiw3K9AgMBAAECggEAGpT7YhzmBfos0RnpuQMMSLHcIoAkw9yuPDybsQy0DaUN\nAovtrvdcfqQvxnFJsXJ5qH79dwxwHnThO9MnhxWcD6A+bMOH8scvTcowTOASsvxJ\nTejE+41f99IxOVQ+Cv7vMrNM/3nEeb1ofhKsdbybAzqRoxuMeDLEt2jOh06Ck1D8\n/YV8kavGYR/VNxO2l7C5DZJYXgcm18ZrTFEXZes8bydZesoHl+JRVO1utjR2IhAj\nnYqqNaf5RXruEzXWxP0+jjEgg4NLFfqVnQTZFrLwokwc8NEMXf3dZJ0k0cHHmxOB\n6BHuPZhMOZ56U74PyWgCmbPp9g/SLt3iInpZ4ahmAQKBgQDhQwdbUEQ1q+KSMsMm\ndJl+ghX/Ff3uaZ7LjdBiOgtmTaIVbuf/bw0V9x8GbRGdJJyp546R5vhUE0zKzkMt\nTNdDNrWk3Zh4tCRHvPEHiqmDn91pWFeDDQf/OjKz+SFV31mQ050BOatZ8dBEy+md\nvHG8yLTB7oJvSpviim4ty15wIQKBgQC9a5jsBFB0fltHNJ0lZp7I2hF+aOqOngJM\nqEipPjJABJ4izGTOK/KW8CyWEP82nb6p7u9v0f4sV8CFWXG178DMv1NlRYzom3CQ\nkXdx+nRgO4oX4eEfYuoP2PxF0hCOwbh55NgFdwTt/dExX6bau4d9yQMV7o0TXpRW\nZzygOOTfHQKBgQC7ayhwyfymZydwmjmSAks/XX5tqN+IgGo1U/1/7GlVqdvkV01B\nUiUiFGTE1PRluXN7TYRqUjBky1YGGsz7oMYtTxScYh6ctszEvygPLUhSki0GnBDb\noXj42nQbF3mr19POUrJ7tX6irDWrN7lcmtBK0PbLr+ToMbw3JRP8mAsv4QKBgEac\nC18/pHYofAIpHMNKY7pff9HtbjJHuHe2648bPkQa9I/oPVOVklKtqREvuNM1LlPO\nW7cFQohpFb0fwIGfo/EvCPlhWcuD1gwuDaaRRDxzNWD9tJusla/epPup+L4efJQD\nuHshCNdmnEqZa2tyKGm9Osc8K56izQ0AYtsfGkIJAoGAMtaXTA96OXUvpEm4waQX\nOTbuEZQEdntnYWHacNrGlvwnNmvNC9hXwB38ijxXHEn0j1QUcV3w5QXFupwzjpZ2\nlIp9vTq1mOTVhHzmQmOb9DKKAE/2pi2HnekItncoQCBtgJ7k6tIk1KEfvXuQS/oM\nh8qPMwuMcQ/vKGhl3xLYo9M=\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "firebase-adminsdk-qaxaj@clinical-nlp-b9117.iam.gserviceaccount.com",
7
+ "client_id": "117623958723912081118",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-qaxaj%40clinical-nlp-b9117.iam.gserviceaccount.com"
12
+ }