mvy committed
Commit · c1db962
1 Parent(s): 545b919
add app
- .gitignore +1 -0
- README.md +2 -0
- app.py +32 -0
- ner.py +78 -0
- requirements.txt +2 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+__pycache__
README.md
CHANGED
@@ -8,6 +8,8 @@ sdk_version: 4.14.0
 app_file: app.py
 pinned: false
 license: apache-2.0
+models:
+- knowledgator/UTC-DeBERTa-small
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
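The added models: entry links the Space to the model repository it serves, so the Hub can cross-reference the two. As a quick sanity check (a sketch, not part of the commit; it assumes huggingface_hub is installed and network access is available), the referenced repository can be resolved like this:

# Hypothetical check that the model referenced in the Space metadata exists on the Hub.
from huggingface_hub import model_info

info = model_info("knowledgator/UTC-DeBERTa-small")
print(info.pipeline_tag, info.tags)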
app.py
ADDED
@@ -0,0 +1,32 @@
+import gradio as gr
+
+from ner import NER
+
+examples = [
+    [
+        'scientist, university, city',
+        ('Dr. Paul Hammond, a renowned neurologist at'
+         ' Johns Hopkins University, has recently published'
+         ' a paper in the prestigious journal "Nature Neuroscience".'
+         ' His research focuses on a rare genetic mutation, found'
+         ' in less than 0.01% of the population, that appears to'
+         ' prevent the development of Alzheimer\'s disease.'
+         ' Collaborating with researchers at the University'
+         ' of California, San Francisco, the team is now working'
+         ' to understand the mechanism by which this mutation'
+         ' confers its protective effect.\n'
+         'Funded by the National Institutes of Health, their'
+         ' research could potentially open new avenues for'
+         ' Alzheimer\'s treatment.'),
+        0.0],
+]
+
+gradio_app = gr.Interface(
+    NER.ner,
+    inputs=['text', gr.Textbox(placeholder="Enter sentence here..."), gr.Number(value=0.0, label="threshold")],
+    outputs=[gr.HighlightedText()],
+    examples=examples
+)
+
+if __name__ == "__main__":
+    gradio_app.launch()
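NER.ner returns a dictionary with "text" and "entities" keys, which is the format gr.HighlightedText accepts, so the interface can highlight the predicted spans directly. The sketch below (illustrative only, with hand-written entity spans standing in for real model output) shows that format rendered on its own:

import gradio as gr

# Hand-written example of the {"text": ..., "entities": [...]} structure that
# NER.ner produces and gr.HighlightedText renders; spans index into the text.
sample = {
    "text": "Dr. Paul Hammond works at Johns Hopkins University.",
    "entities": [
        {"entity": "scientist", "start": 4, "end": 16, "score": 0.98},
        {"entity": "university", "start": 26, "end": 50, "score": 0.95},
    ],
}

demo = gr.Interface(
    fn=lambda: sample,  # stand-in for NER.ner, which builds the same structure
    inputs=[],
    outputs=gr.HighlightedText(),
)

if __name__ == "__main__":
    demo.launch()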
ner.py
ADDED
@@ -0,0 +1,78 @@
+from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
+import spacy
+import torch
+
+nlp = spacy.load('en_core_web_sm', disable=['lemmatizer', 'parser', 'tagger', 'ner'])
+nlp.add_pipe('sentencizer')
+
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+
+
+class NER:
+    model_name = 'knowledgator/UTC-DeBERTa-small'
+    prompt = """
+Identify entities in the text having the following classes:
+{}
+
+Text:
+"""
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForTokenClassification.from_pretrained(model_name)
+    ner_pipeline = pipeline(
+        "ner",
+        model=model,
+        tokenizer=tokenizer,
+        aggregation_strategy='first',
+        batch_size=12,
+        device=device
+    )
+
+    @classmethod
+    def chunkanize(cls, text, prompt_='', n_sents=10):
+        doc = nlp(text)
+        chunks = []
+        starts = []
+        start = 0
+        end = 0
+        proc = False
+        for id, sent in enumerate(doc.sents, start=1):
+            if not proc:
+                start = sent[0].idx
+                starts.append(start)
+                proc = True
+            end = sent[-1].idx + len(sent[-1].text)
+            if id % n_sents == 0:
+                chunk_text = prompt_ + text[start:end]
+                chunks.append(chunk_text)
+                proc = False
+        if proc:
+            chunk_text = prompt_ + text[start:end]
+            chunks.append(chunk_text)
+        return chunks, starts
+
+
+    @classmethod
+    def ner(cls, labels, text, threshold=0.):
+        chunks, starts, classes = [], [], []
+        label2prompt_len = {}
+        for label in labels.split(', '):
+            prompt_ = cls.prompt.format(label)
+            prompt_len = len(prompt_)
+            label2prompt_len[label] = prompt_len
+            curr_chunks, curr_starts = cls.chunkanize(text, prompt_)
+            curr_labels = [label for _ in range(len(curr_chunks))]
+            chunks += curr_chunks
+            starts += curr_starts
+            classes += curr_labels
+        outputs = []
+        for id, output in enumerate(cls.ner_pipeline(chunks)):
+            label = classes[id]
+            prompt_len = label2prompt_len[label]
+            start = starts[id] - prompt_len
+            for ent in output:
+                if ent['score'] > threshold:
+                    ent['start'] += start
+                    ent['end'] += start
+                    ent['entity'] = label
+                    outputs.append(ent)
+        return {"text": text, "entities": outputs}
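The two classmethods cooperate through character offsets: chunkanize prepends the prompt to every chunk of up to n_sents sentences and records where each chunk starts in the original text, and ner then subtracts the prompt length and adds that chunk start so the spans returned by the token-classification pipeline point back into the unprompted text. A minimal sketch of that correction, with a hypothetical entity dict standing in for pipeline output so nothing needs to be downloaded:

# Offset bookkeeping as in NER.ner: pipeline spans are relative to prompt_ + chunk,
# so chunk_start - len(prompt_) maps them back onto the original text.
prompt_ = "\nIdentify entities in the text having the following classes:\nscientist\n\nText:\n"
text = "Dr. Paul Hammond published a paper."
chunk_start = 0                      # offset of this chunk inside text (from chunkanize)
chunk = prompt_ + text[chunk_start:]

# Pretend the pipeline tagged "Paul Hammond" inside chunk:
ent = {"start": chunk.index("Paul"), "end": chunk.index("Paul") + len("Paul Hammond"), "score": 0.9}

offset = chunk_start - len(prompt_)  # same correction as starts[id] - prompt_len
ent["start"] += offset
ent["end"] += offset
assert text[ent["start"]:ent["end"]] == "Paul Hammond"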
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+spacy
+https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
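requirements.txt pins only spaCy and the en_core_web_sm wheel; gradio is provided by the Space SDK, while transformers and torch, which ner.py imports, are not pinned here and would need to be provided by the runtime or added to this file. Assuming all of those are installed locally, a quick smoke test of the committed code could look like this (the model is downloaded on first use; labels, text, and threshold are arbitrary):

# Hypothetical local smoke test, not part of the commit.
from ner import NER

result = NER.ner("scientist, university", "Dr. Paul Hammond works at Johns Hopkins University.", 0.5)
for ent in result["entities"]:
    print(ent["entity"], repr(result["text"][ent["start"]:ent["end"]]), round(float(ent["score"]), 3))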