Spaces:
Runtime error
Runtime error
David Kagramanyan
committed on
Commit
·
8da6c15
1
Parent(s):
37516c9
grouped entities
Browse files
app.py
CHANGED
@@ -9,15 +9,69 @@ os.system("python -m spacy download en_core_web_md")
|
|
9 |
import spacy
|
10 |
|
11 |
|
12 |
-
colors = {
|
13 |
-
"Observation": "#9bddff",
|
14 |
-
"Evaluation": "#f08080",
|
15 |
-
}
|
16 |
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
def compute_ner(input_text_message):
|
21 |
endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'
|
22 |
|
23 |
headers = {
|
@@ -31,60 +85,18 @@ def compute_ner(input_text_message):
|
|
31 |
|
32 |
response = requests.post(endpoint_url, headers=headers, json=json_data)
|
33 |
|
34 |
-
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
doc = nlp(input_text_message)
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
for entity in entities:
|
41 |
-
start = entity["start"]
|
42 |
-
end = entity["end"]
|
43 |
-
label = entity["entity"]
|
44 |
-
|
45 |
-
if label == "I-Observation" or label == "B-Observation":
|
46 |
-
label = "Observation"
|
47 |
-
|
48 |
-
if label == "I-Evaluation" or label == "B-Evaluation":
|
49 |
-
label = "Evaluation"
|
50 |
-
|
51 |
-
entity["entity"]=label
|
52 |
-
|
53 |
-
ent = doc.char_span(start, end, label=label)
|
54 |
-
if ent != None:
|
55 |
-
doc.ents += (ent,)
|
56 |
-
else:
|
57 |
-
potential_entities.append(entity)
|
58 |
-
|
59 |
-
potential_entities.append({"entity": "NONE", "start": -1, "end": -1})
|
60 |
-
|
61 |
-
start = potential_entities[0]["start"]
|
62 |
-
end = potential_entities[0]["end"]
|
63 |
-
label = potential_entities[0]["entity"]
|
64 |
-
|
65 |
-
for item in potential_entities:
|
66 |
-
if item["entity"] == label and item["start"] == end:
|
67 |
-
end = item["end"]
|
68 |
-
continue
|
69 |
-
else:
|
70 |
-
if item["start"] != start:
|
71 |
-
ent = doc.char_span(start, end, label=label)
|
72 |
-
doc.ents += (ent,)
|
73 |
-
|
74 |
-
start = item["start"]
|
75 |
-
end = item["end"]
|
76 |
-
label = item["entity"]
|
77 |
-
|
78 |
-
options = {"ents": colors.keys(), "colors": colors}
|
79 |
-
|
80 |
-
return displacy.render(doc, style="ent", options=options)
|
81 |
-
|
82 |
-
|
83 |
-
examples = ['You are dick',
|
84 |
'My dad is an asshole and took his anger out on my mom by verbally abusing her',
|
85 |
'He eventually moved on to my brother']
|
86 |
|
87 |
-
iface = gr.Interface(fn=
|
88 |
label='Check your text for compliance with the NVC rules'),
|
89 |
outputs="html", examples=examples)
|
90 |
iface.launch()
|
|
|
9 |
import spacy
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
# displaCy rendering options: only these two labels are highlighted, each with
# its own background colour.
options = {
    "ents": ["Observation", "Evaluation"],
    "colors": {
        "Observation": "#9bddff",
        "Evaluation": "#f08080",
    },
}

# Pipeline loaded once at import time and shared by the functions below.
nlp = spacy.load("en_core_web_md")
|
23 |
+
|
24 |
+
|
25 |
+
def postprocess(classifications):
    """Group per-token BIO classifications into labelled character spans.

    Args:
        classifications: iterable of dicts, each with an ``'entity'`` tag
            (``'B-<label>'``, ``'I-<label>'``, or anything else for "no
            entity") and integer ``'start'`` / ``'end'`` character offsets.

    Returns:
        A list of ``(label, start, end)`` tuples sorted by ``start``. Spans
        with the same label that touch or overlap are merged into one.
    """
    spans = []
    current = None  # [label, start, end] of the entity currently being built

    for item in classifications:
        prefix, sep, label = str(item['entity']).partition('-')
        if not sep or prefix not in ('B', 'I'):
            # Outside tag (e.g. 'O') or malformed tag: closes any open entity.
            current = None
            continue

        if prefix == 'I' and current is not None and current[0] == label:
            # Continuation of the open entity: only extend when the label
            # matches, so 'B-Observation' + 'I-Evaluation' is not fused.
            current[2] = item['end']
        else:
            # 'B-' always opens a new entity. An 'I-' with no matching open
            # entity is treated the same way instead of being dropped.
            current = [label, item['start'], item['end']]
            spans.append(current)

    # Merge same-label spans that touch or overlap in a single sorted pass.
    spans.sort(key=lambda span: (span[0], span[1], span[2]))
    merged = []
    for label, start, end in spans:
        previous = merged[-1] if merged else None
        if previous is not None and previous[0] == label and start <= previous[2]:
            previous[2] = max(previous[2], end)
        else:
            merged.append([label, start, end])

    # Return in document order.
    merged.sort(key=lambda span: (span[1], span[2]))
    return [tuple(span) for span in merged]
|
59 |
+
|
60 |
+
|
61 |
+
def set_entities(sentence, entities):
    """Build a spaCy ``Doc`` for ``sentence`` whose ``ents`` are ``entities``.

    Args:
        sentence: the raw input text.
        entities: iterable of ``(label, start, end)`` tuples holding
            character offsets into ``sentence``.

    Returns:
        The ``Doc`` produced by the module-level ``nlp`` pipeline, with its
        entities replaced by the given spans that could be mapped to tokens.
    """
    doc = nlp(sentence)

    spans = []
    for label, start, end in entities:
        span = doc.char_span(start, end, label=label)
        # char_span returns None when the offsets do not line up with token
        # boundaries. Skip that span only, rather than letting a single None
        # make the doc.ents assignment fail and discard every entity.
        if span is not None:
            spans.append(span)

    # doc.ents rejects overlapping spans, so resolve overlaps before assigning.
    doc.ents = spacy.util.filter_spans(spans)
    return doc
|
71 |
+
|
72 |
+
|
73 |
+
def apply_ner(input_text_message: str):
|
74 |
|
|
|
75 |
endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'
|
76 |
|
77 |
headers = {
|
|
|
85 |
|
86 |
response = requests.post(endpoint_url, headers=headers, json=json_data)
|
87 |
|
88 |
+
classifications = response.json()
|
89 |
+
entities = postprocess(classifications)
|
90 |
+
doc = set_entities(input_text_message, entities)
|
91 |
+
displacy_html = displacy.render(doc, style="ent", options=options)
|
92 |
+
return displacy_html
|
93 |
|
|
|
94 |
|
95 |
+
# Sample inputs offered in the UI.
examples = [
    'You are dick',
    'Today i broke my leg and my dad is a dick',
    'My dad is an asshole and took his anger out on my mom by verbally abusing her',
    'He eventually moved on to my brother',
]

# Single multi-line text box in, rendered displaCy HTML out.
iface = gr.Interface(
    fn=apply_ner,
    inputs=gr.inputs.Textbox(
        lines=5,
        placeholder="Enter your text here",
        label='Check your text for compliance with the NVC rules',
    ),
    outputs="html",
    examples=examples,
)
iface.launch()
|