Spaces:

empy-ai
/

Token-classification

Runtime error

App Files Files Community

David Kagramanyan committed on Nov 26, 2022

Commit

8da6c15

1 Parent(s): 37516c9

grouped entities

Browse files

Files changed (1) hide show

app.py +68 -56

app.py CHANGED Viewed

@@ -9,15 +9,69 @@ os.system("python -m spacy download en_core_web_md")
 import spacy
-colors = {
-    "Observation": "#9bddff",
-    "Evaluation": "#f08080",
-}
-nlp = spacy.load("en_core_web_md") #Esto es para usar displacy y renderizar las entidades
-nlp.disable_pipes("ner")
-def compute_ner(input_text_message):
     endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'
     headers = {
@@ -31,60 +85,18 @@ def compute_ner(input_text_message):
     response = requests.post(endpoint_url, headers=headers, json=json_data)
-    entities = response.json()
-    doc = nlp(input_text_message)
-    potential_entities = []
-    for entity in entities:
-        start = entity["start"]
-        end = entity["end"]
-        label = entity["entity"]
-        if label == "I-Observation" or label == "B-Observation":
-            label = "Observation"
-        if label == "I-Evaluation" or label == "B-Evaluation":
-            label = "Evaluation"
-        entity["entity"]=label
-        ent = doc.char_span(start, end, label=label)
-        if ent != None:
-            doc.ents += (ent,)
-        else:
-            potential_entities.append(entity)
-    potential_entities.append({"entity": "NONE", "start": -1, "end": -1})
-    start = potential_entities[0]["start"]
-    end = potential_entities[0]["end"]
-    label = potential_entities[0]["entity"]
-    for item in potential_entities:
-        if item["entity"] == label and item["start"] == end:
-            end = item["end"]
-            continue
-        else:
-            if item["start"] != start:
-                ent = doc.char_span(start, end, label=label)
-                doc.ents += (ent,)
-            start = item["start"]
-            end = item["end"]
-            label = item["entity"]
-    options = {"ents": colors.keys(), "colors": colors}
-    return displacy.render(doc, style="ent", options=options)
-examples = ['You are dick',
             'My dad is an asshole and took his anger out on my mom by verbally abusing her',
             'He eventually moved on to my brother']
-iface = gr.Interface(fn=compute_ner, inputs=gr.inputs.Textbox(lines=5, placeholder="Enter your text here",
                                                               label='Check your text for compliance with the NVC rules'),
                      outputs="html", examples=examples)
 iface.launch()

 import spacy
+# displaCy rendering options: only the two labels below are drawn, each with
+# its own highlight colour.
+options = {
+    "ents": ["Observation", "Evaluation"],
+    "colors": {"Observation": "#9bddff", "Evaluation": "#f08080"},
+}
+# spaCy pipeline that turns the input text into a Doc before entity spans are
+# attached to it.
+nlp = spacy.load("en_core_web_md")
+def postprocess(classifications):
+    """Group token-level BIO predictions into entity spans.
+
+    Args:
+        classifications: Sequence of dicts, one per token in text order. Each
+            dict has an ``'entity'`` tag (``'B-<label>'`` or ``'I-<label>'``;
+            anything else is treated as "outside") and ``'start'`` / ``'end'``
+            character offsets.
+
+    Returns:
+        A list of ``(label, start, end)`` tuples ordered by ``start``.
+        Same-label spans whose offsets touch or overlap are merged into one.
+    """
+    spans = []
+    current = None  # span still being extended; None right after an outside token
+    for token in classifications:
+        tag = token['entity']
+        prefix, sep, label = tag.partition('-') if isinstance(tag, str) else ('', '', '')
+        if not sep or prefix not in ('B', 'I'):
+            current = None
+            continue
+        if prefix == 'I' and current is not None and current[0] == label:
+            current[2] = token['end']
+        else:
+            # 'B-' always opens a span. An 'I-' with no open span of the same
+            # label opens one too, so the token is neither dropped nor glued
+            # onto an entity of a different label.
+            current = [label, token['start'], token['end']]
+            spans.append(current)
+
+    # Merge same-label spans where one starts at or before the end of the
+    # previous one. A single pass over the start-sorted spans is enough.
+    merged = []
+    last_by_label = {}
+    for label, start, end in sorted(spans, key=lambda span: (span[1], span[2])):
+        previous = last_by_label.get(label)
+        if previous is not None and start <= previous[2]:
+            previous[2] = max(previous[2], end)
+        else:
+            previous = [label, start, end]
+            last_by_label[label] = previous
+            merged.append(previous)
+    return [tuple(span) for span in merged]
+def set_entities(sentence, entities):
+    """Run spaCy on ``sentence`` and replace its entities with ``entities``.
+
+    Args:
+        sentence: Raw text to analyse.
+        entities: Iterable of ``(label, start, end)`` character-offset triples.
+
+    Returns:
+        The spaCy ``Doc`` with ``doc.ents`` set to the spans that could be
+        built from ``entities``.
+    """
+    doc = nlp(sentence)
+    spans = []
+    for label, start, end in entities:
+        span = doc.char_span(start, end, label)
+        # char_span returns None when the offsets do not fall on token
+        # boundaries; skip those so one bad span cannot discard all the others.
+        if span is not None:
+            spans.append(span)
+    # doc.ents rejects overlapping spans, so keep only a non-overlapping subset.
+    doc.ents = spacy.util.filter_spans(spans)
+    return doc
+def apply_ner(input_text_message: str):
     endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'
     headers = {
     response = requests.post(endpoint_url, headers=headers, json=json_data)
+    classifications = response.json()
+    entities = postprocess(classifications)
+    doc = set_entities(input_text_message, entities)
+    displacy_html = displacy.render(doc, style="ent", options=options)
+    return displacy_html
+# Sample inputs offered through the interface's `examples` argument.
+examples = ['You are dick','Today i broke my leg and my dad is a dick',
             'My dad is an asshole and took his anger out on my mom by verbally abusing her',
             'He eventually moved on to my brother']
+# Gradio interface: a 5-line text box is passed to apply_ner and the returned
+# markup is shown through the "html" output component.
+iface = gr.Interface(fn=apply_ner, inputs=gr.inputs.Textbox(lines=5, placeholder="Enter your text here",
                                                               label='Check your text for compliance with the NVC rules'),
                      outputs="html", examples=examples)
+# Start the Gradio app.
 iface.launch()