File size: 3,261 Bytes
a1b76f2
b063ad5
 
a1b76f2
 
b063ad5
a1b76f2
37516c9
 
 
 
 
8da6c15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1b76f2
 
 
 
 
 
 
 
 
 
 
 
 
 
8da6c15
 
 
 
 
b063ad5
 
8da6c15
a2d5bd8
 
a59a366
8da6c15
a59a366
b063ad5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
import os

import requests

from spacy import displacy

# Fetch the en_core_web_md model via the spaCy CLI every time the script
# starts, so that the spacy.load("en_core_web_md") call further down can
# find it. The command's exit status is not checked here.
os.system("python -m spacy download en_core_web_md")
import spacy



# Highlight colour for each entity label shown in the rendered output.
_entity_colors = {
    "Observation": "#9bddff",
    "Evaluation": "#f08080",
}

# Options passed to displacy.render: the labels to display (in the order
# they are declared above) and the colour used for each of them.
options = {
    "ents": list(_entity_colors),
    "colors": _entity_colors,
}

# Shared spaCy pipeline, loaded once at import time; set_entities() uses it
# to tokenize the input text before attaching entity spans.
nlp = spacy.load("en_core_web_md")


def _tag_prefix(tag):
    """Return the first character of *tag*, or None if it is not a non-empty string."""
    if isinstance(tag, str) and tag:
        return tag[0]
    return None


def _first_mergeable_pair(entities):
    """Return the first two same-label spans that touch end-to-start, or None."""
    for i, left in enumerate(entities):
        for right in entities[i + 1:]:
            same_label = left != right and left[0] == right[0]
            if same_label and (left[2] == right[1] or left[1] == right[2]):
                return left, right
    return None


def postprocess(classifications):
    """Collapse per-token BIO classifications into labelled character spans.

    Args:
        classifications: Sequence of dicts, each with an ``'entity'`` tag
            (e.g. ``'B-Observation'`` / ``'I-Observation'``) and ``'start'``
            / ``'end'`` offsets. Presumably the JSON returned by a token
            classification endpoint -- verify against the caller.

    Returns:
        A list of ``(label, start, end)`` tuples. Spans that were never
        merged keep their original relative order; merged spans are appended
        at the end of the list.
    """
    entities = []
    total = len(classifications)
    for i, token in enumerate(classifications):
        tag = token['entity']
        # Tags that are not non-empty strings (e.g. 0 / None) are treated as
        # "no entity", both here and in the continuation scan below, instead
        # of raising when indexed.
        if _tag_prefix(tag) != 'B':
            continue
        # Extend the span over every directly following 'I' token. The label
        # of the 'I' tag is not compared with the opening 'B' tag.
        j = i + 1
        while j < total and _tag_prefix(classifications[j]['entity']) == 'I':
            j += 1
        entities.append((tag.split('-')[1], token['start'], classifications[j - 1]['end']))

    # Repeatedly fuse same-label spans where one ends exactly where the other
    # starts, restarting the search after each merge until none is left.
    while True:
        pair = _first_mergeable_pair(entities)
        if pair is None:
            break
        left, right = pair
        entities.remove(left)
        entities.remove(right)
        entities.append((left[0], min(left[1], right[1]), max(left[2], right[2])))
    return entities


def set_entities(sentence, entities):
    """Parse *sentence* with spaCy and attach the given entity spans to it.

    Args:
        sentence: Text to run through the module-level ``nlp`` pipeline.
        entities: Iterable of ``(label, start, end)`` tuples, where ``start``
            and ``end`` are character offsets into ``sentence``.

    Returns:
        The parsed ``Doc`` whose ``ents`` are the entities that could be
        mapped onto spaCy tokens.
    """
    doc = nlp(sentence)
    # Doc.char_span returns None when the character offsets do not line up
    # with token boundaries. Skip those spans individually so that a single
    # misaligned entity no longer causes every entity to be discarded.
    candidates = (doc.char_span(start, end, label) for label, start, end in entities)
    doc.ents = [span for span in candidates if span is not None]
    return doc


def apply_ner(input_text_message: str):
    """Classify *input_text_message* remotely and render the result as HTML.

    The text is posted to the hosted inference endpoint, the returned token
    classifications are collapsed into entity spans with ``postprocess`` and
    attached to a spaCy ``Doc`` with ``set_entities``.

    Args:
        input_text_message: The text to analyse.

    Returns:
        The output of ``displacy.render`` in ``"ent"`` style for the
        annotated document.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer in time.
    """
    endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'

    # NOTE(security): a bearer token is committed in source. It is kept as
    # the fallback so existing deployments keep working, but it should be
    # rotated and supplied only through the HF_ENDPOINT_TOKEN variable.
    api_token = os.environ.get('HF_ENDPOINT_TOKEN', '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')

    headers = {
        'Authorization': 'Bearer ' + api_token,
        'Content-Type': 'application/json',
    }
    json_data = {
        'inputs': input_text_message,
    }

    # Without a timeout requests waits indefinitely on an unresponsive host.
    response = requests.post(endpoint_url, headers=headers, json=json_data, timeout=60)
    # Fail with a clear HTTP error rather than feeding an error payload into
    # postprocess().
    response.raise_for_status()

    classifications = response.json()
    entities = postprocess(classifications)
    doc = set_entities(input_text_message, entities)
    return displacy.render(doc, style="ent", options=options)


# Sample sentences offered as one-click inputs in the web UI.
examples = [
    'You are dick',
    'Today i broke my leg and my dad is a dick',
    'My dad is an asshole and took his anger out on my mom by verbally abusing her',
    'He eventually moved on to my brother',
]

# Multi-line text box that feeds apply_ner.
text_input = gr.inputs.Textbox(
    lines=5,
    placeholder="Enter your text here",
    label='Check your text for compliance with the NVC rules',
)

# Build the interface (text in, HTML out) and start serving it.
iface = gr.Interface(
    fn=apply_ner,
    inputs=text_input,
    outputs="html",
    examples=examples,
)
iface.launch()