File size: 1,651 Bytes
f194d43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69b744b
 
 
 
 
f194d43
 
 
 
 
 
 
69b744b
 
f194d43
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Stolen from: https://huggingface.co/spaces/AyushDey/Named_Entity_Recognition/tree/main

import gradio as gr
from transformers import pipeline

# Token-classification pipeline shared by every request. No model is named
# here, so whichever checkpoint transformers resolves for the 'ner' task is
# loaded once at import time.
ner = pipeline(task='ner')

def merge_tokens(tokens):
    """Merge consecutive token-level NER predictions into entity spans.

    A token is folded into the span before it when all of the following hold:

    * its label starts with ``I-``;
    * its entity type (the label without the two-character prefix) equals
      the type of that span;
    * if character offsets are available, it starts at most one character
      after the span ends (directly attached, or separated by one space).
      Without offsets the adjacency check is skipped.

    Args:
        tokens: Iterable of dicts carrying ``entity``, ``word``, ``score``
            and ``end`` keys, and optionally ``start``.

    Returns:
        A new list of dicts. The input dicts are never modified. A merged
        span keeps the label (and ``start``) of its first token, takes the
        ``end`` of its last token, joins the words (``##`` word-piece
        markers are dropped, separate words are joined with a space) and
        reports the arithmetic mean of the member scores.
    """
    merged_tokens = []
    # Per-span running totals, kept outside the dicts so that no helper keys
    # leak into the returned entities.
    score_sums = []
    token_counts = []

    for token in tokens:
        label = token['entity']
        previous = merged_tokens[-1] if merged_tokens else None

        # Character distance between the previous span and this token;
        # None when either offset is unavailable.
        gap = None
        if previous is not None:
            start = token.get('start')
            prev_end = previous.get('end')
            if start is not None and prev_end is not None:
                gap = start - prev_end

        continues_previous = (
            previous is not None
            and label.startswith('I-')
            and previous['entity'][2:] == label[2:]
            and (gap is None or gap <= 1)
        )

        if not continues_previous:
            # Copy so that later merges never write into the caller's dicts.
            merged_tokens.append(dict(token))
            score_sums.append(token.get('score'))
            token_counts.append(1)
            continue

        word = token['word']
        if word.startswith('##'):
            # Word-piece continuation: glue onto the previous piece.
            previous['word'] += word[2:]
        elif gap == 0:
            # Directly attached in the text (e.g. a hyphen inside a name).
            previous['word'] += word
        else:
            previous['word'] += ' ' + word

        previous['end'] = token['end']
        score_sums[-1] += token['score']
        token_counts[-1] += 1
        # True mean over all member tokens, not a running pairwise average.
        previous['score'] = score_sums[-1] / token_counts[-1]

    return merged_tokens

# Sample inputs offered as one-click examples in the UI.
examples = [
    (
        "Johann Carl Friedrich Gauss was a German mathematician, geodesist, "
        "and physicist who made significant contributions to many fields in "
        "mathematics and science."
    ),
    (
        "At Los Alamos, which was isolated for security, Feynman amused "
        "himself by investigating the combination locks on the cabinets and "
        "desks of physicists. He often found that they left the lock "
        "combinations on the factory settings, wrote the combinations down, "
        "or used easily guessable combinations like dates."
    ),
]

def named(input):
    """Tag the entities in ``input`` and return them alongside the text.

    The raw pipeline predictions are passed through ``merge_tokens`` and
    returned as a ``{'text': ..., 'entities': ...}`` dict.
    """
    entities = merge_tokens(ner(input))
    return {'text': input, 'entities': entities}

# Build the UI: a two-line text box feeding `named`, whose result is rendered
# as highlighted text, with the sample sentences offered as examples.
a = gr.Interface(
    fn=named,
    inputs=[gr.Textbox(label="Text input", lines=2)],
    outputs=[gr.HighlightedText(label='Labelled text')],
    examples=examples,
)
a.launch()