File size: 2,316 Bytes
4398510
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from functools import lru_cache
from ...text import create_entity_color
from typing import List, Tuple, Optional
from ckip_transformers.nlp.util import NerToken


def add_textsubscript(
    ner_token_list: List[NerToken],
) -> Tuple[Tuple[str, Tuple[int, int]], ...]:
    """The add_textsubscript function combines the token word and the
    NER-tag, and specifies the NER-tag to be displayed as subscript.
    The word is wrapped in a span colored by ``create_entity_color`` for
    its NER-tag.
    Args:
        ner_token_list (List[NerToken]): a list of NerToken; the ``word``,
            ``ner`` and ``idx`` attributes of each token are read
    Returns:
        a tuple with one ``(html, idx)`` pair per token, in input order: (
            ("<span style='color: ...;'>傅達仁<sub style='margin-right: 0.6rem'>PERSON</sub></span>", (0, 3)),
            ...
        )
    """
    # NOTE(review): word and tag are interpolated into the markup as-is,
    # without HTML escaping.
    return tuple(
        (
            f"<span style='color: {create_entity_color(token.ner)};'>{token.word}<sub style='margin-right: 0.6rem'>{token.ner}</sub></span>",
            token.idx,
        )
        for token in ner_token_list
    )


# Bounded cache: sentences are arbitrary text, so an unbounded cache would
# grow for as long as the process lives.
@lru_cache(maxsize=256)
def modify_sentence(
    span_tuple: Tuple[Tuple[str, Tuple[int, int]], ...],
    sentence: str,
    increased_len: Optional[int] = 0,
) -> str:
    """The modify_sentence function replaces index ranges of the sentence
    with the given replacement strings, one after another.
    Every ``(start, end)`` range refers to positions in the sentence as it
    was passed in; the length change caused by each replacement is added
    to the ranges that follow it. The running offset is therefore only
    meaningful when the ranges are ordered left to right and do not overlap.
    Args:
        span_tuple: a tuple of ``(replacement, (start, end))`` pairs; it
            must be hashable because the call is memoized by ``lru_cache``
        sentence (str): the sentence to modify
        increased_len (Optional[int]): an offset added to every range
            before it is applied; ``None`` is treated as 0
    Returns:
        a str: the sentence with every range replaced; the sentence is
        returned unchanged when ``span_tuple`` is empty
    """
    offset = increased_len or 0

    # Iterating (instead of recursing once per span) keeps long entity lists
    # clear of the recursion limit and naturally handles zero spans.
    for modified_word, (start_index, end_index) in span_tuple:
        start_index += offset
        end_index += offset

        original_word = sentence[start_index:end_index]
        sentence = "".join(
            (sentence[:start_index], modified_word, sentence[end_index:])
        )

        # Later ranges must shift by however much this replacement grew
        # (or shrank) the sentence.
        offset += len(modified_word) - len(original_word)

    return sentence


def create_ner(sentence: str, ner_token_list: List[NerToken]) -> str:
    """The create_ner function replaces the words of the sentence that are
    recognized as entities with their span markup.
    The markup is built by ``add_textsubscript`` and inserted into the
    sentence by ``modify_sentence``.
    Args:
        sentence (str): the original sentence
        ner_token_list (List[NerToken]): a list of NerToken
    Returns:
        a str
    """
    return modify_sentence(add_textsubscript(ner_token_list), sentence)