from functools import lru_cache
from ...text import create_entity_color
from typing import List, Tuple, Optional
from ckip_transformers.nlp.util import NerToken
def add_textsubscript(ner_token_list: List[NerToken]) -> Tuple[Tuple[str]]:
"""The add_textsubscript function combines the token word and the
NER-tag, and specifies the NER-tag to be displayed as subscript.
Args:
ner_token_list (NerToken): a list of NerToken
Returns:
a tuple: (
("ๅ
้ไปPERSON", (0, 3))
...
)
"""
combine = lambda value: (
f"{value.word}{value.ner}",
value.idx,
)
return tuple(map(combine, ner_token_list))
# NOTE(review): the cache is unbounded and keyed on whole sentences, so it
# grows with every distinct input; consider a finite ``maxsize``.
@lru_cache(maxsize=None)
def modify_sentence(
    span_tuple: Tuple[Tuple[str, Tuple[int, int]], ...],
    sentence: str,
    increased_len: Optional[int] = 0,
) -> str:
    """Replace slices of ``sentence`` with the given replacement strings.

    Spans are applied in the order given. After each replacement the
    difference in length between the new text and the text it replaced is
    added to a running offset, and that offset is applied to the indices of
    every later span. The indices of each span therefore refer to positions
    in the *original* sentence, and spans are expected in ascending,
    non-overlapping order.

    Args:
        span_tuple: ``(replacement, (start, end))`` pairs. Must be hashable
            (a tuple of tuples) because results are memoized with
            ``lru_cache``.
        sentence: the text to modify.
        increased_len: offset already accumulated before the first span;
            ``None`` is treated as ``0``.

    Returns:
        The sentence with every span replaced. When ``span_tuple`` is empty
        the sentence is returned unchanged.
    """
    offset = increased_len or 0
    # Iterating (instead of recursing once per span) avoids RecursionError on
    # sentences with very many spans and handles the empty case naturally.
    for modified_word, (start_index, end_index) in span_tuple:
        start = start_index + offset
        end = end_index + offset
        # Measure the text actually replaced: the slice may be shorter than
        # ``end - start`` when the indices run past the end of the sentence.
        original_word = sentence[start:end]
        sentence = "".join((sentence[:start], modified_word, sentence[end:]))
        offset += len(modified_word) - len(original_word)
    return sentence
def create_ner(sentence: str, ner_token_list: List[NerToken]) -> str:
    """Insert NER tags into a sentence after each recognized entity.

    Each token is turned into a ``word + tag`` label by
    ``add_textsubscript`` and the labels are then substituted into the
    sentence at the tokens' positions by ``modify_sentence``.

    Args:
        sentence: the original sentence.
        ner_token_list: the NER tokens recognized in ``sentence``.

    Returns:
        The sentence with every entity replaced by its labelled form. When
        there are no tokens the sentence is returned unchanged.
    """
    # A sentence without any recognized entity needs no rewriting.
    if not ner_token_list:
        return sentence

    modified_ner_token_list = add_textsubscript(ner_token_list)
    return modify_sentence(modified_ner_token_list, sentence)