# Helper function that merges tokens so that the information displayed to the user is relevant.
def merge_tokens(tokens):
    """Merge consecutive tokens that belong to the same entity.

    A token is folded into the previously emitted entry when its ``'entity'``
    label starts with ``'I-'`` and the previous entry's label ends with the
    same text that follows that prefix (e.g. ``'I-PER'`` after ``'B-PER'``).
    Any other token starts a new entry.

    When a token is folded in:
      * its ``'word'`` (with every ``'##'`` removed) is appended to the
        entry's ``'word'`` with no separator,
      * the entry's ``'end'`` becomes the token's ``'end'``,
      * the entry's ``'score'`` becomes the arithmetic mean of the scores of
        all tokens folded into that entry so far.

    The input is never modified: each new entry is a shallow copy of the
    token that started it.

    Args:
        tokens: Iterable of dicts providing at least the keys ``'entity'``,
            ``'word'``, ``'end'`` and ``'score'``.

    Returns:
        A new list of dicts, one per merged entity span, in input order.
    """
    merged_tokens = []
    # Parallel to merged_tokens: [sum of scores, number of tokens] per entry,
    # so the reported score is a true mean rather than a repeated pairwise
    # average (which would weight later tokens more heavily).
    score_stats = []

    for token in tokens:
        label = token['entity']
        previous = merged_tokens[-1] if merged_tokens else None
        continues_previous = (
            previous is not None
            and label.startswith('I-')
            and previous['entity'].endswith(label[2:])
        )

        if not continues_previous:
            # Copy so later merges never write into the caller's dicts.
            merged_tokens.append(dict(token))
            score_stats.append([token['score'], 1])
            continue

        previous['word'] += token['word'].replace('##', '')
        previous['end'] = token['end']

        stats = score_stats[-1]
        stats[0] += token['score']
        stats[1] += 1
        previous['score'] = stats[0] / stats[1]

    return merged_tokens