from transformers import PreTrainedTokenizer


def get_special_token_mapping(tokenizer: PreTrainedTokenizer) -> dict:
    """Obtain the ids of the special tokens for the given tokenizer."""
    if "t5" in type(tokenizer).__name__.lower():
        # T5 has no [CLS]/[MASK]/[SEP] tokens, so fixed vocabulary ids are
        # used instead: 32099 is <extra_id_0>, T5's first sentinel token,
        # which plays the role of [MASK]; id 3 stands in for [CLS]; the
        # EOS token (</s>) serves as the separator.
        special_token_mapping = {
            "cls": 3,
            "mask": 32099,
            "sep": tokenizer.eos_token_id,
            "sep+": tokenizer.eos_token_id,
            "pseudo_token": tokenizer.unk_token_id,
        }
    else:
        # BERT-style tokenizers expose their special token ids directly.
        special_token_mapping = {
            "cls": tokenizer.cls_token_id,
            "mask": tokenizer.mask_token_id,
            "sep": tokenizer.sep_token_id,
            "sep+": tokenizer.sep_token_id,
            "pseudo_token": tokenizer.unk_token_id,
        }
    return special_token_mapping
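

# A minimal usage sketch: resolve and print the special token ids for a
# BERT-style and a T5-style tokenizer. The checkpoint names
# ("bert-base-uncased", "t5-base") are illustrative assumptions, not
# requirements of the function.
if __name__ == "__main__":
    from transformers import AutoTokenizer

    for name in ("bert-base-uncased", "t5-base"):
        tok = AutoTokenizer.from_pretrained(name)
        print(name, get_special_token_mapping(tok))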