sgd-opt-tokenizer / tokenizer_config.json
adibm's picture
Upload tokenizer
cfb5c77
{
"add_bos_token": true,
"add_prefix_space": false,
"additional_special_tokens": [
"<|begintarget|>",
"<|endtarget|>",
"<|begincontext|>",
"<|endcontext|>",
"<|system|>",
"<|user|>",
"<|beginlastuserutterance|>",
"<|endlastuserutterance|>",
"<|begindsts|>",
"<|enddsts|>",
"<|begindst|>",
"<|enddst|>",
"<|beginbelief|>",
"<|endbelief|>",
"<|beginresponse|>",
"<|endresponse|>",
"<|beginaction|>",
"<|endaction|>",
"<|beginuseraction|>",
"<|enduseraction|>",
"<|sysactions|>",
"<|beginintent|>",
"<|endintent|>",
"<|beginrequestedslots|>",
"<|endrequestedslots|>",
"<|promptdst|>",
"<|promptaction|>",
"<|promptresponse|>",
"<|pad|>",
"<|endoftext|>",
"<|startoftext|>",
"<|beginschema|>",
"<|endschema|>",
"<|schemaname|>",
"<|schemadescription|>",
"<|beginschemaintent|>",
"<|endschemaintent|>",
"<|intentrequiredslots|>",
"<|intentresultslots|>",
"<|intentoptionalslots|>",
"<|possiblevalues|>",
"<|endpossiblevalues|>",
"<|beginschemaslot|>",
"<|endschemaslot|>",
"<|schemaslotvalues|>",
"<|beginserviceresults|>",
"<|endserviceresults|>"
],
"bos_token": {
"__type": "AddedToken",
"content": "<|startoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"clean_up_tokenization_spaces": true,
"eos_token": {
"__type": "AddedToken",
"content": "<|endtarget|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"errors": "replace",
"model_max_length": 1000000000000000019884624838656,
"pad_token": {
"__type": "AddedToken",
"content": "<|pad|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"tokenizer_class": "GPT2Tokenizer",
"unk_token": {
"__type": "AddedToken",
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}