File size: 425 Bytes
b584b5e 1705edb 2a46512 1705edb 2a46512 2c472c3 1705edb 2c472c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# Preprocessor configuration for the entry named `wordpiece_tokenizer`.
# NOTE(review): the loader that consumes this file is not visible here. The key
# names resemble Hugging Face tokenizer / WordPiece-trainer arguments; confirm
# against the actual consumer before relying on the semantics hinted at below.
name: wordpiece_tokenizer
config_type: preprocessor
#
# Truncation settings.
# NOTE(review): max_length is presumably counted in tokens — TODO confirm.
max_length: 512
truncation: longest_first
truncation_side: right
# NOTE(review): 0 presumably means no overlap between truncated windows —
# confirm.
stride: 0
#
# Padding settings.
padding: longest
padding_side: right
# NOTE(review): 0 presumably disables rounding the padded length up to a
# multiple; verify the consumer never uses this value as a divisor.
pad_to_multiple_of: 0
pad_token_type_id: 0
#
# Special tokens. These are single-quoted on purpose: an unquoted leading `[`
# would be parsed by YAML as the start of a flow sequence, not as a string.
unk_token: '[UNK]'
sep_token: '[SEP]'
pad_token: '[PAD]'
cls_token: '[CLS]'
mask_token: '[MASK]'
# Quoted because an unquoted `#` here would begin a YAML comment and leave the
# key with a null value.
wordpieces_prefix: '##'
#
# NOTE(review): the remaining keys look like vocabulary-training parameters
# (size cap, minimum token frequency, alphabet limits) — confirm with the
# consumer.
vocab_size: 30000
min_frequency: 2
limit_alphabet: 1000
# Explicit empty list; a bare `initial_alphabet:` would parse as null instead.
initial_alphabet: []
show_progress: true
|