ageng-anugrah committed on
Commit
66d6b0b
1 Parent(s): f6d4e8f

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +0 -7
  2. tokenizer_config.json +2 -7
special_tokens_map.json CHANGED
@@ -1,11 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "[PAD]",
4
- "[UNK]",
5
- "[CLS]",
6
- "[SEP]",
7
- "[MASK]"
8
- ],
9
  "cls_token": "[CLS]",
10
  "mask_token": "[MASK]",
11
  "pad_token": "[PAD]",
 
1
  {
 
 
 
 
 
 
 
2
  "cls_token": "[CLS]",
3
  "mask_token": "[MASK]",
4
  "pad_token": "[PAD]",
tokenizer_config.json CHANGED
@@ -41,13 +41,7 @@
41
  "special": true
42
  }
43
  },
44
- "additional_special_tokens": [
45
- "[PAD]",
46
- "[UNK]",
47
- "[CLS]",
48
- "[SEP]",
49
- "[MASK]"
50
- ],
51
  "clean_up_tokenization_spaces": true,
52
  "cls_token": "[CLS]",
53
  "do_basic_tokenize": true,
@@ -60,5 +54,6 @@
60
  "strip_accents": null,
61
  "tokenize_chinese_chars": true,
62
  "tokenizer_class": "BertTokenizer",
 
63
  "unk_token": "[UNK]"
64
  }
 
41
  "special": true
42
  }
43
  },
44
+ "additional_special_tokens": [],
 
 
 
 
 
 
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "[CLS]",
47
  "do_basic_tokenize": true,
 
54
  "strip_accents": null,
55
  "tokenize_chinese_chars": true,
56
  "tokenizer_class": "BertTokenizer",
57
+ "tokenizer_file": null,
58
  "unk_token": "[UNK]"
59
  }