ayjays132 committed
Commit e311507
1 Parent(s): 89b7083

Upload 6 files

special_tokens_map.json CHANGED
@@ -1,5 +1,14 @@
 {
   "additional_special_tokens": [
+    "<|endoftext|>",
+    "<|pad|>",
+    "<bos>",
+    "<eos>",
+    "<pad>",
+    "<unk>",
+    "<sep>",
+    "<cls>",
+    "<mask>",
     "<greeting>",
     "<farewell>",
     "<thank>",
tokenizer.json CHANGED
@@ -57,7 +57,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": true,
+      "normalized": false,
       "special": true
     },
     {
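The single changed flag flips an added token, most likely the <|endoftext|> entry given the matching edit in tokenizer_config.json below, from normalized to non-normalized matching: with "normalized": false the tokenizer matches the token's raw text before any normalizer runs, so the special string is recognized verbatim. A sketch of the equivalent flags on the tokenizers library's AddedToken (the token string is inferred from the parallel hunk, not visible in this one):

```python
from tokenizers import AddedToken

# Mirrors the fields in the hunk above; the content is an inference.
tok = AddedToken(
    "<|endoftext|>",
    single_word=False,
    lstrip=False,
    rstrip=False,
    normalized=False,  # match the raw string, bypassing the normalizer
    special=True,
)
```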
tokenizer_config.json CHANGED
@@ -36,7 +36,7 @@
   "50256": {
     "content": "<|endoftext|>",
     "lstrip": false,
-    "normalized": true,
+    "normalized": false,
     "rstrip": false,
     "single_word": false,
     "special": true
@@ -235,6 +235,15 @@
     }
   },
   "additional_special_tokens": [
+    "<|endoftext|>",
+    "<|pad|>",
+    "<bos>",
+    "<eos>",
+    "<pad>",
+    "<unk>",
+    "<sep>",
+    "<cls>",
+    "<mask>",
     "<greeting>",
     "<farewell>",
     "<thank>",
@@ -245,9 +254,16 @@
   "cls_token": "<cls>",
   "eos_token": "<eos>",
   "mask_token": "<mask>",
+  "max_length": 1024,
   "model_max_length": 1024,
+  "pad_to_multiple_of": null,
   "pad_token": "<eos>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "<sep>",
+  "stride": 0,
   "tokenizer_class": "GPT2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
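The keys added in the last hunk pin the tokenizer's padding and truncation defaults directly in tokenizer_config.json: right-side padding and truncation, zero stride, longest_first truncation, and a 1024-token limit matching model_max_length. A sketch of how those defaults play out once the config is reloaded; the local path is the placeholder from the first sketch:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./tokenizer-out")  # placeholder path

batch = tokenizer(
    ["<greeting> hello", "a much longer example sentence"],
    padding="max_length",  # pads on the right, per "padding_side": "right"
    truncation=True,       # "longest_first": trim the longer sequence first
    max_length=1024,       # mirrors "max_length" / "model_max_length"
)
print(len(batch["input_ids"][0]))  # 1024
```

Note that pad_token is mapped to <eos>, a common GPT-2 workaround since the original model ships without a dedicated padding token.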
vocab.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2052635e337c0e877b3d02bbb8e64e3749499b355218dbdb2b74a7285f54da80
-size 8109044
+oid sha256:3ba3c3109ff33976c4bd966589c11ee14fcaa1f4c9e5e154c2ed7f99d80709e7
+size 798156
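vocab.json is tracked with Git LFS, so the diff shows only the pointer file: the blob's sha256 oid and its byte size, which drops from 8,109,044 to 798,156 bytes (roughly 8.1 MB to 0.8 MB). A hedged sketch, assuming the real file has already been pulled locally, of verifying a download against the new pointer:

```python
import hashlib
import os

path = "vocab.json"  # placeholder local path to the pulled LFS object
expected_oid = "3ba3c3109ff33976c4bd966589c11ee14fcaa1f4c9e5e154c2ed7f99d80709e7"
expected_size = 798156

with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

assert os.path.getsize(path) == expected_size, "size mismatch with LFS pointer"
assert digest == expected_oid, "sha256 mismatch with LFS pointer"
```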