ayjays132 committed on
Commit
44f1c75
1 Parent(s): 3f5b64a

Upload 4 files

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +4 -117
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +8 -1088
special_tokens_map.json CHANGED
@@ -1,119 +1,6 @@
1
  {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "eos_token": {
105
- "content": "</s>",
106
- "lstrip": false,
107
- "normalized": false,
108
- "rstrip": false,
109
- "single_word": false
110
- },
111
- "pad_token": "</s>",
112
- "unk_token": {
113
- "content": "<unk>",
114
- "lstrip": false,
115
- "normalized": false,
116
- "rstrip": false,
117
- "single_word": false
118
- }
119
  }
 
1
  {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|pad|>",
5
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,1100 +1,20 @@
1
  {
2
- "adaptive_tokenization": {
3
- "enable": true,
4
- "max_tokens": 100000,
5
- "min_frequency": 3,
6
- "retain_variant_forms": true
7
- },
8
  "added_tokens_decoder": {
9
- "0": {
10
- "content": "<pad>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false,
15
- "special": true
16
- },
17
- "1": {
18
- "content": "</s>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false,
23
- "special": true
24
- },
25
- "2": {
26
- "content": "<unk>",
27
- "lstrip": false,
28
- "normalized": false,
29
- "rstrip": false,
30
- "single_word": false,
31
- "special": true
32
- },
33
- "10452": {
34
- "content": "good",
35
  "lstrip": false,
36
  "normalized": true,
37
  "rstrip": false,
38
  "single_word": false,
39
- "special": false
40
- },
41
- "32000": {
42
- "content": "<extra_id_99>",
43
- "lstrip": false,
44
- "normalized": false,
45
- "rstrip": false,
46
- "single_word": false,
47
- "special": true
48
- },
49
- "32001": {
50
- "content": "<extra_id_98>",
51
- "lstrip": false,
52
- "normalized": false,
53
- "rstrip": false,
54
- "single_word": false,
55
- "special": true
56
- },
57
- "32002": {
58
- "content": "<extra_id_97>",
59
- "lstrip": false,
60
- "normalized": false,
61
- "rstrip": false,
62
- "single_word": false,
63
- "special": true
64
- },
65
- "32003": {
66
- "content": "<extra_id_96>",
67
- "lstrip": false,
68
- "normalized": false,
69
- "rstrip": false,
70
- "single_word": false,
71
- "special": true
72
- },
73
- "32004": {
74
- "content": "<extra_id_95>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false,
79
- "special": true
80
- },
81
- "32005": {
82
- "content": "<extra_id_94>",
83
- "lstrip": false,
84
- "normalized": false,
85
- "rstrip": false,
86
- "single_word": false,
87
- "special": true
88
- },
89
- "32006": {
90
- "content": "<extra_id_93>",
91
- "lstrip": false,
92
- "normalized": false,
93
- "rstrip": false,
94
- "single_word": false,
95
- "special": true
96
- },
97
- "32007": {
98
- "content": "<extra_id_92>",
99
- "lstrip": false,
100
- "normalized": false,
101
- "rstrip": false,
102
- "single_word": false,
103
- "special": true
104
- },
105
- "32008": {
106
- "content": "<extra_id_91>",
107
- "lstrip": false,
108
- "normalized": false,
109
- "rstrip": false,
110
- "single_word": false,
111
- "special": true
112
- },
113
- "32009": {
114
- "content": "<extra_id_90>",
115
- "lstrip": false,
116
- "normalized": false,
117
- "rstrip": false,
118
- "single_word": false,
119
- "special": true
120
- },
121
- "32010": {
122
- "content": "<extra_id_89>",
123
- "lstrip": false,
124
- "normalized": false,
125
- "rstrip": false,
126
- "single_word": false,
127
- "special": true
128
- },
129
- "32011": {
130
- "content": "<extra_id_88>",
131
- "lstrip": false,
132
- "normalized": false,
133
- "rstrip": false,
134
- "single_word": false,
135
- "special": true
136
- },
137
- "32012": {
138
- "content": "<extra_id_87>",
139
- "lstrip": false,
140
- "normalized": false,
141
- "rstrip": false,
142
- "single_word": false,
143
- "special": true
144
- },
145
- "32013": {
146
- "content": "<extra_id_86>",
147
- "lstrip": false,
148
- "normalized": false,
149
- "rstrip": false,
150
- "single_word": false,
151
- "special": true
152
- },
153
- "32014": {
154
- "content": "<extra_id_85>",
155
- "lstrip": false,
156
- "normalized": false,
157
- "rstrip": false,
158
- "single_word": false,
159
- "special": true
160
- },
161
- "32015": {
162
- "content": "<extra_id_84>",
163
- "lstrip": false,
164
- "normalized": false,
165
- "rstrip": false,
166
- "single_word": false,
167
- "special": true
168
- },
169
- "32016": {
170
- "content": "<extra_id_83>",
171
- "lstrip": false,
172
- "normalized": false,
173
- "rstrip": false,
174
- "single_word": false,
175
- "special": true
176
- },
177
- "32017": {
178
- "content": "<extra_id_82>",
179
- "lstrip": false,
180
- "normalized": false,
181
- "rstrip": false,
182
- "single_word": false,
183
- "special": true
184
- },
185
- "32018": {
186
- "content": "<extra_id_81>",
187
- "lstrip": false,
188
- "normalized": false,
189
- "rstrip": false,
190
- "single_word": false,
191
- "special": true
192
- },
193
- "32019": {
194
- "content": "<extra_id_80>",
195
- "lstrip": false,
196
- "normalized": false,
197
- "rstrip": false,
198
- "single_word": false,
199
- "special": true
200
- },
201
- "32020": {
202
- "content": "<extra_id_79>",
203
- "lstrip": false,
204
- "normalized": false,
205
- "rstrip": false,
206
- "single_word": false,
207
- "special": true
208
- },
209
- "32021": {
210
- "content": "<extra_id_78>",
211
- "lstrip": false,
212
- "normalized": false,
213
- "rstrip": false,
214
- "single_word": false,
215
- "special": true
216
- },
217
- "32022": {
218
- "content": "<extra_id_77>",
219
- "lstrip": false,
220
- "normalized": false,
221
- "rstrip": false,
222
- "single_word": false,
223
- "special": true
224
- },
225
- "32023": {
226
- "content": "<extra_id_76>",
227
- "lstrip": false,
228
- "normalized": false,
229
- "rstrip": false,
230
- "single_word": false,
231
- "special": true
232
- },
233
- "32024": {
234
- "content": "<extra_id_75>",
235
- "lstrip": false,
236
- "normalized": false,
237
- "rstrip": false,
238
- "single_word": false,
239
- "special": true
240
- },
241
- "32025": {
242
- "content": "<extra_id_74>",
243
- "lstrip": false,
244
- "normalized": false,
245
- "rstrip": false,
246
- "single_word": false,
247
- "special": true
248
- },
249
- "32026": {
250
- "content": "<extra_id_73>",
251
- "lstrip": false,
252
- "normalized": false,
253
- "rstrip": false,
254
- "single_word": false,
255
- "special": true
256
- },
257
- "32027": {
258
- "content": "<extra_id_72>",
259
- "lstrip": false,
260
- "normalized": false,
261
- "rstrip": false,
262
- "single_word": false,
263
- "special": true
264
- },
265
- "32028": {
266
- "content": "<extra_id_71>",
267
- "lstrip": false,
268
- "normalized": false,
269
- "rstrip": false,
270
- "single_word": false,
271
- "special": true
272
- },
273
- "32029": {
274
- "content": "<extra_id_70>",
275
- "lstrip": false,
276
- "normalized": false,
277
- "rstrip": false,
278
- "single_word": false,
279
- "special": true
280
- },
281
- "32030": {
282
- "content": "<extra_id_69>",
283
- "lstrip": false,
284
- "normalized": false,
285
- "rstrip": false,
286
- "single_word": false,
287
- "special": true
288
- },
289
- "32031": {
290
- "content": "<extra_id_68>",
291
- "lstrip": false,
292
- "normalized": false,
293
- "rstrip": false,
294
- "single_word": false,
295
- "special": true
296
- },
297
- "32032": {
298
- "content": "<extra_id_67>",
299
- "lstrip": false,
300
- "normalized": false,
301
- "rstrip": false,
302
- "single_word": false,
303
- "special": true
304
- },
305
- "32033": {
306
- "content": "<extra_id_66>",
307
- "lstrip": false,
308
- "normalized": false,
309
- "rstrip": false,
310
- "single_word": false,
311
- "special": true
312
- },
313
- "32034": {
314
- "content": "<extra_id_65>",
315
- "lstrip": false,
316
- "normalized": false,
317
- "rstrip": false,
318
- "single_word": false,
319
- "special": true
320
- },
321
- "32035": {
322
- "content": "<extra_id_64>",
323
- "lstrip": false,
324
- "normalized": false,
325
- "rstrip": false,
326
- "single_word": false,
327
- "special": true
328
- },
329
- "32036": {
330
- "content": "<extra_id_63>",
331
- "lstrip": false,
332
- "normalized": false,
333
- "rstrip": false,
334
- "single_word": false,
335
- "special": true
336
- },
337
- "32037": {
338
- "content": "<extra_id_62>",
339
- "lstrip": false,
340
- "normalized": false,
341
- "rstrip": false,
342
- "single_word": false,
343
- "special": true
344
- },
345
- "32038": {
346
- "content": "<extra_id_61>",
347
- "lstrip": false,
348
- "normalized": false,
349
- "rstrip": false,
350
- "single_word": false,
351
- "special": true
352
- },
353
- "32039": {
354
- "content": "<extra_id_60>",
355
- "lstrip": false,
356
- "normalized": false,
357
- "rstrip": false,
358
- "single_word": false,
359
- "special": true
360
- },
361
- "32040": {
362
- "content": "<extra_id_59>",
363
- "lstrip": false,
364
- "normalized": false,
365
- "rstrip": false,
366
- "single_word": false,
367
- "special": true
368
- },
369
- "32041": {
370
- "content": "<extra_id_58>",
371
- "lstrip": false,
372
- "normalized": false,
373
- "rstrip": false,
374
- "single_word": false,
375
- "special": true
376
- },
377
- "32042": {
378
- "content": "<extra_id_57>",
379
- "lstrip": false,
380
- "normalized": false,
381
- "rstrip": false,
382
- "single_word": false,
383
- "special": true
384
- },
385
- "32043": {
386
- "content": "<extra_id_56>",
387
- "lstrip": false,
388
- "normalized": false,
389
- "rstrip": false,
390
- "single_word": false,
391
- "special": true
392
- },
393
- "32044": {
394
- "content": "<extra_id_55>",
395
- "lstrip": false,
396
- "normalized": false,
397
- "rstrip": false,
398
- "single_word": false,
399
- "special": true
400
- },
401
- "32045": {
402
- "content": "<extra_id_54>",
403
- "lstrip": false,
404
- "normalized": false,
405
- "rstrip": false,
406
- "single_word": false,
407
- "special": true
408
- },
409
- "32046": {
410
- "content": "<extra_id_53>",
411
- "lstrip": false,
412
- "normalized": false,
413
- "rstrip": false,
414
- "single_word": false,
415
- "special": true
416
- },
417
- "32047": {
418
- "content": "<extra_id_52>",
419
- "lstrip": false,
420
- "normalized": false,
421
- "rstrip": false,
422
- "single_word": false,
423
- "special": true
424
- },
425
- "32048": {
426
- "content": "<extra_id_51>",
427
- "lstrip": false,
428
- "normalized": false,
429
- "rstrip": false,
430
- "single_word": false,
431
- "special": true
432
- },
433
- "32049": {
434
- "content": "<extra_id_50>",
435
- "lstrip": false,
436
- "normalized": false,
437
- "rstrip": false,
438
- "single_word": false,
439
- "special": true
440
- },
441
- "32050": {
442
- "content": "<extra_id_49>",
443
- "lstrip": false,
444
- "normalized": false,
445
- "rstrip": false,
446
- "single_word": false,
447
- "special": true
448
- },
449
- "32051": {
450
- "content": "<extra_id_48>",
451
- "lstrip": false,
452
- "normalized": false,
453
- "rstrip": false,
454
- "single_word": false,
455
- "special": true
456
- },
457
- "32052": {
458
- "content": "<extra_id_47>",
459
- "lstrip": false,
460
- "normalized": false,
461
- "rstrip": false,
462
- "single_word": false,
463
- "special": true
464
- },
465
- "32053": {
466
- "content": "<extra_id_46>",
467
- "lstrip": false,
468
- "normalized": false,
469
- "rstrip": false,
470
- "single_word": false,
471
- "special": true
472
- },
473
- "32054": {
474
- "content": "<extra_id_45>",
475
- "lstrip": false,
476
- "normalized": false,
477
- "rstrip": false,
478
- "single_word": false,
479
- "special": true
480
- },
481
- "32055": {
482
- "content": "<extra_id_44>",
483
- "lstrip": false,
484
- "normalized": false,
485
- "rstrip": false,
486
- "single_word": false,
487
- "special": true
488
- },
489
- "32056": {
490
- "content": "<extra_id_43>",
491
- "lstrip": false,
492
- "normalized": false,
493
- "rstrip": false,
494
- "single_word": false,
495
- "special": true
496
- },
497
- "32057": {
498
- "content": "<extra_id_42>",
499
- "lstrip": false,
500
- "normalized": false,
501
- "rstrip": false,
502
- "single_word": false,
503
- "special": true
504
- },
505
- "32058": {
506
- "content": "<extra_id_41>",
507
- "lstrip": false,
508
- "normalized": false,
509
- "rstrip": false,
510
- "single_word": false,
511
- "special": true
512
- },
513
- "32059": {
514
- "content": "<extra_id_40>",
515
- "lstrip": false,
516
- "normalized": false,
517
- "rstrip": false,
518
- "single_word": false,
519
- "special": true
520
- },
521
- "32060": {
522
- "content": "<extra_id_39>",
523
- "lstrip": false,
524
- "normalized": false,
525
- "rstrip": false,
526
- "single_word": false,
527
- "special": true
528
- },
529
- "32061": {
530
- "content": "<extra_id_38>",
531
- "lstrip": false,
532
- "normalized": false,
533
- "rstrip": false,
534
- "single_word": false,
535
- "special": true
536
- },
537
- "32062": {
538
- "content": "<extra_id_37>",
539
- "lstrip": false,
540
- "normalized": false,
541
- "rstrip": false,
542
- "single_word": false,
543
- "special": true
544
- },
545
- "32063": {
546
- "content": "<extra_id_36>",
547
- "lstrip": false,
548
- "normalized": false,
549
- "rstrip": false,
550
- "single_word": false,
551
- "special": true
552
- },
553
- "32064": {
554
- "content": "<extra_id_35>",
555
- "lstrip": false,
556
- "normalized": false,
557
- "rstrip": false,
558
- "single_word": false,
559
- "special": true
560
- },
561
- "32065": {
562
- "content": "<extra_id_34>",
563
- "lstrip": false,
564
- "normalized": false,
565
- "rstrip": false,
566
- "single_word": false,
567
- "special": true
568
- },
569
- "32066": {
570
- "content": "<extra_id_33>",
571
- "lstrip": false,
572
- "normalized": false,
573
- "rstrip": false,
574
- "single_word": false,
575
- "special": true
576
- },
577
- "32067": {
578
- "content": "<extra_id_32>",
579
- "lstrip": false,
580
- "normalized": false,
581
- "rstrip": false,
582
- "single_word": false,
583
- "special": true
584
- },
585
- "32068": {
586
- "content": "<extra_id_31>",
587
- "lstrip": false,
588
- "normalized": false,
589
- "rstrip": false,
590
- "single_word": false,
591
- "special": true
592
- },
593
- "32069": {
594
- "content": "<extra_id_30>",
595
- "lstrip": false,
596
- "normalized": false,
597
- "rstrip": false,
598
- "single_word": false,
599
- "special": true
600
- },
601
- "32070": {
602
- "content": "<extra_id_29>",
603
- "lstrip": false,
604
- "normalized": false,
605
- "rstrip": false,
606
- "single_word": false,
607
- "special": true
608
- },
609
- "32071": {
610
- "content": "<extra_id_28>",
611
- "lstrip": false,
612
- "normalized": false,
613
- "rstrip": false,
614
- "single_word": false,
615
- "special": true
616
- },
617
- "32072": {
618
- "content": "<extra_id_27>",
619
- "lstrip": false,
620
- "normalized": false,
621
- "rstrip": false,
622
- "single_word": false,
623
- "special": true
624
- },
625
- "32073": {
626
- "content": "<extra_id_26>",
627
- "lstrip": false,
628
- "normalized": false,
629
- "rstrip": false,
630
- "single_word": false,
631
- "special": true
632
- },
633
- "32074": {
634
- "content": "<extra_id_25>",
635
- "lstrip": false,
636
- "normalized": false,
637
- "rstrip": false,
638
- "single_word": false,
639
- "special": true
640
- },
641
- "32075": {
642
- "content": "<extra_id_24>",
643
- "lstrip": false,
644
- "normalized": false,
645
- "rstrip": false,
646
- "single_word": false,
647
- "special": true
648
- },
649
- "32076": {
650
- "content": "<extra_id_23>",
651
- "lstrip": false,
652
- "normalized": false,
653
- "rstrip": false,
654
- "single_word": false,
655
- "special": true
656
- },
657
- "32077": {
658
- "content": "<extra_id_22>",
659
- "lstrip": false,
660
- "normalized": false,
661
- "rstrip": false,
662
- "single_word": false,
663
- "special": true
664
- },
665
- "32078": {
666
- "content": "<extra_id_21>",
667
- "lstrip": false,
668
- "normalized": false,
669
- "rstrip": false,
670
- "single_word": false,
671
- "special": true
672
- },
673
- "32079": {
674
- "content": "<extra_id_20>",
675
- "lstrip": false,
676
- "normalized": false,
677
- "rstrip": false,
678
- "single_word": false,
679
- "special": true
680
- },
681
- "32080": {
682
- "content": "<extra_id_19>",
683
- "lstrip": false,
684
- "normalized": false,
685
- "rstrip": false,
686
- "single_word": false,
687
- "special": true
688
- },
689
- "32081": {
690
- "content": "<extra_id_18>",
691
- "lstrip": false,
692
- "normalized": false,
693
- "rstrip": false,
694
- "single_word": false,
695
- "special": true
696
- },
697
- "32082": {
698
- "content": "<extra_id_17>",
699
- "lstrip": false,
700
- "normalized": false,
701
- "rstrip": false,
702
- "single_word": false,
703
- "special": true
704
- },
705
- "32083": {
706
- "content": "<extra_id_16>",
707
- "lstrip": false,
708
- "normalized": false,
709
- "rstrip": false,
710
- "single_word": false,
711
- "special": true
712
- },
713
- "32084": {
714
- "content": "<extra_id_15>",
715
- "lstrip": false,
716
- "normalized": false,
717
- "rstrip": false,
718
- "single_word": false,
719
- "special": true
720
- },
721
- "32085": {
722
- "content": "<extra_id_14>",
723
- "lstrip": false,
724
- "normalized": false,
725
- "rstrip": false,
726
- "single_word": false,
727
- "special": true
728
- },
729
- "32086": {
730
- "content": "<extra_id_13>",
731
- "lstrip": false,
732
- "normalized": false,
733
- "rstrip": false,
734
- "single_word": false,
735
- "special": true
736
- },
737
- "32087": {
738
- "content": "<extra_id_12>",
739
- "lstrip": false,
740
- "normalized": false,
741
- "rstrip": false,
742
- "single_word": false,
743
- "special": true
744
- },
745
- "32088": {
746
- "content": "<extra_id_11>",
747
- "lstrip": false,
748
- "normalized": false,
749
- "rstrip": false,
750
- "single_word": false,
751
- "special": true
752
- },
753
- "32089": {
754
- "content": "<extra_id_10>",
755
- "lstrip": false,
756
- "normalized": false,
757
- "rstrip": false,
758
- "single_word": false,
759
- "special": true
760
- },
761
- "32090": {
762
- "content": "<extra_id_9>",
763
- "lstrip": false,
764
- "normalized": false,
765
- "rstrip": false,
766
- "single_word": false,
767
- "special": true
768
- },
769
- "32091": {
770
- "content": "<extra_id_8>",
771
- "lstrip": false,
772
- "normalized": false,
773
- "rstrip": false,
774
- "single_word": false,
775
- "special": true
776
- },
777
- "32092": {
778
- "content": "<extra_id_7>",
779
- "lstrip": false,
780
- "normalized": false,
781
- "rstrip": false,
782
- "single_word": false,
783
- "special": true
784
- },
785
- "32093": {
786
- "content": "<extra_id_6>",
787
- "lstrip": false,
788
- "normalized": false,
789
- "rstrip": false,
790
- "single_word": false,
791
- "special": true
792
- },
793
- "32094": {
794
- "content": "<extra_id_5>",
795
- "lstrip": false,
796
- "normalized": false,
797
- "rstrip": false,
798
- "single_word": false,
799
- "special": true
800
- },
801
- "32095": {
802
- "content": "<extra_id_4>",
803
- "lstrip": false,
804
- "normalized": false,
805
- "rstrip": false,
806
- "single_word": false,
807
- "special": true
808
- },
809
- "32096": {
810
- "content": "<extra_id_3>",
811
- "lstrip": false,
812
- "normalized": false,
813
- "rstrip": false,
814
- "single_word": false,
815
- "special": true
816
- },
817
- "32097": {
818
- "content": "<extra_id_2>",
819
- "lstrip": false,
820
- "normalized": false,
821
- "rstrip": false,
822
- "single_word": false,
823
- "special": true
824
- },
825
- "32098": {
826
- "content": "<extra_id_1>",
827
- "lstrip": false,
828
- "normalized": false,
829
- "rstrip": false,
830
- "single_word": false,
831
- "special": true
832
- },
833
- "32099": {
834
- "content": "<extra_id_0>",
835
- "lstrip": false,
836
- "normalized": false,
837
- "rstrip": false,
838
- "single_word": false,
839
- "special": true
840
- },
841
- "32100": {
842
- "content": "happy",
843
- "lstrip": false,
844
- "normalized": true,
845
- "rstrip": false,
846
- "single_word": false,
847
- "special": false
848
- },
849
- "32101": {
850
- "content": "intelligent",
851
- "lstrip": false,
852
- "normalized": true,
853
- "rstrip": false,
854
- "single_word": false,
855
- "special": false
856
- },
857
- "32102": {
858
- "content": "response",
859
- "lstrip": false,
860
- "normalized": true,
861
- "rstrip": false,
862
- "single_word": false,
863
- "special": false
864
- },
865
- "32103": {
866
- "content": "environment",
867
- "lstrip": false,
868
- "normalized": true,
869
- "rstrip": false,
870
- "single_word": false,
871
- "special": false
872
- },
873
- "32104": {
874
- "content": "amazeballs",
875
- "lstrip": false,
876
- "normalized": true,
877
- "rstrip": false,
878
- "single_word": false,
879
- "special": false
880
- },
881
- "32105": {
882
- "content": "cryptocurrency",
883
- "lstrip": false,
884
- "normalized": true,
885
- "rstrip": false,
886
- "single_word": false,
887
- "special": false
888
- },
889
- "32106": {
890
- "content": "webinar",
891
- "lstrip": false,
892
- "normalized": true,
893
- "rstrip": false,
894
- "single_word": false,
895
- "special": false
896
- },
897
- "32107": {
898
- "content": "vlog",
899
- "lstrip": false,
900
- "normalized": true,
901
- "rstrip": false,
902
- "single_word": false,
903
- "special": false
904
- },
905
- "32108": {
906
- "content": "upcycle",
907
- "lstrip": false,
908
- "normalized": true,
909
- "rstrip": false,
910
- "single_word": false,
911
- "special": false
912
- },
913
- "32109": {
914
- "content": "photobomb",
915
- "lstrip": false,
916
- "normalized": true,
917
- "rstrip": false,
918
- "single_word": false,
919
- "special": false
920
- },
921
- "32110": {
922
- "content": "facepalm",
923
- "lstrip": false,
924
- "normalized": true,
925
- "rstrip": false,
926
- "single_word": false,
927
- "special": false
928
- },
929
- "32111": {
930
- "content": "crowdfunding",
931
- "lstrip": false,
932
- "normalized": true,
933
- "rstrip": false,
934
- "single_word": false,
935
- "special": false
936
- },
937
- "32112": {
938
- "content": "bromance",
939
- "lstrip": false,
940
- "normalized": true,
941
- "rstrip": false,
942
- "single_word": false,
943
- "special": false
944
- },
945
- "32113": {
946
- "content": "hangry",
947
- "lstrip": false,
948
- "normalized": true,
949
- "rstrip": false,
950
- "single_word": false,
951
- "special": false
952
- },
953
- "32114": {
954
- "content": "[PAD]",
955
- "lstrip": false,
956
- "normalized": false,
957
- "rstrip": false,
958
- "single_word": false,
959
  "special": true
960
  }
961
  },
962
- "additional_special_tokens": [
963
- "<extra_id_0>",
964
- "<extra_id_1>",
965
- "<extra_id_2>",
966
- "<extra_id_3>",
967
- "<extra_id_4>",
968
- "<extra_id_5>",
969
- "<extra_id_6>",
970
- "<extra_id_7>",
971
- "<extra_id_8>",
972
- "<extra_id_9>",
973
- "<extra_id_10>",
974
- "<extra_id_11>",
975
- "<extra_id_12>",
976
- "<extra_id_13>",
977
- "<extra_id_14>",
978
- "<extra_id_15>",
979
- "<extra_id_16>",
980
- "<extra_id_17>",
981
- "<extra_id_18>",
982
- "<extra_id_19>",
983
- "<extra_id_20>",
984
- "<extra_id_21>",
985
- "<extra_id_22>",
986
- "<extra_id_23>",
987
- "<extra_id_24>",
988
- "<extra_id_25>",
989
- "<extra_id_26>",
990
- "<extra_id_27>",
991
- "<extra_id_28>",
992
- "<extra_id_29>",
993
- "<extra_id_30>",
994
- "<extra_id_31>",
995
- "<extra_id_32>",
996
- "<extra_id_33>",
997
- "<extra_id_34>",
998
- "<extra_id_35>",
999
- "<extra_id_36>",
1000
- "<extra_id_37>",
1001
- "<extra_id_38>",
1002
- "<extra_id_39>",
1003
- "<extra_id_40>",
1004
- "<extra_id_41>",
1005
- "<extra_id_42>",
1006
- "<extra_id_43>",
1007
- "<extra_id_44>",
1008
- "<extra_id_45>",
1009
- "<extra_id_46>",
1010
- "<extra_id_47>",
1011
- "<extra_id_48>",
1012
- "<extra_id_49>",
1013
- "<extra_id_50>",
1014
- "<extra_id_51>",
1015
- "<extra_id_52>",
1016
- "<extra_id_53>",
1017
- "<extra_id_54>",
1018
- "<extra_id_55>",
1019
- "<extra_id_56>",
1020
- "<extra_id_57>",
1021
- "<extra_id_58>",
1022
- "<extra_id_59>",
1023
- "<extra_id_60>",
1024
- "<extra_id_61>",
1025
- "<extra_id_62>",
1026
- "<extra_id_63>",
1027
- "<extra_id_64>",
1028
- "<extra_id_65>",
1029
- "<extra_id_66>",
1030
- "<extra_id_67>",
1031
- "<extra_id_68>",
1032
- "<extra_id_69>",
1033
- "<extra_id_70>",
1034
- "<extra_id_71>",
1035
- "<extra_id_72>",
1036
- "<extra_id_73>",
1037
- "<extra_id_74>",
1038
- "<extra_id_75>",
1039
- "<extra_id_76>",
1040
- "<extra_id_77>",
1041
- "<extra_id_78>",
1042
- "<extra_id_79>",
1043
- "<extra_id_80>",
1044
- "<extra_id_81>",
1045
- "<extra_id_82>",
1046
- "<extra_id_83>",
1047
- "<extra_id_84>",
1048
- "<extra_id_85>",
1049
- "<extra_id_86>",
1050
- "<extra_id_87>",
1051
- "<extra_id_88>",
1052
- "<extra_id_89>",
1053
- "<extra_id_90>",
1054
- "<extra_id_91>",
1055
- "<extra_id_92>",
1056
- "<extra_id_93>",
1057
- "<extra_id_94>",
1058
- "<extra_id_95>",
1059
- "<extra_id_96>",
1060
- "<extra_id_97>",
1061
- "<extra_id_98>",
1062
- "<extra_id_99>"
1063
- ],
1064
- "bpe_dropout": 0.1,
1065
  "clean_up_tokenization_spaces": true,
1066
- "enable_token_classification": true,
1067
- "eos_token": "</s>",
1068
- "extra_ids": 100,
1069
- "max_length": 1024,
1070
  "model_max_length": 1024,
1071
- "normalization_rules": {
1072
- "enable": true,
1073
- "lowercase": true,
1074
- "nmt_normalization": true,
1075
- "pre_tokenization": {
1076
- "punctuation_split": true,
1077
- "split_digits": true
1078
- },
1079
- "strip_accents": true
1080
- },
1081
- "pad_to_multiple_of": null,
1082
- "pad_token": "</s>",
1083
- "pad_token_type_id": 0,
1084
- "padding_side": "right",
1085
- "spe_tokenization": {
1086
- "coverage": 0.9995,
1087
- "enable": true,
1088
- "nbest_size": 64
1089
- },
1090
- "stride": 0,
1091
- "subword_regularization": {
1092
- "alpha": 0.1,
1093
- "dropout": 0.1,
1094
- "enable": true
1095
- },
1096
- "tokenizer_class": "T5Tokenizer",
1097
- "truncation_side": "right",
1098
- "truncation_strategy": "longest_first",
1099
- "unk_token": "<unk>"
1100
  }
 
1
  {
2
+ "add_prefix_space": false,
 
 
 
 
 
3
  "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  "lstrip": false,
7
  "normalized": true,
8
  "rstrip": false,
9
  "single_word": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "special": true
11
  }
12
  },
13
+ "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
 
 
 
16
  "model_max_length": 1024,
17
+ "pad_token": "<|pad|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }