special_tokens_map.json CHANGED
@@ -4,8 +4,8 @@
4
  "<s>",
5
  "</s>",
6
  "▁<PRE>",
7
- "▁<SUF>",
8
  "▁<MID>",
 
9
  "▁<EOT>",
10
  "▁<PRE>",
11
  "▁<MID>",
 
4
  "<s>",
5
  "</s>",
6
  "▁<PRE>",
 
7
  "▁<MID>",
8
+ "▁<SUF>",
9
  "▁<EOT>",
10
  "▁<PRE>",
11
  "▁<MID>",
tokenizer.json CHANGED
@@ -31,7 +31,7 @@
31
  "special": true
32
  },
33
  {
34
- "id": 32007,
35
  "content": "▁<PRE>",
36
  "single_word": false,
37
  "lstrip": true,
@@ -40,8 +40,8 @@
40
  "special": true
41
  },
42
  {
43
- "id": 32008,
44
- "content": "▁<SUF>",
45
  "single_word": false,
46
  "lstrip": true,
47
  "rstrip": true,
@@ -49,8 +49,8 @@
49
  "special": true
50
  },
51
  {
52
- "id": 32009,
53
- "content": "▁<MID>",
54
  "single_word": false,
55
  "lstrip": true,
56
  "rstrip": true,
@@ -58,7 +58,7 @@
58
  "special": true
59
  },
60
  {
61
- "id": 32010,
62
  "content": "▁<EOT>",
63
  "single_word": false,
64
  "lstrip": true,
@@ -32170,23 +32170,7 @@
32170
  "왕": 31996,
32171
  "收": 31997,
32172
  "弘": 31998,
32173
- "给": 31999,
32174
- "▁<SU": 32000,
32175
- "▁<SUF": 32001,
32176
- "▁<PRE": 32002,
32177
- "▁<M": 32003,
32178
- "▁<MID": 32004,
32179
- "▁<E": 32005,
32180
- "▁<EOT": 32006,
32181
- "▁<PRE>": 32007,
32182
- "▁<SUF>": 32008,
32183
- "▁<MID>": 32009,
32184
- "▁<EOT>": 32010,
32185
- "▁<EOT><EOT>": 32011,
32186
- "▁<EOT><EOT><EOT>": 32012,
32187
- "▁<EOT><EOT><EOT><EOT>": 32013,
32188
- "▁<EOT><EOT><EOT><EOT><EOT>": 32014,
32189
- "▁<EOT><EOT><EOT><EOT><EOT><EOT>": 32015
32190
  },
32191
  "merges": [
32192
  "▁ t",
@@ -93437,18 +93421,7 @@
93437
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93438
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93439
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93440
- "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
93441
- "▁< SU",
93442
- "▁<SU F",
93443
- "▁< PRE",
93444
- "▁< M",
93445
- "▁<M ID",
93446
- "▁< E",
93447
- "▁<E OT",
93448
- "▁<PRE >",
93449
- "▁<SUF >",
93450
- "▁<MID >",
93451
- "▁<EOT >"
93452
  ]
93453
  }
93454
  }
 
31
  "special": true
32
  },
33
  {
34
+ "id": 32000,
35
  "content": "▁<PRE>",
36
  "single_word": false,
37
  "lstrip": true,
 
40
  "special": true
41
  },
42
  {
43
+ "id": 32001,
44
+ "content": "▁<MID>",
45
  "single_word": false,
46
  "lstrip": true,
47
  "rstrip": true,
 
49
  "special": true
50
  },
51
  {
52
+ "id": 32002,
53
+ "content": "▁<SUF>",
54
  "single_word": false,
55
  "lstrip": true,
56
  "rstrip": true,
 
58
  "special": true
59
  },
60
  {
61
+ "id": 32003,
62
  "content": "▁<EOT>",
63
  "single_word": false,
64
  "lstrip": true,
 
32170
  "왕": 31996,
32171
  "收": 31997,
32172
  "弘": 31998,
32173
+ "给": 31999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32174
  },
32175
  "merges": [
32176
  "▁ t",
 
93421
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93422
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93423
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93424
+ "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
 
 
 
 
 
 
 
 
 
 
 
93425
  ]
93426
  }
93427
  }
tokenizer_config.json CHANGED
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "32007": {
28
  "content": "▁<PRE>",
29
  "lstrip": true,
30
  "normalized": false,
@@ -32,23 +32,23 @@
32
  "single_word": false,
33
  "special": true
34
  },
35
- "32008": {
36
- "content": "▁<SUF>",
37
  "lstrip": true,
38
  "normalized": false,
39
  "rstrip": true,
40
  "single_word": false,
41
  "special": true
42
  },
43
- "32009": {
44
- "content": "▁<MID>",
45
  "lstrip": true,
46
  "normalized": false,
47
  "rstrip": true,
48
  "single_word": false,
49
  "special": true
50
  },
51
- "32010": {
52
  "content": "▁<EOT>",
53
  "lstrip": true,
54
  "normalized": false,
@@ -62,8 +62,8 @@
62
  "<s>",
63
  "</s>",
64
  "▁<PRE>",
65
- "▁<SUF>",
66
  "▁<MID>",
 
67
  "▁<EOT>",
68
  "▁<PRE>",
69
  "▁<MID>",
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "32000": {
28
  "content": "▁<PRE>",
29
  "lstrip": true,
30
  "normalized": false,
 
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "32001": {
36
+ "content": "▁<MID>",
37
  "lstrip": true,
38
  "normalized": false,
39
  "rstrip": true,
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "32002": {
44
+ "content": "▁<SUF>",
45
  "lstrip": true,
46
  "normalized": false,
47
  "rstrip": true,
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "32003": {
52
  "content": "▁<EOT>",
53
  "lstrip": true,
54
  "normalized": false,
 
62
  "<s>",
63
  "</s>",
64
  "▁<PRE>",
 
65
  "▁<MID>",
66
+ "▁<SUF>",
67
  "▁<EOT>",
68
  "▁<PRE>",
69
  "▁<MID>",