fernandofinardi
committed on
Commit
•
60f834e
1
Parent(s):
503b198
Upload Model
Browse files
- special_tokens_map.json +1 -1
- tokenizer.json +8 -35
- tokenizer_config.json +7 -7
special_tokens_map.json
CHANGED
@@ -4,8 +4,8 @@
|
|
4 |
"<s>",
|
5 |
"</s>",
|
6 |
"▁<PRE>",
|
7 |
-
"▁<SUF>",
|
8 |
"▁<MID>",
|
|
|
9 |
"▁<EOT>",
|
10 |
"▁<PRE>",
|
11 |
"▁<MID>",
|
|
|
4 |
"<s>",
|
5 |
"</s>",
|
6 |
"▁<PRE>",
|
|
|
7 |
"▁<MID>",
|
8 |
+
"▁<SUF>",
|
9 |
"▁<EOT>",
|
10 |
"▁<PRE>",
|
11 |
"▁<MID>",
|
tokenizer.json
CHANGED
@@ -31,7 +31,7 @@
|
|
31 |
"special": true
|
32 |
},
|
33 |
{
|
34 |
-
"id": 32007,
|
35 |
"content": "▁<PRE>",
|
36 |
"single_word": false,
|
37 |
"lstrip": true,
|
@@ -40,8 +40,8 @@
|
|
40 |
"special": true
|
41 |
},
|
42 |
{
|
43 |
-
"id": 32008,
|
44 |
-
"content": "▁<SUF>",
|
45 |
"single_word": false,
|
46 |
"lstrip": true,
|
47 |
"rstrip": true,
|
@@ -49,8 +49,8 @@
|
|
49 |
"special": true
|
50 |
},
|
51 |
{
|
52 |
-
"id": 32009,
|
53 |
-
"content": "▁<MID>",
|
54 |
"single_word": false,
|
55 |
"lstrip": true,
|
56 |
"rstrip": true,
|
@@ -58,7 +58,7 @@
|
|
58 |
"special": true
|
59 |
},
|
60 |
{
|
61 |
-
"id": 32010,
|
62 |
"content": "▁<EOT>",
|
63 |
"single_word": false,
|
64 |
"lstrip": true,
|
@@ -32170,23 +32170,7 @@
|
|
32170 |
"왕": 31996,
|
32171 |
"收": 31997,
|
32172 |
"弘": 31998,
|
32173 |
-
"给": 31999,
|
32174 |
-
"▁<SU": 32000,
|
32175 |
-
"▁<SUF": 32001,
|
32176 |
-
"▁<PRE": 32002,
|
32177 |
-
"▁<M": 32003,
|
32178 |
-
"▁<MID": 32004,
|
32179 |
-
"▁<E": 32005,
|
32180 |
-
"▁<EOT": 32006,
|
32181 |
-
"▁<PRE>": 32007,
|
32182 |
-
"▁<SUF>": 32008,
|
32183 |
-
"▁<MID>": 32009,
|
32184 |
-
"▁<EOT>": 32010,
|
32185 |
-
"▁<EOT><EOT>": 32011,
|
32186 |
-
"▁<EOT><EOT><EOT>": 32012,
|
32187 |
-
"▁<EOT><EOT><EOT><EOT>": 32013,
|
32188 |
-
"▁<EOT><EOT><EOT><EOT><EOT>": 32014,
|
32189 |
-
"▁<EOT><EOT><EOT><EOT><EOT><EOT>": 32015
|
32190 |
},
|
32191 |
"merges": [
|
32192 |
"▁ t",
|
@@ -93437,18 +93421,7 @@
|
|
93437 |
"▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
93438 |
"▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
93439 |
"▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
93440 |
-
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
93441 |
-
"▁< SU",
|
93442 |
-
"▁<SU F",
|
93443 |
-
"▁< PRE",
|
93444 |
-
"▁< M",
|
93445 |
-
"▁<M ID",
|
93446 |
-
"▁< E",
|
93447 |
-
"▁<E OT",
|
93448 |
-
"▁<PRE >",
|
93449 |
-
"▁<SUF >",
|
93450 |
-
"▁<MID >",
|
93451 |
-
"▁<EOT >"
|
93452 |
]
|
93453 |
}
|
93454 |
}
|
|
|
31 |
"special": true
|
32 |
},
|
33 |
{
|
34 |
+
"id": 32000,
|
35 |
"content": "▁<PRE>",
|
36 |
"single_word": false,
|
37 |
"lstrip": true,
|
|
|
40 |
"special": true
|
41 |
},
|
42 |
{
|
43 |
+
"id": 32001,
|
44 |
+
"content": "▁<MID>",
|
45 |
"single_word": false,
|
46 |
"lstrip": true,
|
47 |
"rstrip": true,
|
|
|
49 |
"special": true
|
50 |
},
|
51 |
{
|
52 |
+
"id": 32002,
|
53 |
+
"content": "▁<SUF>",
|
54 |
"single_word": false,
|
55 |
"lstrip": true,
|
56 |
"rstrip": true,
|
|
|
58 |
"special": true
|
59 |
},
|
60 |
{
|
61 |
+
"id": 32003,
|
62 |
"content": "▁<EOT>",
|
63 |
"single_word": false,
|
64 |
"lstrip": true,
|
|
|
32170 |
"왕": 31996,
|
32171 |
"收": 31997,
|
32172 |
"弘": 31998,
|
32173 |
+
"给": 31999
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32174 |
},
|
32175 |
"merges": [
|
32176 |
"▁ t",
|
|
|
93421 |
"▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
93422 |
"▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
93423 |
"▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
93424 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93425 |
]
|
93426 |
}
|
93427 |
}
|
tokenizer_config.json
CHANGED
@@ -24,7 +24,7 @@
|
|
24 |
"single_word": false,
|
25 |
"special": true
|
26 |
},
|
27 |
-
"32007": {
|
28 |
"content": "▁<PRE>",
|
29 |
"lstrip": true,
|
30 |
"normalized": false,
|
@@ -32,23 +32,23 @@
|
|
32 |
"single_word": false,
|
33 |
"special": true
|
34 |
},
|
35 |
-
"32008": {
|
36 |
-
"content": "▁<SUF>",
|
37 |
"lstrip": true,
|
38 |
"normalized": false,
|
39 |
"rstrip": true,
|
40 |
"single_word": false,
|
41 |
"special": true
|
42 |
},
|
43 |
-
"32009": {
|
44 |
-
"content": "▁<MID>",
|
45 |
"lstrip": true,
|
46 |
"normalized": false,
|
47 |
"rstrip": true,
|
48 |
"single_word": false,
|
49 |
"special": true
|
50 |
},
|
51 |
-
"32010": {
|
52 |
"content": "▁<EOT>",
|
53 |
"lstrip": true,
|
54 |
"normalized": false,
|
@@ -62,8 +62,8 @@
|
|
62 |
"<s>",
|
63 |
"</s>",
|
64 |
"▁<PRE>",
|
65 |
-
"▁<SUF>",
|
66 |
"▁<MID>",
|
|
|
67 |
"▁<EOT>",
|
68 |
"▁<PRE>",
|
69 |
"▁<MID>",
|
|
|
24 |
"single_word": false,
|
25 |
"special": true
|
26 |
},
|
27 |
+
"32000": {
|
28 |
"content": "▁<PRE>",
|
29 |
"lstrip": true,
|
30 |
"normalized": false,
|
|
|
32 |
"single_word": false,
|
33 |
"special": true
|
34 |
},
|
35 |
+
"32001": {
|
36 |
+
"content": "▁<MID>",
|
37 |
"lstrip": true,
|
38 |
"normalized": false,
|
39 |
"rstrip": true,
|
40 |
"single_word": false,
|
41 |
"special": true
|
42 |
},
|
43 |
+
"32002": {
|
44 |
+
"content": "▁<SUF>",
|
45 |
"lstrip": true,
|
46 |
"normalized": false,
|
47 |
"rstrip": true,
|
48 |
"single_word": false,
|
49 |
"special": true
|
50 |
},
|
51 |
+
"32003": {
|
52 |
"content": "▁<EOT>",
|
53 |
"lstrip": true,
|
54 |
"normalized": false,
|
|
|
62 |
"<s>",
|
63 |
"</s>",
|
64 |
"▁<PRE>",
|
|
|
65 |
"▁<MID>",
|
66 |
+
"▁<SUF>",
|
67 |
"▁<EOT>",
|
68 |
"▁<PRE>",
|
69 |
"▁<MID>",
|