Updated tokenizer with special tokens for a chat template
Browse files
- tokenizer.json +4 -4
tokenizer.json
CHANGED
@@ -32,7 +32,7 @@
|
|
32 |
},
|
33 |
{
|
34 |
"id": 3,
|
35 |
-
"content": "<extra_id_0>",
|
36 |
"single_word": false,
|
37 |
"lstrip": false,
|
38 |
"rstrip": false,
|
@@ -41,7 +41,7 @@
|
|
41 |
},
|
42 |
{
|
43 |
"id": 4,
|
44 |
-
"content": "<extra_id_1>",
|
45 |
"single_word": false,
|
46 |
"lstrip": false,
|
47 |
"rstrip": false,
|
@@ -265,8 +265,8 @@
|
|
265 |
"<unk>": 0,
|
266 |
"<s>": 1,
|
267 |
"</s>": 2,
|
268 |
-
"<extra_id_0>": 3,
|
269 |
-
"<extra_id_1>": 4,
|
270 |
"<extra_id_2>": 5,
|
271 |
"<extra_id_3>": 6,
|
272 |
"<extra_id_4>": 7,
|
|
|
32 |
},
|
33 |
{
|
34 |
"id": 3,
|
35 |
+
"content": "<|im_start|>",
|
36 |
"single_word": false,
|
37 |
"lstrip": false,
|
38 |
"rstrip": false,
|
|
|
41 |
},
|
42 |
{
|
43 |
"id": 4,
|
44 |
+
"content": "<|im_end|>",
|
45 |
"single_word": false,
|
46 |
"lstrip": false,
|
47 |
"rstrip": false,
|
|
|
265 |
"<unk>": 0,
|
266 |
"<s>": 1,
|
267 |
"</s>": 2,
|
268 |
+
"<|im_start|>": 3,
|
269 |
+
"<|im_end|>": 4,
|
270 |
"<extra_id_2>": 5,
|
271 |
"<extra_id_3>": 6,
|
272 |
"<extra_id_4>": 7,
|