Fix tokenizer misuse
Browse files - tokenizer.json +8 -1
tokenizer.json
CHANGED
@@ -201,6 +201,13 @@
|
|
201 |
"Regex": "[\u0001️⃣]"
|
202 |
},
|
203 |
"content": " "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
}
|
205 |
]
|
206 |
},
|
@@ -96069,4 +96076,4 @@
|
|
96069 |
"▁ent ire"
|
96070 |
]
|
96071 |
}
|
96072 |
-
}
|
|
|
201 |
"Regex": "[\u0001️⃣]"
|
202 |
},
|
203 |
"content": " "
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"type": "Replace",
|
207 |
+
"pattern": {
|
208 |
+
"Regex": " *<mask> *"
|
209 |
+
},
|
210 |
+
"content": "<mask> "
|
211 |
}
|
212 |
]
|
213 |
},
|
|
|
96076 |
"▁ent ire"
|
96077 |
]
|
96078 |
}
|
96079 |
+
}
|