mohalisad committed on
Commit
ccae481
·
1 Parent(s): d7d3b6f

fix tokenizer misuse

Browse files
Files changed (1) hide show
  1. tokenizer.json +8 -1
tokenizer.json CHANGED
@@ -201,6 +201,13 @@
201
  "Regex": "[\u0001‪‫‬‭‎‏‮†“”•–—ž„ˆ‰˜‹Œ️⃣]"
202
  },
203
  "content": " "
 
 
 
 
 
 
 
204
  }
205
  ]
206
  },
@@ -96069,4 +96076,4 @@
96069
  "▁ent ire"
96070
  ]
96071
  }
96072
- }
 
201
  "Regex": "[\u0001‪‫‬‭‎‏‮†“”•–—ž„ˆ‰˜‹Œ️⃣]"
202
  },
203
  "content": " "
204
+ },
205
+ {
206
+ "type": "Replace",
207
+ "pattern": {
208
+ "Regex": " *<mask> *"
209
+ },
210
+ "content": "<mask> "
211
  }
212
  ]
213
  },
 
96076
  "▁ent ire"
96077
  ]
96078
  }
96079
+ }