Transformers
martijn75 committed on
Commit
062c2ea
·
verified ·
1 Parent(s): 99620c6

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +473 -59
  2. tokenizer_config.json +427 -59
tokenizer.json CHANGED
@@ -48,9 +48,423 @@
48
  "normalized": false,
49
  "special": true
50
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  {
52
  "id": 2000,
53
- "content": "תוֹ",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
@@ -59,7 +473,7 @@
59
  },
60
  {
61
  "id": 2001,
62
- "content": " ֵי",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
@@ -68,7 +482,7 @@
68
  },
69
  {
70
  "id": 2002,
71
- "content": "הוֹ",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
@@ -77,7 +491,7 @@
77
  },
78
  {
79
  "id": 2003,
80
- "content": " ֵן",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
@@ -86,7 +500,7 @@
86
  },
87
  {
88
  "id": 2004,
89
- "content": "יּוֹ",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
@@ -95,7 +509,7 @@
95
  },
96
  {
97
  "id": 2005,
98
- "content": "תֶ",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
@@ -104,7 +518,7 @@
104
  },
105
  {
106
  "id": 2006,
107
- "content": "תֶּן",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +527,7 @@
113
  },
114
  {
115
  "id": 2007,
116
- "content": "הֵנָּה",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +536,7 @@
122
  },
123
  {
124
  "id": 2008,
125
- "content": " ַת",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,7 +545,7 @@
131
  },
132
  {
133
  "id": 2009,
134
- "content": "תֵּ",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
@@ -140,7 +554,7 @@
140
  },
141
  {
142
  "id": 2010,
143
- "content": " ִי",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
@@ -149,7 +563,7 @@
149
  },
150
  {
151
  "id": 2011,
152
- "content": "יֹּ",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
@@ -158,7 +572,7 @@
158
  },
159
  {
160
  "id": 2012,
161
- "content": "נִּתְ",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
@@ -167,7 +581,7 @@
167
  },
168
  {
169
  "id": 2013,
170
- "content": "תִתְ",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
@@ -176,7 +590,7 @@
176
  },
177
  {
178
  "id": 2014,
179
- "content": " ַי",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
@@ -185,7 +599,7 @@
185
  },
186
  {
187
  "id": 2015,
188
- "content": "נּוֹ",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
@@ -194,7 +608,7 @@
194
  },
195
  {
196
  "id": 2016,
197
- "content": "תִּ",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
@@ -203,7 +617,7 @@
203
  },
204
  {
205
  "id": 2017,
206
- "content": "יֶּ",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
@@ -212,7 +626,7 @@
212
  },
213
  {
214
  "id": 2018,
215
- "content": "תֵ",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
@@ -221,7 +635,7 @@
221
  },
222
  {
223
  "id": 2019,
224
- "content": "וֹ",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
@@ -230,7 +644,7 @@
230
  },
231
  {
232
  "id": 2020,
233
- "content": "נִתְ",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
@@ -239,7 +653,7 @@
239
  },
240
  {
241
  "id": 2021,
242
- "content": "נִּ",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
@@ -248,7 +662,7 @@
248
  },
249
  {
250
  "id": 2022,
251
- "content": "תֶן",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
@@ -257,7 +671,7 @@
257
  },
258
  {
259
  "id": 2023,
260
- "content": "תֶּ",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
@@ -266,7 +680,7 @@
266
  },
267
  {
268
  "id": 2024,
269
- "content": "אוֹ",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
@@ -275,7 +689,7 @@
275
  },
276
  {
277
  "id": 2025,
278
- "content": "תֹּ",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
@@ -284,7 +698,7 @@
284
  },
285
  {
286
  "id": 2026,
287
- "content": "יִּתְ",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
@@ -293,7 +707,7 @@
293
  },
294
  {
295
  "id": 2027,
296
- "content": "תִּי",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
@@ -302,7 +716,7 @@
302
  },
303
  {
304
  "id": 2028,
305
- "content": "כֶן",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
@@ -311,7 +725,7 @@
311
  },
312
  {
313
  "id": 2029,
314
- "content": "תֶם",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
@@ -320,7 +734,7 @@
320
  },
321
  {
322
  "id": 2030,
323
- "content": "תָּ",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
@@ -329,7 +743,7 @@
329
  },
330
  {
331
  "id": 2031,
332
- "content": " ָן",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
@@ -338,7 +752,7 @@
338
  },
339
  {
340
  "id": 2032,
341
- "content": "תְּ",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
@@ -347,7 +761,7 @@
347
  },
348
  {
349
  "id": 2033,
350
- "content": "מוֹ",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
@@ -356,7 +770,7 @@
356
  },
357
  {
358
  "id": 2034,
359
- "content": "נָּה",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
@@ -365,7 +779,7 @@
365
  },
366
  {
367
  "id": 2035,
368
- "content": "אֶתְ",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
@@ -374,7 +788,7 @@
374
  },
375
  {
376
  "id": 2036,
377
- "content": "נָּ",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
@@ -383,7 +797,7 @@
383
  },
384
  {
385
  "id": 2037,
386
- "content": "יַּ",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
@@ -392,7 +806,7 @@
392
  },
393
  {
394
  "id": 2038,
395
- "content": "נוֹ",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
@@ -401,7 +815,7 @@
401
  },
402
  {
403
  "id": 2039,
404
- "content": " ֵם",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
@@ -410,7 +824,7 @@
410
  },
411
  {
412
  "id": 2040,
413
- "content": "יִּ",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
@@ -419,7 +833,7 @@
419
  },
420
  {
421
  "id": 2041,
422
- "content": "נֵ",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
@@ -428,7 +842,7 @@
428
  },
429
  {
430
  "id": 2042,
431
- "content": "נֵּ",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
@@ -437,7 +851,7 @@
437
  },
438
  {
439
  "id": 2043,
440
- "content": " ָה",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
@@ -446,7 +860,7 @@
446
  },
447
  {
448
  "id": 2044,
449
- "content": "נֶּ",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
@@ -455,7 +869,7 @@
455
  },
456
  {
457
  "id": 2045,
458
- "content": " ִים",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
@@ -464,7 +878,7 @@
464
  },
465
  {
466
  "id": 2046,
467
- "content": "תִּתְ",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
@@ -482,7 +896,7 @@
482
  },
483
  {
484
  "id": 2048,
485
- "content": "יָּ",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
@@ -491,7 +905,7 @@
491
  },
492
  {
493
  "id": 2049,
494
- "content": "תֶּם",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
@@ -500,7 +914,7 @@
500
  },
501
  {
502
  "id": 2050,
503
- "content": " ָם",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
@@ -509,7 +923,7 @@
509
  },
510
  {
511
  "id": 2051,
512
- "content": "נַּ",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
@@ -518,7 +932,7 @@
518
  },
519
  {
520
  "id": 2052,
521
- "content": "תַּ",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
@@ -527,7 +941,7 @@
527
  },
528
  {
529
  "id": 2053,
530
- "content": "יוֹ",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
@@ -536,7 +950,7 @@
536
  },
537
  {
538
  "id": 2054,
539
- "content": " ָהּ",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
@@ -545,7 +959,7 @@
545
  },
546
  {
547
  "id": 2055,
548
- "content": "יֶ",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
@@ -554,7 +968,7 @@
554
  },
555
  {
556
  "id": 2056,
557
- "content": "תֹ",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
@@ -563,7 +977,7 @@
563
  },
564
  {
565
  "id": 2057,
566
- "content": "תּוֹ",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
@@ -572,7 +986,7 @@
572
  },
573
  {
574
  "id": 2058,
575
- "content": "נֹּ",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
@@ -581,7 +995,7 @@
581
  },
582
  {
583
  "id": 2059,
584
- "content": "וֹת",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
 
48
  "normalized": false,
49
  "special": true
50
  },
51
+ {
52
+ "id": 26,
53
+ "content": "ו",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": true,
58
+ "special": false
59
+ },
60
+ {
61
+ "id": 34,
62
+ "content": "ם",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": true,
67
+ "special": false
68
+ },
69
+ {
70
+ "id": 50,
71
+ "content": "וּ",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": true,
76
+ "special": false
77
+ },
78
+ {
79
+ "id": 59,
80
+ "content": "אֶ",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": true,
85
+ "special": false
86
+ },
87
+ {
88
+ "id": 69,
89
+ "content": "אֲ",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": true,
94
+ "special": false
95
+ },
96
+ {
97
+ "id": 75,
98
+ "content": "יְ",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": true,
103
+ "special": false
104
+ },
105
+ {
106
+ "id": 80,
107
+ "content": "יִ",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": true,
112
+ "special": false
113
+ },
114
+ {
115
+ "id": 85,
116
+ "content": "ךְ",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": true,
121
+ "special": false
122
+ },
123
+ {
124
+ "id": 93,
125
+ "content": "ךָ",
126
+ "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": false,
129
+ "normalized": true,
130
+ "special": false
131
+ },
132
+ {
133
+ "id": 94,
134
+ "content": "אַ",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
+ "normalized": true,
139
+ "special": false
140
+ },
141
+ {
142
+ "id": 99,
143
+ "content": "מְ",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": true,
148
+ "special": false
149
+ },
150
+ {
151
+ "id": 101,
152
+ "content": "אָ",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": true,
157
+ "special": false
158
+ },
159
+ {
160
+ "id": 104,
161
+ "content": "מַ",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": true,
166
+ "special": false
167
+ },
168
+ {
169
+ "id": 119,
170
+ "content": "נִי",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": true,
175
+ "special": false
176
+ },
177
+ {
178
+ "id": 127,
179
+ "content": "נְ",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": true,
184
+ "special": false
185
+ },
186
+ {
187
+ "id": 134,
188
+ "content": "הִ",
189
+ "single_word": false,
190
+ "lstrip": false,
191
+ "rstrip": false,
192
+ "normalized": true,
193
+ "special": false
194
+ },
195
+ {
196
+ "id": 136,
197
+ "content": "נוּ",
198
+ "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
+ "normalized": true,
202
+ "special": false
203
+ },
204
+ {
205
+ "id": 148,
206
+ "content": "כֶם",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": true,
211
+ "special": false
212
+ },
213
+ {
214
+ "id": 178,
215
+ "content": "הָ",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": true,
220
+ "special": false
221
+ },
222
+ {
223
+ "id": 181,
224
+ "content": "הוּ",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": true,
229
+ "special": false
230
+ },
231
+ {
232
+ "id": 190,
233
+ "content": "הֶם",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": true,
238
+ "special": false
239
+ },
240
+ {
241
+ "id": 196,
242
+ "content": "נִ",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": true,
247
+ "special": false
248
+ },
249
+ {
250
+ "id": 211,
251
+ "content": "הֲ",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": true,
256
+ "special": false
257
+ },
258
+ {
259
+ "id": 245,
260
+ "content": "תְ",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": true,
265
+ "special": false
266
+ },
267
+ {
268
+ "id": 253,
269
+ "content": "נָה",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": true,
274
+ "special": false
275
+ },
276
+ {
277
+ "id": 254,
278
+ "content": "יָ",
279
+ "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
+ "normalized": true,
283
+ "special": false
284
+ },
285
+ {
286
+ "id": 260,
287
+ "content": "נָ",
288
+ "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
+ "normalized": true,
292
+ "special": false
293
+ },
294
+ {
295
+ "id": 264,
296
+ "content": "נַ",
297
+ "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
+ "normalized": true,
301
+ "special": false
302
+ },
303
+ {
304
+ "id": 274,
305
+ "content": "אִ",
306
+ "single_word": false,
307
+ "lstrip": false,
308
+ "rstrip": false,
309
+ "normalized": true,
310
+ "special": false
311
+ },
312
+ {
313
+ "id": 385,
314
+ "content": "תִ",
315
+ "single_word": false,
316
+ "lstrip": false,
317
+ "rstrip": false,
318
+ "normalized": true,
319
+ "special": false
320
+ },
321
+ {
322
+ "id": 390,
323
+ "content": "נֹ",
324
+ "single_word": false,
325
+ "lstrip": false,
326
+ "rstrip": false,
327
+ "normalized": true,
328
+ "special": false
329
+ },
330
+ {
331
+ "id": 392,
332
+ "content": "הֵ",
333
+ "single_word": false,
334
+ "lstrip": false,
335
+ "rstrip": false,
336
+ "normalized": true,
337
+ "special": false
338
+ },
339
+ {
340
+ "id": 430,
341
+ "content": "נֶ",
342
+ "single_word": false,
343
+ "lstrip": false,
344
+ "rstrip": false,
345
+ "normalized": true,
346
+ "special": false
347
+ },
348
+ {
349
+ "id": 459,
350
+ "content": "יַ",
351
+ "single_word": false,
352
+ "lstrip": false,
353
+ "rstrip": false,
354
+ "normalized": true,
355
+ "special": false
356
+ },
357
+ {
358
+ "id": 462,
359
+ "content": "תִי",
360
+ "single_word": false,
361
+ "lstrip": false,
362
+ "rstrip": false,
363
+ "normalized": true,
364
+ "special": false
365
+ },
366
+ {
367
+ "id": 531,
368
+ "content": "הֶ",
369
+ "single_word": false,
370
+ "lstrip": false,
371
+ "rstrip": false,
372
+ "normalized": true,
373
+ "special": false
374
+ },
375
+ {
376
+ "id": 569,
377
+ "content": "אֹ",
378
+ "single_word": false,
379
+ "lstrip": false,
380
+ "rstrip": false,
381
+ "normalized": true,
382
+ "special": false
383
+ },
384
+ {
385
+ "id": 664,
386
+ "content": "תַ",
387
+ "single_word": false,
388
+ "lstrip": false,
389
+ "rstrip": false,
390
+ "normalized": true,
391
+ "special": false
392
+ },
393
+ {
394
+ "id": 681,
395
+ "content": "יֵ",
396
+ "single_word": false,
397
+ "lstrip": false,
398
+ "rstrip": false,
399
+ "normalized": true,
400
+ "special": false
401
+ },
402
+ {
403
+ "id": 701,
404
+ "content": "תָ",
405
+ "single_word": false,
406
+ "lstrip": false,
407
+ "rstrip": false,
408
+ "normalized": true,
409
+ "special": false
410
+ },
411
+ {
412
+ "id": 732,
413
+ "content": "יֹ",
414
+ "single_word": false,
415
+ "lstrip": false,
416
+ "rstrip": false,
417
+ "normalized": true,
418
+ "special": false
419
+ },
420
+ {
421
+ "id": 1038,
422
+ "content": "הִתְ",
423
+ "single_word": false,
424
+ "lstrip": false,
425
+ "rstrip": false,
426
+ "normalized": true,
427
+ "special": false
428
+ },
429
+ {
430
+ "id": 1236,
431
+ "content": "הֶן",
432
+ "single_word": false,
433
+ "lstrip": false,
434
+ "rstrip": false,
435
+ "normalized": true,
436
+ "special": false
437
+ },
438
+ {
439
+ "id": 1245,
440
+ "content": "יִתְ",
441
+ "single_word": false,
442
+ "lstrip": false,
443
+ "rstrip": false,
444
+ "normalized": true,
445
+ "special": false
446
+ },
447
+ {
448
+ "id": 1349,
449
+ "content": "אֵ",
450
+ "single_word": false,
451
+ "lstrip": false,
452
+ "rstrip": false,
453
+ "normalized": true,
454
+ "special": false
455
+ },
456
+ {
457
+ "id": 1795,
458
+ "content": "נּוּ",
459
+ "single_word": false,
460
+ "lstrip": false,
461
+ "rstrip": false,
462
+ "normalized": true,
463
+ "special": false
464
+ },
465
  {
466
  "id": 2000,
467
+ "content": "מוֹ",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
 
473
  },
474
  {
475
  "id": 2001,
476
+ "content": "הוֹ",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
 
482
  },
483
  {
484
  "id": 2002,
485
+ "content": " ַת",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
 
491
  },
492
  {
493
  "id": 2003,
494
+ "content": " ֵי",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
 
500
  },
501
  {
502
  "id": 2004,
503
+ "content": " ָה",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
 
509
  },
510
  {
511
  "id": 2005,
512
+ "content": "וֹת",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
 
518
  },
519
  {
520
  "id": 2006,
521
+ "content": " ִים",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
 
527
  },
528
  {
529
  "id": 2007,
530
+ "content": " ִי",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
 
536
  },
537
  {
538
  "id": 2008,
539
+ "content": " ַי",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
 
545
  },
546
  {
547
  "id": 2009,
548
+ "content": "וֹ",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
 
554
  },
555
  {
556
  "id": 2010,
557
+ "content": " ָהּ",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
 
563
  },
564
  {
565
  "id": 2011,
566
+ "content": "נָּה",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
 
572
  },
573
  {
574
  "id": 2012,
575
+ "content": "כֶן",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
 
581
  },
582
  {
583
  "id": 2013,
584
+ "content": " ָם",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
 
590
  },
591
  {
592
  "id": 2014,
593
+ "content": " ֵם",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
 
599
  },
600
  {
601
  "id": 2015,
602
+ "content": "הֵנָּה",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
 
608
  },
609
  {
610
  "id": 2016,
611
+ "content": " ֵן",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
 
617
  },
618
  {
619
  "id": 2017,
620
+ "content": " ָן",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
 
626
  },
627
  {
628
  "id": 2018,
629
+ "content": "נִתְ",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
 
635
  },
636
  {
637
  "id": 2019,
638
+ "content": "נִּתְ",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
 
644
  },
645
  {
646
  "id": 2020,
647
+ "content": "יִּתְ",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
 
653
  },
654
  {
655
  "id": 2021,
656
+ "content": "אֶתְ",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
 
662
  },
663
  {
664
  "id": 2022,
665
+ "content": "תִּתְ",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
 
671
  },
672
  {
673
  "id": 2023,
674
+ "content": "תִתְ",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
 
680
  },
681
  {
682
  "id": 2024,
683
+ "content": "תּוֹ",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
 
689
  },
690
  {
691
  "id": 2025,
692
+ "content": "תוֹ",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
 
698
  },
699
  {
700
  "id": 2026,
701
+ "content": "אוֹ",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
 
707
  },
708
  {
709
  "id": 2027,
710
+ "content": "נוֹ",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
 
716
  },
717
  {
718
  "id": 2028,
719
+ "content": "נּוֹ",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
 
725
  },
726
  {
727
  "id": 2029,
728
+ "content": "יוֹ",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
 
734
  },
735
  {
736
  "id": 2030,
737
+ "content": "יּוֹ",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
 
743
  },
744
  {
745
  "id": 2031,
746
+ "content": "תֵּ",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
 
752
  },
753
  {
754
  "id": 2032,
755
+ "content": "תַּ",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
 
761
  },
762
  {
763
  "id": 2033,
764
+ "content": "תִּ",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
 
770
  },
771
  {
772
  "id": 2034,
773
+ "content": "תָּ",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
 
779
  },
780
  {
781
  "id": 2035,
782
+ "content": "תְּ",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
 
788
  },
789
  {
790
  "id": 2036,
791
+ "content": "תֹּ",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
 
797
  },
798
  {
799
  "id": 2037,
800
+ "content": "תֶּ",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
 
806
  },
807
  {
808
  "id": 2038,
809
+ "content": "תֵ",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
 
815
  },
816
  {
817
  "id": 2039,
818
+ "content": "תֹ",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
 
824
  },
825
  {
826
  "id": 2040,
827
+ "content": "תֶ",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
 
833
  },
834
  {
835
  "id": 2041,
836
+ "content": "יֶ",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
 
842
  },
843
  {
844
  "id": 2042,
845
+ "content": "יָּ",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
 
851
  },
852
  {
853
  "id": 2043,
854
+ "content": "יִּ",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
 
860
  },
861
  {
862
  "id": 2044,
863
+ "content": "יֶּ",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
 
869
  },
870
  {
871
  "id": 2045,
872
+ "content": "יֹּ",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
 
878
  },
879
  {
880
  "id": 2046,
881
+ "content": "יַּ",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
 
896
  },
897
  {
898
  "id": 2048,
899
+ "content": "נֵ",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
 
905
  },
906
  {
907
  "id": 2049,
908
+ "content": "נֹּ",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
 
914
  },
915
  {
916
  "id": 2050,
917
+ "content": "נָּ",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
 
923
  },
924
  {
925
  "id": 2051,
926
+ "content": "נֵּ",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
 
932
  },
933
  {
934
  "id": 2052,
935
+ "content": "נִּ",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
 
941
  },
942
  {
943
  "id": 2053,
944
+ "content": "נֶּ",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
 
950
  },
951
  {
952
  "id": 2054,
953
+ "content": "נַּ",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
 
959
  },
960
  {
961
  "id": 2055,
962
+ "content": "תִּי",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
 
968
  },
969
  {
970
  "id": 2056,
971
+ "content": "תֶּם",
972
  "single_word": false,
973
  "lstrip": false,
974
  "rstrip": false,
 
977
  },
978
  {
979
  "id": 2057,
980
+ "content": "תֶּן",
981
  "single_word": false,
982
  "lstrip": false,
983
  "rstrip": false,
 
986
  },
987
  {
988
  "id": 2058,
989
+ "content": "תֶם",
990
  "single_word": false,
991
  "lstrip": false,
992
  "rstrip": false,
 
995
  },
996
  {
997
  "id": 2059,
998
+ "content": "תֶן",
999
  "single_word": false,
1000
  "lstrip": false,
1001
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -40,8 +40,376 @@
40
  "single_word": false,
41
  "special": true
42
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  "2000": {
44
- "content": "תוֹ",
45
  "lstrip": false,
46
  "normalized": true,
47
  "rstrip": false,
@@ -49,7 +417,7 @@
49
  "special": false
50
  },
51
  "2001": {
52
- "content": " ֵי",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
@@ -57,7 +425,7 @@
57
  "special": false
58
  },
59
  "2002": {
60
- "content": "הוֹ",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
@@ -65,7 +433,7 @@
65
  "special": false
66
  },
67
  "2003": {
68
- "content": " ֵן",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
@@ -73,7 +441,7 @@
73
  "special": false
74
  },
75
  "2004": {
76
- "content": "יּוֹ",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
@@ -81,7 +449,7 @@
81
  "special": false
82
  },
83
  "2005": {
84
- "content": "תֶ",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
@@ -89,7 +457,7 @@
89
  "special": false
90
  },
91
  "2006": {
92
- "content": "תֶּן",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
@@ -97,7 +465,7 @@
97
  "special": false
98
  },
99
  "2007": {
100
- "content": "הֵנָּה",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
@@ -105,7 +473,7 @@
105
  "special": false
106
  },
107
  "2008": {
108
- "content": " ַת",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
@@ -113,7 +481,7 @@
113
  "special": false
114
  },
115
  "2009": {
116
- "content": "תֵּ",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
@@ -121,7 +489,7 @@
121
  "special": false
122
  },
123
  "2010": {
124
- "content": " ִי",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
@@ -129,7 +497,7 @@
129
  "special": false
130
  },
131
  "2011": {
132
- "content": "יֹּ",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
@@ -137,7 +505,7 @@
137
  "special": false
138
  },
139
  "2012": {
140
- "content": "נִּתְ",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
@@ -145,7 +513,7 @@
145
  "special": false
146
  },
147
  "2013": {
148
- "content": "תִתְ",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
@@ -153,7 +521,7 @@
153
  "special": false
154
  },
155
  "2014": {
156
- "content": " ַי",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
@@ -161,7 +529,7 @@
161
  "special": false
162
  },
163
  "2015": {
164
- "content": "נּוֹ",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
@@ -169,7 +537,7 @@
169
  "special": false
170
  },
171
  "2016": {
172
- "content": "תִּ",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
@@ -177,7 +545,7 @@
177
  "special": false
178
  },
179
  "2017": {
180
- "content": "יֶּ",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
@@ -185,7 +553,7 @@
185
  "special": false
186
  },
187
  "2018": {
188
- "content": "תֵ",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
@@ -193,7 +561,7 @@
193
  "special": false
194
  },
195
  "2019": {
196
- "content": "וֹ",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
@@ -201,7 +569,7 @@
201
  "special": false
202
  },
203
  "2020": {
204
- "content": "נִתְ",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
@@ -209,7 +577,7 @@
209
  "special": false
210
  },
211
  "2021": {
212
- "content": "נִּ",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
@@ -217,7 +585,7 @@
217
  "special": false
218
  },
219
  "2022": {
220
- "content": "תֶן",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
@@ -225,7 +593,7 @@
225
  "special": false
226
  },
227
  "2023": {
228
- "content": "תֶּ",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
@@ -233,7 +601,7 @@
233
  "special": false
234
  },
235
  "2024": {
236
- "content": "אוֹ",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
@@ -241,7 +609,7 @@
241
  "special": false
242
  },
243
  "2025": {
244
- "content": "תֹּ",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
@@ -249,7 +617,7 @@
249
  "special": false
250
  },
251
  "2026": {
252
- "content": "יִּתְ",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
@@ -257,7 +625,7 @@
257
  "special": false
258
  },
259
  "2027": {
260
- "content": "תִּי",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
@@ -265,7 +633,7 @@
265
  "special": false
266
  },
267
  "2028": {
268
- "content": "כֶן",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
@@ -273,7 +641,7 @@
273
  "special": false
274
  },
275
  "2029": {
276
- "content": "תֶם",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
@@ -281,7 +649,7 @@
281
  "special": false
282
  },
283
  "2030": {
284
- "content": "תָּ",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
@@ -289,7 +657,7 @@
289
  "special": false
290
  },
291
  "2031": {
292
- "content": " ָן",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
@@ -297,7 +665,7 @@
297
  "special": false
298
  },
299
  "2032": {
300
- "content": "תְּ",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
@@ -305,7 +673,7 @@
305
  "special": false
306
  },
307
  "2033": {
308
- "content": "מוֹ",
309
  "lstrip": false,
310
  "normalized": true,
311
  "rstrip": false,
@@ -313,7 +681,7 @@
313
  "special": false
314
  },
315
  "2034": {
316
- "content": "נָּה",
317
  "lstrip": false,
318
  "normalized": true,
319
  "rstrip": false,
@@ -321,7 +689,7 @@
321
  "special": false
322
  },
323
  "2035": {
324
- "content": "אֶתְ",
325
  "lstrip": false,
326
  "normalized": true,
327
  "rstrip": false,
@@ -329,7 +697,7 @@
329
  "special": false
330
  },
331
  "2036": {
332
- "content": "נָּ",
333
  "lstrip": false,
334
  "normalized": true,
335
  "rstrip": false,
@@ -337,7 +705,7 @@
337
  "special": false
338
  },
339
  "2037": {
340
- "content": "יַּ",
341
  "lstrip": false,
342
  "normalized": true,
343
  "rstrip": false,
@@ -345,7 +713,7 @@
345
  "special": false
346
  },
347
  "2038": {
348
- "content": "נוֹ",
349
  "lstrip": false,
350
  "normalized": true,
351
  "rstrip": false,
@@ -353,7 +721,7 @@
353
  "special": false
354
  },
355
  "2039": {
356
- "content": " ֵם",
357
  "lstrip": false,
358
  "normalized": true,
359
  "rstrip": false,
@@ -361,7 +729,7 @@
361
  "special": false
362
  },
363
  "2040": {
364
- "content": "יִּ",
365
  "lstrip": false,
366
  "normalized": true,
367
  "rstrip": false,
@@ -369,7 +737,7 @@
369
  "special": false
370
  },
371
  "2041": {
372
- "content": "נֵ",
373
  "lstrip": false,
374
  "normalized": true,
375
  "rstrip": false,
@@ -377,7 +745,7 @@
377
  "special": false
378
  },
379
  "2042": {
380
- "content": "נֵּ",
381
  "lstrip": false,
382
  "normalized": true,
383
  "rstrip": false,
@@ -385,7 +753,7 @@
385
  "special": false
386
  },
387
  "2043": {
388
- "content": " ָה",
389
  "lstrip": false,
390
  "normalized": true,
391
  "rstrip": false,
@@ -393,7 +761,7 @@
393
  "special": false
394
  },
395
  "2044": {
396
- "content": "נֶּ",
397
  "lstrip": false,
398
  "normalized": true,
399
  "rstrip": false,
@@ -401,7 +769,7 @@
401
  "special": false
402
  },
403
  "2045": {
404
- "content": " ִים",
405
  "lstrip": false,
406
  "normalized": true,
407
  "rstrip": false,
@@ -409,7 +777,7 @@
409
  "special": false
410
  },
411
  "2046": {
412
- "content": "תִּתְ",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
@@ -425,7 +793,7 @@
425
  "special": false
426
  },
427
  "2048": {
428
- "content": "יָּ",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
@@ -433,7 +801,7 @@
433
  "special": false
434
  },
435
  "2049": {
436
- "content": "תֶּם",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
@@ -441,7 +809,7 @@
441
  "special": false
442
  },
443
  "2050": {
444
- "content": " ָם",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
@@ -449,7 +817,7 @@
449
  "special": false
450
  },
451
  "2051": {
452
- "content": "נַּ",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
@@ -457,7 +825,7 @@
457
  "special": false
458
  },
459
  "2052": {
460
- "content": "תַּ",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
@@ -465,7 +833,7 @@
465
  "special": false
466
  },
467
  "2053": {
468
- "content": "יוֹ",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
@@ -473,7 +841,7 @@
473
  "special": false
474
  },
475
  "2054": {
476
- "content": " ָהּ",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
@@ -481,7 +849,7 @@
481
  "special": false
482
  },
483
  "2055": {
484
- "content": "יֶ",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
@@ -489,7 +857,7 @@
489
  "special": false
490
  },
491
  "2056": {
492
- "content": "תֹ",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
@@ -497,7 +865,7 @@
497
  "special": false
498
  },
499
  "2057": {
500
- "content": "תּוֹ",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
@@ -505,7 +873,7 @@
505
  "special": false
506
  },
507
  "2058": {
508
- "content": "נֹּ",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
@@ -513,7 +881,7 @@
513
  "special": false
514
  },
515
  "2059": {
516
- "content": "וֹת",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,
 
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "26": {
44
+ "content": "ו",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "34": {
52
+ "content": "ם",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "50": {
60
+ "content": "וּ",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "59": {
68
+ "content": "אֶ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "69": {
76
+ "content": "אֲ",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "75": {
84
+ "content": "יְ",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "80": {
92
+ "content": "יִ",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "85": {
100
+ "content": "ךְ",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "93": {
108
+ "content": "ךָ",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "94": {
116
+ "content": "אַ",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "99": {
124
+ "content": "מְ",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "101": {
132
+ "content": "אָ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "104": {
140
+ "content": "מַ",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "119": {
148
+ "content": "נִי",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "127": {
156
+ "content": "נְ",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "134": {
164
+ "content": "הִ",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "136": {
172
+ "content": "נוּ",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "148": {
180
+ "content": "כֶם",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "178": {
188
+ "content": "הָ",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "181": {
196
+ "content": "הוּ",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "190": {
204
+ "content": "הֶם",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "196": {
212
+ "content": "נִ",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "211": {
220
+ "content": "הֲ",
221
+ "lstrip": false,
222
+ "normalized": true,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": false
226
+ },
227
+ "245": {
228
+ "content": "תְ",
229
+ "lstrip": false,
230
+ "normalized": true,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": false
234
+ },
235
+ "253": {
236
+ "content": "נָה",
237
+ "lstrip": false,
238
+ "normalized": true,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": false
242
+ },
243
+ "254": {
244
+ "content": "יָ",
245
+ "lstrip": false,
246
+ "normalized": true,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": false
250
+ },
251
+ "260": {
252
+ "content": "נָ",
253
+ "lstrip": false,
254
+ "normalized": true,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": false
258
+ },
259
+ "264": {
260
+ "content": "נַ",
261
+ "lstrip": false,
262
+ "normalized": true,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": false
266
+ },
267
+ "274": {
268
+ "content": "אִ",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "385": {
276
+ "content": "תִ",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "390": {
284
+ "content": "נֹ",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "392": {
292
+ "content": "הֵ",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "430": {
300
+ "content": "נֶ",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "459": {
308
+ "content": "יַ",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "462": {
316
+ "content": "תִי",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "531": {
324
+ "content": "הֶ",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "569": {
332
+ "content": "אֹ",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "664": {
340
+ "content": "תַ",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "681": {
348
+ "content": "יֵ",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "701": {
356
+ "content": "תָ",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "732": {
364
+ "content": "יֹ",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "1038": {
372
+ "content": "הִתְ",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "1236": {
380
+ "content": "הֶן",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "1245": {
388
+ "content": "יִתְ",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "1349": {
396
+ "content": "אֵ",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "1795": {
404
+ "content": "נּוּ",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
  "2000": {
412
+ "content": "מוֹ",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
 
417
  "special": false
418
  },
419
  "2001": {
420
+ "content": "הוֹ",
421
  "lstrip": false,
422
  "normalized": true,
423
  "rstrip": false,
 
425
  "special": false
426
  },
427
  "2002": {
428
+ "content": " ַת",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
 
433
  "special": false
434
  },
435
  "2003": {
436
+ "content": " ֵי",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
 
441
  "special": false
442
  },
443
  "2004": {
444
+ "content": " ָה",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
 
449
  "special": false
450
  },
451
  "2005": {
452
+ "content": "וֹת",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
 
457
  "special": false
458
  },
459
  "2006": {
460
+ "content": " ִים",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
 
465
  "special": false
466
  },
467
  "2007": {
468
+ "content": " ִי",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
 
473
  "special": false
474
  },
475
  "2008": {
476
+ "content": " ַי",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
 
481
  "special": false
482
  },
483
  "2009": {
484
+ "content": "וֹ",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
 
489
  "special": false
490
  },
491
  "2010": {
492
+ "content": " ָהּ",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
 
497
  "special": false
498
  },
499
  "2011": {
500
+ "content": "נָּה",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
 
505
  "special": false
506
  },
507
  "2012": {
508
+ "content": "כֶן",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
 
513
  "special": false
514
  },
515
  "2013": {
516
+ "content": " ָם",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,
 
521
  "special": false
522
  },
523
  "2014": {
524
+ "content": " ֵם",
525
  "lstrip": false,
526
  "normalized": true,
527
  "rstrip": false,
 
529
  "special": false
530
  },
531
  "2015": {
532
+ "content": "הֵנָּה",
533
  "lstrip": false,
534
  "normalized": true,
535
  "rstrip": false,
 
537
  "special": false
538
  },
539
  "2016": {
540
+ "content": " ֵן",
541
  "lstrip": false,
542
  "normalized": true,
543
  "rstrip": false,
 
545
  "special": false
546
  },
547
  "2017": {
548
+ "content": " ָן",
549
  "lstrip": false,
550
  "normalized": true,
551
  "rstrip": false,
 
553
  "special": false
554
  },
555
  "2018": {
556
+ "content": "נִתְ",
557
  "lstrip": false,
558
  "normalized": true,
559
  "rstrip": false,
 
561
  "special": false
562
  },
563
  "2019": {
564
+ "content": "נִּתְ",
565
  "lstrip": false,
566
  "normalized": true,
567
  "rstrip": false,
 
569
  "special": false
570
  },
571
  "2020": {
572
+ "content": "יִּתְ",
573
  "lstrip": false,
574
  "normalized": true,
575
  "rstrip": false,
 
577
  "special": false
578
  },
579
  "2021": {
580
+ "content": "אֶתְ",
581
  "lstrip": false,
582
  "normalized": true,
583
  "rstrip": false,
 
585
  "special": false
586
  },
587
  "2022": {
588
+ "content": "תִּתְ",
589
  "lstrip": false,
590
  "normalized": true,
591
  "rstrip": false,
 
593
  "special": false
594
  },
595
  "2023": {
596
+ "content": "תִתְ",
597
  "lstrip": false,
598
  "normalized": true,
599
  "rstrip": false,
 
601
  "special": false
602
  },
603
  "2024": {
604
+ "content": "תּוֹ",
605
  "lstrip": false,
606
  "normalized": true,
607
  "rstrip": false,
 
609
  "special": false
610
  },
611
  "2025": {
612
+ "content": "תוֹ",
613
  "lstrip": false,
614
  "normalized": true,
615
  "rstrip": false,
 
617
  "special": false
618
  },
619
  "2026": {
620
+ "content": "אוֹ",
621
  "lstrip": false,
622
  "normalized": true,
623
  "rstrip": false,
 
625
  "special": false
626
  },
627
  "2027": {
628
+ "content": "נוֹ",
629
  "lstrip": false,
630
  "normalized": true,
631
  "rstrip": false,
 
633
  "special": false
634
  },
635
  "2028": {
636
+ "content": "נּוֹ",
637
  "lstrip": false,
638
  "normalized": true,
639
  "rstrip": false,
 
641
  "special": false
642
  },
643
  "2029": {
644
+ "content": "יוֹ",
645
  "lstrip": false,
646
  "normalized": true,
647
  "rstrip": false,
 
649
  "special": false
650
  },
651
  "2030": {
652
+ "content": "יּוֹ",
653
  "lstrip": false,
654
  "normalized": true,
655
  "rstrip": false,
 
657
  "special": false
658
  },
659
  "2031": {
660
+ "content": "תֵּ",
661
  "lstrip": false,
662
  "normalized": true,
663
  "rstrip": false,
 
665
  "special": false
666
  },
667
  "2032": {
668
+ "content": "תַּ",
669
  "lstrip": false,
670
  "normalized": true,
671
  "rstrip": false,
 
673
  "special": false
674
  },
675
  "2033": {
676
+ "content": "תִּ",
677
  "lstrip": false,
678
  "normalized": true,
679
  "rstrip": false,
 
681
  "special": false
682
  },
683
  "2034": {
684
+ "content": "תָּ",
685
  "lstrip": false,
686
  "normalized": true,
687
  "rstrip": false,
 
689
  "special": false
690
  },
691
  "2035": {
692
+ "content": "תְּ",
693
  "lstrip": false,
694
  "normalized": true,
695
  "rstrip": false,
 
697
  "special": false
698
  },
699
  "2036": {
700
+ "content": "תֹּ",
701
  "lstrip": false,
702
  "normalized": true,
703
  "rstrip": false,
 
705
  "special": false
706
  },
707
  "2037": {
708
+ "content": "תֶּ",
709
  "lstrip": false,
710
  "normalized": true,
711
  "rstrip": false,
 
713
  "special": false
714
  },
715
  "2038": {
716
+ "content": "תֵ",
717
  "lstrip": false,
718
  "normalized": true,
719
  "rstrip": false,
 
721
  "special": false
722
  },
723
  "2039": {
724
+ "content": "תֹ",
725
  "lstrip": false,
726
  "normalized": true,
727
  "rstrip": false,
 
729
  "special": false
730
  },
731
  "2040": {
732
+ "content": "תֶ",
733
  "lstrip": false,
734
  "normalized": true,
735
  "rstrip": false,
 
737
  "special": false
738
  },
739
  "2041": {
740
+ "content": "יֶ",
741
  "lstrip": false,
742
  "normalized": true,
743
  "rstrip": false,
 
745
  "special": false
746
  },
747
  "2042": {
748
+ "content": "יָּ",
749
  "lstrip": false,
750
  "normalized": true,
751
  "rstrip": false,
 
753
  "special": false
754
  },
755
  "2043": {
756
+ "content": "יִּ",
757
  "lstrip": false,
758
  "normalized": true,
759
  "rstrip": false,
 
761
  "special": false
762
  },
763
  "2044": {
764
+ "content": "יֶּ",
765
  "lstrip": false,
766
  "normalized": true,
767
  "rstrip": false,
 
769
  "special": false
770
  },
771
  "2045": {
772
+ "content": "יֹּ",
773
  "lstrip": false,
774
  "normalized": true,
775
  "rstrip": false,
 
777
  "special": false
778
  },
779
  "2046": {
780
+ "content": "יַּ",
781
  "lstrip": false,
782
  "normalized": true,
783
  "rstrip": false,
 
793
  "special": false
794
  },
795
  "2048": {
796
+ "content": "נֵ",
797
  "lstrip": false,
798
  "normalized": true,
799
  "rstrip": false,
 
801
  "special": false
802
  },
803
  "2049": {
804
+ "content": "נֹּ",
805
  "lstrip": false,
806
  "normalized": true,
807
  "rstrip": false,
 
809
  "special": false
810
  },
811
  "2050": {
812
+ "content": "נָּ",
813
  "lstrip": false,
814
  "normalized": true,
815
  "rstrip": false,
 
817
  "special": false
818
  },
819
  "2051": {
820
+ "content": "נֵּ",
821
  "lstrip": false,
822
  "normalized": true,
823
  "rstrip": false,
 
825
  "special": false
826
  },
827
  "2052": {
828
+ "content": "נִּ",
829
  "lstrip": false,
830
  "normalized": true,
831
  "rstrip": false,
 
833
  "special": false
834
  },
835
  "2053": {
836
+ "content": "נֶּ",
837
  "lstrip": false,
838
  "normalized": true,
839
  "rstrip": false,
 
841
  "special": false
842
  },
843
  "2054": {
844
+ "content": "נַּ",
845
  "lstrip": false,
846
  "normalized": true,
847
  "rstrip": false,
 
849
  "special": false
850
  },
851
  "2055": {
852
+ "content": "תִּי",
853
  "lstrip": false,
854
  "normalized": true,
855
  "rstrip": false,
 
857
  "special": false
858
  },
859
  "2056": {
860
+ "content": "תֶּם",
861
  "lstrip": false,
862
  "normalized": true,
863
  "rstrip": false,
 
865
  "special": false
866
  },
867
  "2057": {
868
+ "content": "תֶּן",
869
  "lstrip": false,
870
  "normalized": true,
871
  "rstrip": false,
 
873
  "special": false
874
  },
875
  "2058": {
876
+ "content": "תֶם",
877
  "lstrip": false,
878
  "normalized": true,
879
  "rstrip": false,
 
881
  "special": false
882
  },
883
  "2059": {
884
+ "content": "תֶן",
885
  "lstrip": false,
886
  "normalized": true,
887
  "rstrip": false,