Transformers
martijn75 committed on
Commit
2ffa7e9
·
verified ·
1 Parent(s): 062c2ea

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +60 -474
  2. tokenizer_config.json +60 -428
tokenizer.json CHANGED
@@ -48,423 +48,9 @@
48
  "normalized": false,
49
  "special": true
50
  },
51
- {
52
- "id": 26,
53
- "content": "ו",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": true,
58
- "special": false
59
- },
60
- {
61
- "id": 34,
62
- "content": "ם",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": true,
67
- "special": false
68
- },
69
- {
70
- "id": 50,
71
- "content": "וּ",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": true,
76
- "special": false
77
- },
78
- {
79
- "id": 59,
80
- "content": "אֶ",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": true,
85
- "special": false
86
- },
87
- {
88
- "id": 69,
89
- "content": "אֲ",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": true,
94
- "special": false
95
- },
96
- {
97
- "id": 75,
98
- "content": "יְ",
99
- "single_word": false,
100
- "lstrip": false,
101
- "rstrip": false,
102
- "normalized": true,
103
- "special": false
104
- },
105
- {
106
- "id": 80,
107
- "content": "יִ",
108
- "single_word": false,
109
- "lstrip": false,
110
- "rstrip": false,
111
- "normalized": true,
112
- "special": false
113
- },
114
- {
115
- "id": 85,
116
- "content": "ךְ",
117
- "single_word": false,
118
- "lstrip": false,
119
- "rstrip": false,
120
- "normalized": true,
121
- "special": false
122
- },
123
- {
124
- "id": 93,
125
- "content": "ךָ",
126
- "single_word": false,
127
- "lstrip": false,
128
- "rstrip": false,
129
- "normalized": true,
130
- "special": false
131
- },
132
- {
133
- "id": 94,
134
- "content": "אַ",
135
- "single_word": false,
136
- "lstrip": false,
137
- "rstrip": false,
138
- "normalized": true,
139
- "special": false
140
- },
141
- {
142
- "id": 99,
143
- "content": "מְ",
144
- "single_word": false,
145
- "lstrip": false,
146
- "rstrip": false,
147
- "normalized": true,
148
- "special": false
149
- },
150
- {
151
- "id": 101,
152
- "content": "אָ",
153
- "single_word": false,
154
- "lstrip": false,
155
- "rstrip": false,
156
- "normalized": true,
157
- "special": false
158
- },
159
- {
160
- "id": 104,
161
- "content": "מַ",
162
- "single_word": false,
163
- "lstrip": false,
164
- "rstrip": false,
165
- "normalized": true,
166
- "special": false
167
- },
168
- {
169
- "id": 119,
170
- "content": "נִי",
171
- "single_word": false,
172
- "lstrip": false,
173
- "rstrip": false,
174
- "normalized": true,
175
- "special": false
176
- },
177
- {
178
- "id": 127,
179
- "content": "נְ",
180
- "single_word": false,
181
- "lstrip": false,
182
- "rstrip": false,
183
- "normalized": true,
184
- "special": false
185
- },
186
- {
187
- "id": 134,
188
- "content": "הִ",
189
- "single_word": false,
190
- "lstrip": false,
191
- "rstrip": false,
192
- "normalized": true,
193
- "special": false
194
- },
195
- {
196
- "id": 136,
197
- "content": "נוּ",
198
- "single_word": false,
199
- "lstrip": false,
200
- "rstrip": false,
201
- "normalized": true,
202
- "special": false
203
- },
204
- {
205
- "id": 148,
206
- "content": "כֶם",
207
- "single_word": false,
208
- "lstrip": false,
209
- "rstrip": false,
210
- "normalized": true,
211
- "special": false
212
- },
213
- {
214
- "id": 178,
215
- "content": "הָ",
216
- "single_word": false,
217
- "lstrip": false,
218
- "rstrip": false,
219
- "normalized": true,
220
- "special": false
221
- },
222
- {
223
- "id": 181,
224
- "content": "הוּ",
225
- "single_word": false,
226
- "lstrip": false,
227
- "rstrip": false,
228
- "normalized": true,
229
- "special": false
230
- },
231
- {
232
- "id": 190,
233
- "content": "הֶם",
234
- "single_word": false,
235
- "lstrip": false,
236
- "rstrip": false,
237
- "normalized": true,
238
- "special": false
239
- },
240
- {
241
- "id": 196,
242
- "content": "נִ",
243
- "single_word": false,
244
- "lstrip": false,
245
- "rstrip": false,
246
- "normalized": true,
247
- "special": false
248
- },
249
- {
250
- "id": 211,
251
- "content": "הֲ",
252
- "single_word": false,
253
- "lstrip": false,
254
- "rstrip": false,
255
- "normalized": true,
256
- "special": false
257
- },
258
- {
259
- "id": 245,
260
- "content": "תְ",
261
- "single_word": false,
262
- "lstrip": false,
263
- "rstrip": false,
264
- "normalized": true,
265
- "special": false
266
- },
267
- {
268
- "id": 253,
269
- "content": "נָה",
270
- "single_word": false,
271
- "lstrip": false,
272
- "rstrip": false,
273
- "normalized": true,
274
- "special": false
275
- },
276
- {
277
- "id": 254,
278
- "content": "יָ",
279
- "single_word": false,
280
- "lstrip": false,
281
- "rstrip": false,
282
- "normalized": true,
283
- "special": false
284
- },
285
- {
286
- "id": 260,
287
- "content": "נָ",
288
- "single_word": false,
289
- "lstrip": false,
290
- "rstrip": false,
291
- "normalized": true,
292
- "special": false
293
- },
294
- {
295
- "id": 264,
296
- "content": "נַ",
297
- "single_word": false,
298
- "lstrip": false,
299
- "rstrip": false,
300
- "normalized": true,
301
- "special": false
302
- },
303
- {
304
- "id": 274,
305
- "content": "אִ",
306
- "single_word": false,
307
- "lstrip": false,
308
- "rstrip": false,
309
- "normalized": true,
310
- "special": false
311
- },
312
- {
313
- "id": 385,
314
- "content": "תִ",
315
- "single_word": false,
316
- "lstrip": false,
317
- "rstrip": false,
318
- "normalized": true,
319
- "special": false
320
- },
321
- {
322
- "id": 390,
323
- "content": "נֹ",
324
- "single_word": false,
325
- "lstrip": false,
326
- "rstrip": false,
327
- "normalized": true,
328
- "special": false
329
- },
330
- {
331
- "id": 392,
332
- "content": "הֵ",
333
- "single_word": false,
334
- "lstrip": false,
335
- "rstrip": false,
336
- "normalized": true,
337
- "special": false
338
- },
339
- {
340
- "id": 430,
341
- "content": "נֶ",
342
- "single_word": false,
343
- "lstrip": false,
344
- "rstrip": false,
345
- "normalized": true,
346
- "special": false
347
- },
348
- {
349
- "id": 459,
350
- "content": "יַ",
351
- "single_word": false,
352
- "lstrip": false,
353
- "rstrip": false,
354
- "normalized": true,
355
- "special": false
356
- },
357
- {
358
- "id": 462,
359
- "content": "תִי",
360
- "single_word": false,
361
- "lstrip": false,
362
- "rstrip": false,
363
- "normalized": true,
364
- "special": false
365
- },
366
- {
367
- "id": 531,
368
- "content": "הֶ",
369
- "single_word": false,
370
- "lstrip": false,
371
- "rstrip": false,
372
- "normalized": true,
373
- "special": false
374
- },
375
- {
376
- "id": 569,
377
- "content": "אֹ",
378
- "single_word": false,
379
- "lstrip": false,
380
- "rstrip": false,
381
- "normalized": true,
382
- "special": false
383
- },
384
- {
385
- "id": 664,
386
- "content": "תַ",
387
- "single_word": false,
388
- "lstrip": false,
389
- "rstrip": false,
390
- "normalized": true,
391
- "special": false
392
- },
393
- {
394
- "id": 681,
395
- "content": "יֵ",
396
- "single_word": false,
397
- "lstrip": false,
398
- "rstrip": false,
399
- "normalized": true,
400
- "special": false
401
- },
402
- {
403
- "id": 701,
404
- "content": "תָ",
405
- "single_word": false,
406
- "lstrip": false,
407
- "rstrip": false,
408
- "normalized": true,
409
- "special": false
410
- },
411
- {
412
- "id": 732,
413
- "content": "יֹ",
414
- "single_word": false,
415
- "lstrip": false,
416
- "rstrip": false,
417
- "normalized": true,
418
- "special": false
419
- },
420
- {
421
- "id": 1038,
422
- "content": "הִתְ",
423
- "single_word": false,
424
- "lstrip": false,
425
- "rstrip": false,
426
- "normalized": true,
427
- "special": false
428
- },
429
- {
430
- "id": 1236,
431
- "content": "הֶן",
432
- "single_word": false,
433
- "lstrip": false,
434
- "rstrip": false,
435
- "normalized": true,
436
- "special": false
437
- },
438
- {
439
- "id": 1245,
440
- "content": "יִתְ",
441
- "single_word": false,
442
- "lstrip": false,
443
- "rstrip": false,
444
- "normalized": true,
445
- "special": false
446
- },
447
- {
448
- "id": 1349,
449
- "content": "אֵ",
450
- "single_word": false,
451
- "lstrip": false,
452
- "rstrip": false,
453
- "normalized": true,
454
- "special": false
455
- },
456
- {
457
- "id": 1795,
458
- "content": "נּוּ",
459
- "single_word": false,
460
- "lstrip": false,
461
- "rstrip": false,
462
- "normalized": true,
463
- "special": false
464
- },
465
  {
466
  "id": 2000,
467
- "content": "מוֹ",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
@@ -473,7 +59,7 @@
473
  },
474
  {
475
  "id": 2001,
476
- "content": "הוֹ",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
@@ -482,7 +68,7 @@
482
  },
483
  {
484
  "id": 2002,
485
- "content": " ַת",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
@@ -491,7 +77,7 @@
491
  },
492
  {
493
  "id": 2003,
494
- "content": " ֵי",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
@@ -500,7 +86,7 @@
500
  },
501
  {
502
  "id": 2004,
503
- "content": " ָה",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
@@ -509,7 +95,7 @@
509
  },
510
  {
511
  "id": 2005,
512
- "content": "וֹת",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
@@ -518,7 +104,7 @@
518
  },
519
  {
520
  "id": 2006,
521
- "content": " ִים",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
@@ -527,7 +113,7 @@
527
  },
528
  {
529
  "id": 2007,
530
- "content": " ִי",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
@@ -536,7 +122,7 @@
536
  },
537
  {
538
  "id": 2008,
539
- "content": " ַי",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
@@ -545,7 +131,7 @@
545
  },
546
  {
547
  "id": 2009,
548
- "content": "וֹ",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
@@ -554,7 +140,7 @@
554
  },
555
  {
556
  "id": 2010,
557
- "content": " ָהּ",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
@@ -563,7 +149,7 @@
563
  },
564
  {
565
  "id": 2011,
566
- "content": "נָּה",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
@@ -572,7 +158,7 @@
572
  },
573
  {
574
  "id": 2012,
575
- "content": "כֶן",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
@@ -581,7 +167,7 @@
581
  },
582
  {
583
  "id": 2013,
584
- "content": " ָם",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
@@ -590,7 +176,7 @@
590
  },
591
  {
592
  "id": 2014,
593
- "content": " ֵם",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
@@ -599,7 +185,7 @@
599
  },
600
  {
601
  "id": 2015,
602
- "content": "הֵנָּה",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
@@ -608,7 +194,7 @@
608
  },
609
  {
610
  "id": 2016,
611
- "content": " ֵן",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
@@ -617,7 +203,7 @@
617
  },
618
  {
619
  "id": 2017,
620
- "content": " ָן",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
@@ -626,7 +212,7 @@
626
  },
627
  {
628
  "id": 2018,
629
- "content": "נִתְ",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
@@ -635,7 +221,7 @@
635
  },
636
  {
637
  "id": 2019,
638
- "content": "נִּתְ",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
@@ -644,7 +230,7 @@
644
  },
645
  {
646
  "id": 2020,
647
- "content": "יִּתְ",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
@@ -653,7 +239,7 @@
653
  },
654
  {
655
  "id": 2021,
656
- "content": "אֶתְ",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
@@ -662,7 +248,7 @@
662
  },
663
  {
664
  "id": 2022,
665
- "content": "תִּתְ",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
@@ -671,7 +257,7 @@
671
  },
672
  {
673
  "id": 2023,
674
- "content": "תִתְ",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
@@ -680,7 +266,7 @@
680
  },
681
  {
682
  "id": 2024,
683
- "content": "תּוֹ",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
@@ -689,7 +275,7 @@
689
  },
690
  {
691
  "id": 2025,
692
- "content": "תוֹ",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
@@ -698,7 +284,7 @@
698
  },
699
  {
700
  "id": 2026,
701
- "content": "אוֹ",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
@@ -707,7 +293,7 @@
707
  },
708
  {
709
  "id": 2027,
710
- "content": "נוֹ",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
@@ -716,7 +302,7 @@
716
  },
717
  {
718
  "id": 2028,
719
- "content": "נּוֹ",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
@@ -725,7 +311,7 @@
725
  },
726
  {
727
  "id": 2029,
728
- "content": "יוֹ",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
@@ -734,7 +320,7 @@
734
  },
735
  {
736
  "id": 2030,
737
- "content": "יּוֹ",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
@@ -743,7 +329,7 @@
743
  },
744
  {
745
  "id": 2031,
746
- "content": "תֵּ",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
@@ -752,7 +338,7 @@
752
  },
753
  {
754
  "id": 2032,
755
- "content": "תַּ",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
@@ -761,7 +347,7 @@
761
  },
762
  {
763
  "id": 2033,
764
- "content": "תִּ",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
@@ -770,7 +356,7 @@
770
  },
771
  {
772
  "id": 2034,
773
- "content": "תָּ",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
@@ -779,7 +365,7 @@
779
  },
780
  {
781
  "id": 2035,
782
- "content": "תְּ",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
@@ -788,7 +374,7 @@
788
  },
789
  {
790
  "id": 2036,
791
- "content": "תֹּ",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
@@ -797,7 +383,7 @@
797
  },
798
  {
799
  "id": 2037,
800
- "content": "תֶּ",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
@@ -806,7 +392,7 @@
806
  },
807
  {
808
  "id": 2038,
809
- "content": "תֵ",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
@@ -815,7 +401,7 @@
815
  },
816
  {
817
  "id": 2039,
818
- "content": "תֹ",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
@@ -824,7 +410,7 @@
824
  },
825
  {
826
  "id": 2040,
827
- "content": "תֶ",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
@@ -833,7 +419,7 @@
833
  },
834
  {
835
  "id": 2041,
836
- "content": "יֶ",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
@@ -842,7 +428,7 @@
842
  },
843
  {
844
  "id": 2042,
845
- "content": "יָּ",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
@@ -851,7 +437,7 @@
851
  },
852
  {
853
  "id": 2043,
854
- "content": "יִּ",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
@@ -860,7 +446,7 @@
860
  },
861
  {
862
  "id": 2044,
863
- "content": "יֶּ",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
@@ -869,7 +455,7 @@
869
  },
870
  {
871
  "id": 2045,
872
- "content": "יֹּ",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
@@ -878,7 +464,7 @@
878
  },
879
  {
880
  "id": 2046,
881
- "content": "יַּ",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
@@ -887,7 +473,7 @@
887
  },
888
  {
889
  "id": 2047,
890
- "content": "יֵּ",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
@@ -896,7 +482,7 @@
896
  },
897
  {
898
  "id": 2048,
899
- "content": "נֵ",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
@@ -905,7 +491,7 @@
905
  },
906
  {
907
  "id": 2049,
908
- "content": "נֹּ",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
@@ -914,7 +500,7 @@
914
  },
915
  {
916
  "id": 2050,
917
- "content": "נָּ",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
@@ -923,7 +509,7 @@
923
  },
924
  {
925
  "id": 2051,
926
- "content": "נֵּ",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
@@ -932,7 +518,7 @@
932
  },
933
  {
934
  "id": 2052,
935
- "content": "נִּ",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
@@ -941,7 +527,7 @@
941
  },
942
  {
943
  "id": 2053,
944
- "content": "נֶּ",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
@@ -950,7 +536,7 @@
950
  },
951
  {
952
  "id": 2054,
953
- "content": "נַּ",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
@@ -959,7 +545,7 @@
959
  },
960
  {
961
  "id": 2055,
962
- "content": "תִּי",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
@@ -968,7 +554,7 @@
968
  },
969
  {
970
  "id": 2056,
971
- "content": "תֶּם",
972
  "single_word": false,
973
  "lstrip": false,
974
  "rstrip": false,
@@ -977,7 +563,7 @@
977
  },
978
  {
979
  "id": 2057,
980
- "content": "תֶּן",
981
  "single_word": false,
982
  "lstrip": false,
983
  "rstrip": false,
@@ -986,7 +572,7 @@
986
  },
987
  {
988
  "id": 2058,
989
- "content": "תֶם",
990
  "single_word": false,
991
  "lstrip": false,
992
  "rstrip": false,
@@ -995,7 +581,7 @@
995
  },
996
  {
997
  "id": 2059,
998
- "content": "תֶן",
999
  "single_word": false,
1000
  "lstrip": false,
1001
  "rstrip": false,
 
48
  "normalized": false,
49
  "special": true
50
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  {
52
  "id": 2000,
53
+ "content": "נֵּ",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
 
59
  },
60
  {
61
  "id": 2001,
62
+ "content": "יּוֹ",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 2002,
71
+ "content": " ֵי",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
 
77
  },
78
  {
79
  "id": 2003,
80
+ "content": "תֶם",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
 
86
  },
87
  {
88
  "id": 2004,
89
+ "content": "נּוֹ",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
 
95
  },
96
  {
97
  "id": 2005,
98
+ "content": "תֶ",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 2006,
107
+ "content": "נִּתְ",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 2007,
116
+ "content": " ֵן",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 2008,
125
+ "content": "תֵ",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
131
  },
132
  {
133
  "id": 2009,
134
+ "content": "תוֹ",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
 
140
  },
141
  {
142
  "id": 2010,
143
+ "content": "נַּ",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
 
149
  },
150
  {
151
  "id": 2011,
152
+ "content": " ָה",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
 
158
  },
159
  {
160
  "id": 2012,
161
+ "content": "נֶּ",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
 
167
  },
168
  {
169
  "id": 2013,
170
+ "content": "יֶּ",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
 
176
  },
177
  {
178
  "id": 2014,
179
+ "content": "נָּ",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
 
185
  },
186
  {
187
  "id": 2015,
188
+ "content": "תֵּ",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
 
194
  },
195
  {
196
  "id": 2016,
197
+ "content": " ַת",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
 
203
  },
204
  {
205
  "id": 2017,
206
+ "content": "יִּתְ",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
 
212
  },
213
  {
214
  "id": 2018,
215
+ "content": "יִּ",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
 
221
  },
222
  {
223
  "id": 2019,
224
+ "content": "נֵ",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
 
230
  },
231
  {
232
  "id": 2020,
233
+ "content": "יוֹ",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
 
239
  },
240
  {
241
  "id": 2021,
242
+ "content": " ַי",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
 
248
  },
249
  {
250
  "id": 2022,
251
+ "content": "תּוֹ",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
 
257
  },
258
  {
259
  "id": 2023,
260
+ "content": "יֹּ",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
 
266
  },
267
  {
268
  "id": 2024,
269
+ "content": "תָּ",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
 
275
  },
276
  {
277
  "id": 2025,
278
+ "content": " ִי",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
 
284
  },
285
  {
286
  "id": 2026,
287
+ "content": "תֹ",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
 
293
  },
294
  {
295
  "id": 2027,
296
+ "content": "הֵנָּה",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
 
302
  },
303
  {
304
  "id": 2028,
305
+ "content": "אֶתְ",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
 
311
  },
312
  {
313
  "id": 2029,
314
+ "content": "כֶן",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
 
320
  },
321
  {
322
  "id": 2030,
323
+ "content": " ִים",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
 
329
  },
330
  {
331
  "id": 2031,
332
+ "content": "תְּ",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
 
338
  },
339
  {
340
  "id": 2032,
341
+ "content": "יֶ",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
 
347
  },
348
  {
349
  "id": 2033,
350
+ "content": "תִּי",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
 
356
  },
357
  {
358
  "id": 2034,
359
+ "content": "יַּ",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
 
365
  },
366
  {
367
  "id": 2035,
368
+ "content": "יֵּ",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
 
374
  },
375
  {
376
  "id": 2036,
377
+ "content": "נֹּ",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
 
383
  },
384
  {
385
  "id": 2037,
386
+ "content": "תֶן",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
 
392
  },
393
  {
394
  "id": 2038,
395
+ "content": "נָּה",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
 
401
  },
402
  {
403
  "id": 2039,
404
+ "content": "תֶּ",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
 
410
  },
411
  {
412
  "id": 2040,
413
+ "content": " ָם",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
 
419
  },
420
  {
421
  "id": 2041,
422
+ "content": "נוֹ",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
 
428
  },
429
  {
430
  "id": 2042,
431
+ "content": "תִּ",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
 
437
  },
438
  {
439
  "id": 2043,
440
+ "content": "וֹת",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
 
446
  },
447
  {
448
  "id": 2044,
449
+ "content": " ָהּ",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
 
455
  },
456
  {
457
  "id": 2045,
458
+ "content": "מוֹ",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
 
464
  },
465
  {
466
  "id": 2046,
467
+ "content": "יָּ",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
 
473
  },
474
  {
475
  "id": 2047,
476
+ "content": "נִּ",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
 
482
  },
483
  {
484
  "id": 2048,
485
+ "content": "תִתְ",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
 
491
  },
492
  {
493
  "id": 2049,
494
+ "content": "תֶּן",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
 
500
  },
501
  {
502
  "id": 2050,
503
+ "content": "נִתְ",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
 
509
  },
510
  {
511
  "id": 2051,
512
+ "content": "הוֹ",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
 
518
  },
519
  {
520
  "id": 2052,
521
+ "content": "אוֹ",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
 
527
  },
528
  {
529
  "id": 2053,
530
+ "content": " ֵם",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
 
536
  },
537
  {
538
  "id": 2054,
539
+ "content": "תַּ",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
 
545
  },
546
  {
547
  "id": 2055,
548
+ "content": " ָן",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
 
554
  },
555
  {
556
  "id": 2056,
557
+ "content": "תִּתְ",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
 
563
  },
564
  {
565
  "id": 2057,
566
+ "content": "וֹ",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
 
572
  },
573
  {
574
  "id": 2058,
575
+ "content": "תֹּ",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
 
581
  },
582
  {
583
  "id": 2059,
584
+ "content": "תֶּם",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -40,376 +40,8 @@
40
  "single_word": false,
41
  "special": true
42
  },
43
- "26": {
44
- "content": "ו",
45
- "lstrip": false,
46
- "normalized": true,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": false
50
- },
51
- "34": {
52
- "content": "ם",
53
- "lstrip": false,
54
- "normalized": true,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": false
58
- },
59
- "50": {
60
- "content": "וּ",
61
- "lstrip": false,
62
- "normalized": true,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": false
66
- },
67
- "59": {
68
- "content": "אֶ",
69
- "lstrip": false,
70
- "normalized": true,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": false
74
- },
75
- "69": {
76
- "content": "אֲ",
77
- "lstrip": false,
78
- "normalized": true,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": false
82
- },
83
- "75": {
84
- "content": "יְ",
85
- "lstrip": false,
86
- "normalized": true,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": false
90
- },
91
- "80": {
92
- "content": "יִ",
93
- "lstrip": false,
94
- "normalized": true,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": false
98
- },
99
- "85": {
100
- "content": "ךְ",
101
- "lstrip": false,
102
- "normalized": true,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": false
106
- },
107
- "93": {
108
- "content": "ךָ",
109
- "lstrip": false,
110
- "normalized": true,
111
- "rstrip": false,
112
- "single_word": false,
113
- "special": false
114
- },
115
- "94": {
116
- "content": "אַ",
117
- "lstrip": false,
118
- "normalized": true,
119
- "rstrip": false,
120
- "single_word": false,
121
- "special": false
122
- },
123
- "99": {
124
- "content": "מְ",
125
- "lstrip": false,
126
- "normalized": true,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": false
130
- },
131
- "101": {
132
- "content": "אָ",
133
- "lstrip": false,
134
- "normalized": true,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": false
138
- },
139
- "104": {
140
- "content": "מַ",
141
- "lstrip": false,
142
- "normalized": true,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": false
146
- },
147
- "119": {
148
- "content": "נִי",
149
- "lstrip": false,
150
- "normalized": true,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": false
154
- },
155
- "127": {
156
- "content": "נְ",
157
- "lstrip": false,
158
- "normalized": true,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": false
162
- },
163
- "134": {
164
- "content": "הִ",
165
- "lstrip": false,
166
- "normalized": true,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": false
170
- },
171
- "136": {
172
- "content": "נוּ",
173
- "lstrip": false,
174
- "normalized": true,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": false
178
- },
179
- "148": {
180
- "content": "כֶם",
181
- "lstrip": false,
182
- "normalized": true,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": false
186
- },
187
- "178": {
188
- "content": "הָ",
189
- "lstrip": false,
190
- "normalized": true,
191
- "rstrip": false,
192
- "single_word": false,
193
- "special": false
194
- },
195
- "181": {
196
- "content": "הוּ",
197
- "lstrip": false,
198
- "normalized": true,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": false
202
- },
203
- "190": {
204
- "content": "הֶם",
205
- "lstrip": false,
206
- "normalized": true,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": false
210
- },
211
- "196": {
212
- "content": "נִ",
213
- "lstrip": false,
214
- "normalized": true,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": false
218
- },
219
- "211": {
220
- "content": "הֲ",
221
- "lstrip": false,
222
- "normalized": true,
223
- "rstrip": false,
224
- "single_word": false,
225
- "special": false
226
- },
227
- "245": {
228
- "content": "תְ",
229
- "lstrip": false,
230
- "normalized": true,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": false
234
- },
235
- "253": {
236
- "content": "נָה",
237
- "lstrip": false,
238
- "normalized": true,
239
- "rstrip": false,
240
- "single_word": false,
241
- "special": false
242
- },
243
- "254": {
244
- "content": "יָ",
245
- "lstrip": false,
246
- "normalized": true,
247
- "rstrip": false,
248
- "single_word": false,
249
- "special": false
250
- },
251
- "260": {
252
- "content": "נָ",
253
- "lstrip": false,
254
- "normalized": true,
255
- "rstrip": false,
256
- "single_word": false,
257
- "special": false
258
- },
259
- "264": {
260
- "content": "נַ",
261
- "lstrip": false,
262
- "normalized": true,
263
- "rstrip": false,
264
- "single_word": false,
265
- "special": false
266
- },
267
- "274": {
268
- "content": "אִ",
269
- "lstrip": false,
270
- "normalized": true,
271
- "rstrip": false,
272
- "single_word": false,
273
- "special": false
274
- },
275
- "385": {
276
- "content": "תִ",
277
- "lstrip": false,
278
- "normalized": true,
279
- "rstrip": false,
280
- "single_word": false,
281
- "special": false
282
- },
283
- "390": {
284
- "content": "נֹ",
285
- "lstrip": false,
286
- "normalized": true,
287
- "rstrip": false,
288
- "single_word": false,
289
- "special": false
290
- },
291
- "392": {
292
- "content": "הֵ",
293
- "lstrip": false,
294
- "normalized": true,
295
- "rstrip": false,
296
- "single_word": false,
297
- "special": false
298
- },
299
- "430": {
300
- "content": "נֶ",
301
- "lstrip": false,
302
- "normalized": true,
303
- "rstrip": false,
304
- "single_word": false,
305
- "special": false
306
- },
307
- "459": {
308
- "content": "יַ",
309
- "lstrip": false,
310
- "normalized": true,
311
- "rstrip": false,
312
- "single_word": false,
313
- "special": false
314
- },
315
- "462": {
316
- "content": "תִי",
317
- "lstrip": false,
318
- "normalized": true,
319
- "rstrip": false,
320
- "single_word": false,
321
- "special": false
322
- },
323
- "531": {
324
- "content": "הֶ",
325
- "lstrip": false,
326
- "normalized": true,
327
- "rstrip": false,
328
- "single_word": false,
329
- "special": false
330
- },
331
- "569": {
332
- "content": "אֹ",
333
- "lstrip": false,
334
- "normalized": true,
335
- "rstrip": false,
336
- "single_word": false,
337
- "special": false
338
- },
339
- "664": {
340
- "content": "תַ",
341
- "lstrip": false,
342
- "normalized": true,
343
- "rstrip": false,
344
- "single_word": false,
345
- "special": false
346
- },
347
- "681": {
348
- "content": "יֵ",
349
- "lstrip": false,
350
- "normalized": true,
351
- "rstrip": false,
352
- "single_word": false,
353
- "special": false
354
- },
355
- "701": {
356
- "content": "תָ",
357
- "lstrip": false,
358
- "normalized": true,
359
- "rstrip": false,
360
- "single_word": false,
361
- "special": false
362
- },
363
- "732": {
364
- "content": "יֹ",
365
- "lstrip": false,
366
- "normalized": true,
367
- "rstrip": false,
368
- "single_word": false,
369
- "special": false
370
- },
371
- "1038": {
372
- "content": "הִתְ",
373
- "lstrip": false,
374
- "normalized": true,
375
- "rstrip": false,
376
- "single_word": false,
377
- "special": false
378
- },
379
- "1236": {
380
- "content": "הֶן",
381
- "lstrip": false,
382
- "normalized": true,
383
- "rstrip": false,
384
- "single_word": false,
385
- "special": false
386
- },
387
- "1245": {
388
- "content": "יִתְ",
389
- "lstrip": false,
390
- "normalized": true,
391
- "rstrip": false,
392
- "single_word": false,
393
- "special": false
394
- },
395
- "1349": {
396
- "content": "אֵ",
397
- "lstrip": false,
398
- "normalized": true,
399
- "rstrip": false,
400
- "single_word": false,
401
- "special": false
402
- },
403
- "1795": {
404
- "content": "נּוּ",
405
- "lstrip": false,
406
- "normalized": true,
407
- "rstrip": false,
408
- "single_word": false,
409
- "special": false
410
- },
411
  "2000": {
412
- "content": "מוֹ",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
@@ -417,7 +49,7 @@
417
  "special": false
418
  },
419
  "2001": {
420
- "content": "הוֹ",
421
  "lstrip": false,
422
  "normalized": true,
423
  "rstrip": false,
@@ -425,7 +57,7 @@
425
  "special": false
426
  },
427
  "2002": {
428
- "content": " ַת",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
@@ -433,7 +65,7 @@
433
  "special": false
434
  },
435
  "2003": {
436
- "content": " ֵי",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
@@ -441,7 +73,7 @@
441
  "special": false
442
  },
443
  "2004": {
444
- "content": " ָה",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
@@ -449,7 +81,7 @@
449
  "special": false
450
  },
451
  "2005": {
452
- "content": "וֹת",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
@@ -457,7 +89,7 @@
457
  "special": false
458
  },
459
  "2006": {
460
- "content": " ִים",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
@@ -465,7 +97,7 @@
465
  "special": false
466
  },
467
  "2007": {
468
- "content": " ִי",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
@@ -473,7 +105,7 @@
473
  "special": false
474
  },
475
  "2008": {
476
- "content": " ַי",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
@@ -481,7 +113,7 @@
481
  "special": false
482
  },
483
  "2009": {
484
- "content": "וֹ",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
@@ -489,7 +121,7 @@
489
  "special": false
490
  },
491
  "2010": {
492
- "content": " ָהּ",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
@@ -497,7 +129,7 @@
497
  "special": false
498
  },
499
  "2011": {
500
- "content": "נָּה",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
@@ -505,7 +137,7 @@
505
  "special": false
506
  },
507
  "2012": {
508
- "content": "כֶן",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
@@ -513,7 +145,7 @@
513
  "special": false
514
  },
515
  "2013": {
516
- "content": " ָם",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,
@@ -521,7 +153,7 @@
521
  "special": false
522
  },
523
  "2014": {
524
- "content": " ֵם",
525
  "lstrip": false,
526
  "normalized": true,
527
  "rstrip": false,
@@ -529,7 +161,7 @@
529
  "special": false
530
  },
531
  "2015": {
532
- "content": "הֵנָּה",
533
  "lstrip": false,
534
  "normalized": true,
535
  "rstrip": false,
@@ -537,7 +169,7 @@
537
  "special": false
538
  },
539
  "2016": {
540
- "content": " ֵן",
541
  "lstrip": false,
542
  "normalized": true,
543
  "rstrip": false,
@@ -545,7 +177,7 @@
545
  "special": false
546
  },
547
  "2017": {
548
- "content": " ָן",
549
  "lstrip": false,
550
  "normalized": true,
551
  "rstrip": false,
@@ -553,7 +185,7 @@
553
  "special": false
554
  },
555
  "2018": {
556
- "content": "נִתְ",
557
  "lstrip": false,
558
  "normalized": true,
559
  "rstrip": false,
@@ -561,7 +193,7 @@
561
  "special": false
562
  },
563
  "2019": {
564
- "content": "נִּתְ",
565
  "lstrip": false,
566
  "normalized": true,
567
  "rstrip": false,
@@ -569,7 +201,7 @@
569
  "special": false
570
  },
571
  "2020": {
572
- "content": "יִּתְ",
573
  "lstrip": false,
574
  "normalized": true,
575
  "rstrip": false,
@@ -577,7 +209,7 @@
577
  "special": false
578
  },
579
  "2021": {
580
- "content": "אֶתְ",
581
  "lstrip": false,
582
  "normalized": true,
583
  "rstrip": false,
@@ -585,7 +217,7 @@
585
  "special": false
586
  },
587
  "2022": {
588
- "content": "תִּתְ",
589
  "lstrip": false,
590
  "normalized": true,
591
  "rstrip": false,
@@ -593,7 +225,7 @@
593
  "special": false
594
  },
595
  "2023": {
596
- "content": "תִתְ",
597
  "lstrip": false,
598
  "normalized": true,
599
  "rstrip": false,
@@ -601,7 +233,7 @@
601
  "special": false
602
  },
603
  "2024": {
604
- "content": "תּוֹ",
605
  "lstrip": false,
606
  "normalized": true,
607
  "rstrip": false,
@@ -609,7 +241,7 @@
609
  "special": false
610
  },
611
  "2025": {
612
- "content": "תוֹ",
613
  "lstrip": false,
614
  "normalized": true,
615
  "rstrip": false,
@@ -617,7 +249,7 @@
617
  "special": false
618
  },
619
  "2026": {
620
- "content": "אוֹ",
621
  "lstrip": false,
622
  "normalized": true,
623
  "rstrip": false,
@@ -625,7 +257,7 @@
625
  "special": false
626
  },
627
  "2027": {
628
- "content": "נוֹ",
629
  "lstrip": false,
630
  "normalized": true,
631
  "rstrip": false,
@@ -633,7 +265,7 @@
633
  "special": false
634
  },
635
  "2028": {
636
- "content": "נּוֹ",
637
  "lstrip": false,
638
  "normalized": true,
639
  "rstrip": false,
@@ -641,7 +273,7 @@
641
  "special": false
642
  },
643
  "2029": {
644
- "content": "יוֹ",
645
  "lstrip": false,
646
  "normalized": true,
647
  "rstrip": false,
@@ -649,7 +281,7 @@
649
  "special": false
650
  },
651
  "2030": {
652
- "content": "יּוֹ",
653
  "lstrip": false,
654
  "normalized": true,
655
  "rstrip": false,
@@ -657,7 +289,7 @@
657
  "special": false
658
  },
659
  "2031": {
660
- "content": "תֵּ",
661
  "lstrip": false,
662
  "normalized": true,
663
  "rstrip": false,
@@ -665,7 +297,7 @@
665
  "special": false
666
  },
667
  "2032": {
668
- "content": "תַּ",
669
  "lstrip": false,
670
  "normalized": true,
671
  "rstrip": false,
@@ -673,7 +305,7 @@
673
  "special": false
674
  },
675
  "2033": {
676
- "content": "תִּ",
677
  "lstrip": false,
678
  "normalized": true,
679
  "rstrip": false,
@@ -681,7 +313,7 @@
681
  "special": false
682
  },
683
  "2034": {
684
- "content": "תָּ",
685
  "lstrip": false,
686
  "normalized": true,
687
  "rstrip": false,
@@ -689,7 +321,7 @@
689
  "special": false
690
  },
691
  "2035": {
692
- "content": "תְּ",
693
  "lstrip": false,
694
  "normalized": true,
695
  "rstrip": false,
@@ -697,7 +329,7 @@
697
  "special": false
698
  },
699
  "2036": {
700
- "content": "תֹּ",
701
  "lstrip": false,
702
  "normalized": true,
703
  "rstrip": false,
@@ -705,7 +337,7 @@
705
  "special": false
706
  },
707
  "2037": {
708
- "content": "תֶּ",
709
  "lstrip": false,
710
  "normalized": true,
711
  "rstrip": false,
@@ -713,7 +345,7 @@
713
  "special": false
714
  },
715
  "2038": {
716
- "content": "תֵ",
717
  "lstrip": false,
718
  "normalized": true,
719
  "rstrip": false,
@@ -721,7 +353,7 @@
721
  "special": false
722
  },
723
  "2039": {
724
- "content": "תֹ",
725
  "lstrip": false,
726
  "normalized": true,
727
  "rstrip": false,
@@ -729,7 +361,7 @@
729
  "special": false
730
  },
731
  "2040": {
732
- "content": "תֶ",
733
  "lstrip": false,
734
  "normalized": true,
735
  "rstrip": false,
@@ -737,7 +369,7 @@
737
  "special": false
738
  },
739
  "2041": {
740
- "content": "יֶ",
741
  "lstrip": false,
742
  "normalized": true,
743
  "rstrip": false,
@@ -745,7 +377,7 @@
745
  "special": false
746
  },
747
  "2042": {
748
- "content": "יָּ",
749
  "lstrip": false,
750
  "normalized": true,
751
  "rstrip": false,
@@ -753,7 +385,7 @@
753
  "special": false
754
  },
755
  "2043": {
756
- "content": "יִּ",
757
  "lstrip": false,
758
  "normalized": true,
759
  "rstrip": false,
@@ -761,7 +393,7 @@
761
  "special": false
762
  },
763
  "2044": {
764
- "content": "יֶּ",
765
  "lstrip": false,
766
  "normalized": true,
767
  "rstrip": false,
@@ -769,7 +401,7 @@
769
  "special": false
770
  },
771
  "2045": {
772
- "content": "יֹּ",
773
  "lstrip": false,
774
  "normalized": true,
775
  "rstrip": false,
@@ -777,7 +409,7 @@
777
  "special": false
778
  },
779
  "2046": {
780
- "content": "יַּ",
781
  "lstrip": false,
782
  "normalized": true,
783
  "rstrip": false,
@@ -785,7 +417,7 @@
785
  "special": false
786
  },
787
  "2047": {
788
- "content": "יֵּ",
789
  "lstrip": false,
790
  "normalized": true,
791
  "rstrip": false,
@@ -793,7 +425,7 @@
793
  "special": false
794
  },
795
  "2048": {
796
- "content": "נֵ",
797
  "lstrip": false,
798
  "normalized": true,
799
  "rstrip": false,
@@ -801,7 +433,7 @@
801
  "special": false
802
  },
803
  "2049": {
804
- "content": "נֹּ",
805
  "lstrip": false,
806
  "normalized": true,
807
  "rstrip": false,
@@ -809,7 +441,7 @@
809
  "special": false
810
  },
811
  "2050": {
812
- "content": "נָּ",
813
  "lstrip": false,
814
  "normalized": true,
815
  "rstrip": false,
@@ -817,7 +449,7 @@
817
  "special": false
818
  },
819
  "2051": {
820
- "content": "נֵּ",
821
  "lstrip": false,
822
  "normalized": true,
823
  "rstrip": false,
@@ -825,7 +457,7 @@
825
  "special": false
826
  },
827
  "2052": {
828
- "content": "נִּ",
829
  "lstrip": false,
830
  "normalized": true,
831
  "rstrip": false,
@@ -833,7 +465,7 @@
833
  "special": false
834
  },
835
  "2053": {
836
- "content": "נֶּ",
837
  "lstrip": false,
838
  "normalized": true,
839
  "rstrip": false,
@@ -841,7 +473,7 @@
841
  "special": false
842
  },
843
  "2054": {
844
- "content": "נַּ",
845
  "lstrip": false,
846
  "normalized": true,
847
  "rstrip": false,
@@ -849,7 +481,7 @@
849
  "special": false
850
  },
851
  "2055": {
852
- "content": "תִּי",
853
  "lstrip": false,
854
  "normalized": true,
855
  "rstrip": false,
@@ -857,7 +489,7 @@
857
  "special": false
858
  },
859
  "2056": {
860
- "content": "תֶּם",
861
  "lstrip": false,
862
  "normalized": true,
863
  "rstrip": false,
@@ -865,7 +497,7 @@
865
  "special": false
866
  },
867
  "2057": {
868
- "content": "תֶּן",
869
  "lstrip": false,
870
  "normalized": true,
871
  "rstrip": false,
@@ -873,7 +505,7 @@
873
  "special": false
874
  },
875
  "2058": {
876
- "content": "תֶם",
877
  "lstrip": false,
878
  "normalized": true,
879
  "rstrip": false,
@@ -881,7 +513,7 @@
881
  "special": false
882
  },
883
  "2059": {
884
- "content": "תֶן",
885
  "lstrip": false,
886
  "normalized": true,
887
  "rstrip": false,
 
40
  "single_word": false,
41
  "special": true
42
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  "2000": {
44
+ "content": "נֵּ",
45
  "lstrip": false,
46
  "normalized": true,
47
  "rstrip": false,
 
49
  "special": false
50
  },
51
  "2001": {
52
+ "content": "יּוֹ",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
 
57
  "special": false
58
  },
59
  "2002": {
60
+ "content": " ֵי",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
 
65
  "special": false
66
  },
67
  "2003": {
68
+ "content": "תֶם",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
 
73
  "special": false
74
  },
75
  "2004": {
76
+ "content": "נּוֹ",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
 
81
  "special": false
82
  },
83
  "2005": {
84
+ "content": "תֶ",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
 
89
  "special": false
90
  },
91
  "2006": {
92
+ "content": "נִּתְ",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
 
97
  "special": false
98
  },
99
  "2007": {
100
+ "content": " ֵן",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
 
105
  "special": false
106
  },
107
  "2008": {
108
+ "content": "תֵ",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
 
113
  "special": false
114
  },
115
  "2009": {
116
+ "content": "תוֹ",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
 
121
  "special": false
122
  },
123
  "2010": {
124
+ "content": "נַּ",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
 
129
  "special": false
130
  },
131
  "2011": {
132
+ "content": " ָה",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
 
137
  "special": false
138
  },
139
  "2012": {
140
+ "content": "נֶּ",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
 
145
  "special": false
146
  },
147
  "2013": {
148
+ "content": "יֶּ",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
 
153
  "special": false
154
  },
155
  "2014": {
156
+ "content": "נָּ",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
 
161
  "special": false
162
  },
163
  "2015": {
164
+ "content": "תֵּ",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
 
169
  "special": false
170
  },
171
  "2016": {
172
+ "content": " ַת",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
 
177
  "special": false
178
  },
179
  "2017": {
180
+ "content": "יִּתְ",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
 
185
  "special": false
186
  },
187
  "2018": {
188
+ "content": "יִּ",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
 
193
  "special": false
194
  },
195
  "2019": {
196
+ "content": "נֵ",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
 
201
  "special": false
202
  },
203
  "2020": {
204
+ "content": "יוֹ",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
 
209
  "special": false
210
  },
211
  "2021": {
212
+ "content": " ַי",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
 
217
  "special": false
218
  },
219
  "2022": {
220
+ "content": "תּוֹ",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
 
225
  "special": false
226
  },
227
  "2023": {
228
+ "content": "יֹּ",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
 
233
  "special": false
234
  },
235
  "2024": {
236
+ "content": "תָּ",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
 
241
  "special": false
242
  },
243
  "2025": {
244
+ "content": " ִי",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
 
249
  "special": false
250
  },
251
  "2026": {
252
+ "content": "תֹ",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
 
257
  "special": false
258
  },
259
  "2027": {
260
+ "content": "הֵנָּה",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
 
265
  "special": false
266
  },
267
  "2028": {
268
+ "content": "אֶתְ",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
 
273
  "special": false
274
  },
275
  "2029": {
276
+ "content": "כֶן",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
 
281
  "special": false
282
  },
283
  "2030": {
284
+ "content": " ִים",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
 
289
  "special": false
290
  },
291
  "2031": {
292
+ "content": "תְּ",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
 
297
  "special": false
298
  },
299
  "2032": {
300
+ "content": "יֶ",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
 
305
  "special": false
306
  },
307
  "2033": {
308
+ "content": "תִּי",
309
  "lstrip": false,
310
  "normalized": true,
311
  "rstrip": false,
 
313
  "special": false
314
  },
315
  "2034": {
316
+ "content": "יַּ",
317
  "lstrip": false,
318
  "normalized": true,
319
  "rstrip": false,
 
321
  "special": false
322
  },
323
  "2035": {
324
+ "content": "יֵּ",
325
  "lstrip": false,
326
  "normalized": true,
327
  "rstrip": false,
 
329
  "special": false
330
  },
331
  "2036": {
332
+ "content": "נֹּ",
333
  "lstrip": false,
334
  "normalized": true,
335
  "rstrip": false,
 
337
  "special": false
338
  },
339
  "2037": {
340
+ "content": "תֶן",
341
  "lstrip": false,
342
  "normalized": true,
343
  "rstrip": false,
 
345
  "special": false
346
  },
347
  "2038": {
348
+ "content": "נָּה",
349
  "lstrip": false,
350
  "normalized": true,
351
  "rstrip": false,
 
353
  "special": false
354
  },
355
  "2039": {
356
+ "content": "תֶּ",
357
  "lstrip": false,
358
  "normalized": true,
359
  "rstrip": false,
 
361
  "special": false
362
  },
363
  "2040": {
364
+ "content": " ָם",
365
  "lstrip": false,
366
  "normalized": true,
367
  "rstrip": false,
 
369
  "special": false
370
  },
371
  "2041": {
372
+ "content": "נוֹ",
373
  "lstrip": false,
374
  "normalized": true,
375
  "rstrip": false,
 
377
  "special": false
378
  },
379
  "2042": {
380
+ "content": "תִּ",
381
  "lstrip": false,
382
  "normalized": true,
383
  "rstrip": false,
 
385
  "special": false
386
  },
387
  "2043": {
388
+ "content": "וֹת",
389
  "lstrip": false,
390
  "normalized": true,
391
  "rstrip": false,
 
393
  "special": false
394
  },
395
  "2044": {
396
+ "content": " ָהּ",
397
  "lstrip": false,
398
  "normalized": true,
399
  "rstrip": false,
 
401
  "special": false
402
  },
403
  "2045": {
404
+ "content": "מוֹ",
405
  "lstrip": false,
406
  "normalized": true,
407
  "rstrip": false,
 
409
  "special": false
410
  },
411
  "2046": {
412
+ "content": "יָּ",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
 
417
  "special": false
418
  },
419
  "2047": {
420
+ "content": "נִּ",
421
  "lstrip": false,
422
  "normalized": true,
423
  "rstrip": false,
 
425
  "special": false
426
  },
427
  "2048": {
428
+ "content": "תִתְ",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
 
433
  "special": false
434
  },
435
  "2049": {
436
+ "content": "תֶּן",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
 
441
  "special": false
442
  },
443
  "2050": {
444
+ "content": "נִתְ",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
 
449
  "special": false
450
  },
451
  "2051": {
452
+ "content": "הוֹ",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
 
457
  "special": false
458
  },
459
  "2052": {
460
+ "content": "אוֹ",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
 
465
  "special": false
466
  },
467
  "2053": {
468
+ "content": " ֵם",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
 
473
  "special": false
474
  },
475
  "2054": {
476
+ "content": "תַּ",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
 
481
  "special": false
482
  },
483
  "2055": {
484
+ "content": " ָן",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
 
489
  "special": false
490
  },
491
  "2056": {
492
+ "content": "תִּתְ",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
 
497
  "special": false
498
  },
499
  "2057": {
500
+ "content": "וֹ",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
 
505
  "special": false
506
  },
507
  "2058": {
508
+ "content": "תֹּ",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
 
513
  "special": false
514
  },
515
  "2059": {
516
+ "content": "תֶּם",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,