Update tokenizer.json
Browse files- tokenizer.json +1 -141
tokenizer.json
CHANGED
@@ -1,141 +1 @@
|
|
1 |
-
{
|
2 |
-
"<OOV>": 1,
|
3 |
-
"zijn": 2,
|
4 |
-
"iedereen": 3,
|
5 |
-
"is": 4,
|
6 |
-
"niet": 5,
|
7 |
-
"altijd": 6,
|
8 |
-
"dat": 7,
|
9 |
-
"mensen": 8,
|
10 |
-
"ze": 9,
|
11 |
-
"groep": 10,
|
12 |
-
"van": 11,
|
13 |
-
"verschillende": 12,
|
14 |
-
"hetzelfde": 13,
|
15 |
-
"wij": 14,
|
16 |
-
"het": 15,
|
17 |
-
"en": 16,
|
18 |
-
"hun": 17,
|
19 |
-
"zij": 18,
|
20 |
-
"die": 19,
|
21 |
-
"heeft": 20,
|
22 |
-
"we": 21,
|
23 |
-
"de": 22,
|
24 |
-
"elkaar": 23,
|
25 |
-
"kunnen": 24,
|
26 |
-
"doen": 25,
|
27 |
-
"je": 26,
|
28 |
-
"dan": 27,
|
29 |
-
"te": 28,
|
30 |
-
"niemand": 29,
|
31 |
-
"voor": 30,
|
32 |
-
"in": 31,
|
33 |
-
"uit": 32,
|
34 |
-
"als": 33,
|
35 |
-
"doet": 34,
|
36 |
-
"aardig": 35,
|
37 |
-
"onze": 36,
|
38 |
-
"dingen": 37,
|
39 |
-
"hebben": 38,
|
40 |
-
"oké": 39,
|
41 |
-
"leren": 40,
|
42 |
-
"respect": 41,
|
43 |
-
"ongeacht": 42,
|
44 |
-
"een": 43,
|
45 |
-
"wat": 44,
|
46 |
-
"allemaal": 45,
|
47 |
-
"er": 46,
|
48 |
-
"uitzonderingen": 47,
|
49 |
-
"sterke": 48,
|
50 |
-
"zwakke": 49,
|
51 |
-
"punten": 50,
|
52 |
-
"winnen": 51,
|
53 |
-
"jullie": 52,
|
54 |
-
"verliezen": 53,
|
55 |
-
"geen": 54,
|
56 |
-
"volgers": 55,
|
57 |
-
"hebt": 56,
|
58 |
-
"ben": 57,
|
59 |
-
"elke": 58,
|
60 |
-
"eigen": 59,
|
61 |
-
"manier": 60,
|
62 |
-
"leven": 61,
|
63 |
-
"denken": 62,
|
64 |
-
"beter": 63,
|
65 |
-
"zorgt": 64,
|
66 |
-
"problemen": 65,
|
67 |
-
"culturen": 66,
|
68 |
-
"andere": 67,
|
69 |
-
"gewoontes": 68,
|
70 |
-
"klas": 69,
|
71 |
-
"vindt": 70,
|
72 |
-
"hen": 71,
|
73 |
-
"pest": 72,
|
74 |
-
"zoals": 73,
|
75 |
-
"ons": 74,
|
76 |
-
"iets": 75,
|
77 |
-
"nieuws": 76,
|
78 |
-
"afkomst": 77,
|
79 |
-
"gevaarlijk": 78,
|
80 |
-
"alle": 79,
|
81 |
-
"kinderen": 80,
|
82 |
-
"school": 81,
|
83 |
-
"stom": 82,
|
84 |
-
"goede": 83,
|
85 |
-
"slechte": 84,
|
86 |
-
"dagen": 85,
|
87 |
-
"besten": 86,
|
88 |
-
"waardeloos": 87,
|
89 |
-
"nooit": 88,
|
90 |
-
"krijgen": 89,
|
91 |
-
"meeste": 90,
|
92 |
-
"likes": 91,
|
93 |
-
"denkt": 92,
|
94 |
-
"prima": 93,
|
95 |
-
"mogen": 94,
|
96 |
-
"houden": 95,
|
97 |
-
"sommige": 96,
|
98 |
-
"maken": 97,
|
99 |
-
"fouten": 98,
|
100 |
-
"maar": 99,
|
101 |
-
"betekent": 100,
|
102 |
-
"zo": 101,
|
103 |
-
"gedragen": 102,
|
104 |
-
"zich": 103,
|
105 |
-
"beesten": 104,
|
106 |
-
"maakt": 105,
|
107 |
-
"interessant": 106,
|
108 |
-
"cultuur": 107,
|
109 |
-
"inferieur": 108,
|
110 |
-
"aan": 109,
|
111 |
-
"hoeven": 110,
|
112 |
-
"met": 111,
|
113 |
-
"eens": 112,
|
114 |
-
"vinden": 113,
|
115 |
-
"leuk": 114,
|
116 |
-
"weten": 115,
|
117 |
-
"goed": 116,
|
118 |
-
"voetbal": 117,
|
119 |
-
"speelt": 118,
|
120 |
-
"saai": 119,
|
121 |
-
"net": 120,
|
122 |
-
"kakkerlakken": 121,
|
123 |
-
"samen": 122,
|
124 |
-
"beste": 123,
|
125 |
-
"bereiken": 124,
|
126 |
-
"om": 125,
|
127 |
-
"anders": 126,
|
128 |
-
"alles": 127,
|
129 |
-
"fout": 128,
|
130 |
-
"dit": 129,
|
131 |
-
"pure": 130,
|
132 |
-
"diefstal": 131,
|
133 |
-
"meningen": 132,
|
134 |
-
"verdient": 133,
|
135 |
-
"achtergrond": 134,
|
136 |
-
"proberen": 135,
|
137 |
-
"meestal": 136,
|
138 |
-
"best": 137,
|
139 |
-
"verliezer": 138,
|
140 |
-
"slecht": 139
|
141 |
-
}
|
|
|
1 |
+
{"class_name": "Tokenizer", "config": {"num_words": null, "filters": "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n", "lower": true, "split": " ", "char_level": false, "oov_token": null, "document_count": 1000, "word_counts": "{\"zij\": 96, \"krijgen\": 26, \"altijd\": 208, \"de\": 77, \"meeste\": 26, \"likes\": 26, \"iedereen\": 288, \"die\": 88, \"niet\": 206, \"voetbal\": 23, \"speelt\": 23, \"is\": 272, \"saai\": 23, \"hun\": 104, \"groep\": 144, \"pest\": 28, \"mensen\": 193, \"zoals\": 28, \"ons\": 28, \"zijn\": 305, \"allemaal\": 40, \"hetzelfde\": 113, \"en\": 110, \"dat\": 198, \"maakt\": 22, \"het\": 104, \"interessant\": 22, \"cultuur\": 25, \"inferieur\": 25, \"aan\": 25, \"onze\": 53, \"ze\": 157, \"denken\": 30, \"beter\": 30, \"dan\": 62, \"wij\": 110, \"doet\": 55, \"een\": 45, \"verliezer\": 17, \"net\": 20, \"kakkerlakken\": 20, \"mogen\": 26, \"van\": 124, \"verschillende\": 123, \"dingen\": 49, \"houden\": 26, \"ok\\u00e9\": 43, \"dit\": 19, \"pure\": 19, \"diefstal\": 19, \"heeft\": 95, \"sterke\": 35, \"zwakke\": 35, \"punten\": 35, \"we\": 86, \"hoeven\": 23, \"met\": 23, \"elkaar\": 64, \"eens\": 23, \"te\": 58, \"goede\": 28, \"slechte\": 28, \"dagen\": 28, \"samen\": 22, \"kunnen\": 63, \"beste\": 22, \"bereiken\": 22, \"besten\": 26, \"waardeloos\": 26, \"zorgt\": 33, \"voor\": 62, \"problemen\": 33, \"doen\": 68, \"nooit\": 28, \"aardig\": 56, \"iets\": 27, \"nieuws\": 27, \"leren\": 41, \"slecht\": 18, \"vinden\": 23, \"leuk\": 23, \"proberen\": 18, \"meestal\": 18, \"best\": 18, \"in\": 56, \"er\": 38, \"uitzonderingen\": 38, \"weten\": 22, \"wat\": 44, \"goed\": 22, \"niemand\": 60, \"klas\": 28, \"vindt\": 28, \"hen\": 28, \"sommige\": 25, \"maken\": 25, \"fouten\": 25, \"maar\": 25, \"betekent\": 25, \"zo\": 25, \"respect\": 46, \"ongeacht\": 46, \"afkomst\": 29, \"uit\": 55, \"gevaarlijk\": 23, \"gedragen\": 23, \"zich\": 23, \"als\": 55, \"beesten\": 23, \"je\": 64, \"geen\": 32, \"volgers\": 32, \"hebt\": 32, \"ben\": 32, \"elke\": 32, \"eigen\": 32, \"manier\": 32, \"leven\": 32, \"culturen\": 32, \"hebben\": 51, \"andere\": 32, \"gewoontes\": 32, \"om\": 17, \"anders\": 17, \"denkt\": 25, \"prima\": 25, \"meningen\": 19, \"verdient\": 17, \"achtergrond\": 17, \"alle\": 25, \"kinderen\": 25, \"school\": 25, \"stom\": 25, \"alles\": 22, \"fout\": 22, \"winnen\": 32, \"jullie\": 32, \"verliezen\": 32}", "word_docs": "{\"de\": 77, \"zij\": 96, \"krijgen\": 26, \"altijd\": 208, \"meeste\": 26, \"likes\": 26, \"niet\": 206, \"speelt\": 23, \"iedereen\": 288, \"voetbal\": 23, \"saai\": 23, \"die\": 88, \"is\": 272, \"zoals\": 28, \"groep\": 144, \"hun\": 104, \"pest\": 28, \"ons\": 28, \"mensen\": 193, \"maakt\": 22, \"hetzelfde\": 113, \"interessant\": 22, \"het\": 104, \"en\": 110, \"allemaal\": 40, \"dat\": 173, \"zijn\": 279, \"cultuur\": 25, \"inferieur\": 25, \"onze\": 53, \"aan\": 25, \"ze\": 99, \"beter\": 30, \"denken\": 30, \"wij\": 110, \"dan\": 62, \"verliezer\": 17, \"doet\": 55, \"een\": 45, \"net\": 20, \"kakkerlakken\": 20, \"dingen\": 49, \"verschillende\": 100, \"van\": 124, \"houden\": 26, \"mogen\": 26, \"ok\\u00e9\": 43, \"pure\": 19, \"dit\": 19, \"diefstal\": 19, \"punten\": 35, \"sterke\": 35, \"heeft\": 95, \"zwakke\": 35, \"we\": 86, \"eens\": 23, \"elkaar\": 64, \"te\": 58, \"met\": 23, \"hoeven\": 23, \"slechte\": 28, \"dagen\": 28, \"goede\": 28, \"bereiken\": 22, \"beste\": 22, \"kunnen\": 63, \"samen\": 22, \"besten\": 26, \"waardeloos\": 26, \"voor\": 62, \"zorgt\": 33, \"problemen\": 33, \"aardig\": 56, \"doen\": 68, \"nooit\": 28, \"nieuws\": 27, \"leren\": 41, \"iets\": 27, \"slecht\": 18, \"leuk\": 23, \"vinden\": 23, \"meestal\": 18, \"best\": 18, \"proberen\": 18, \"in\": 56, \"er\": 38, \"uitzonderingen\": 38, \"wat\": 44, \"goed\": 22, \"weten\": 22, \"niemand\": 60, \"hen\": 28, \"klas\": 28, \"vindt\": 28, \"sommige\": 25, \"betekent\": 25, \"maar\": 25, \"zo\": 25, \"maken\": 25, \"fouten\": 25, \"afkomst\": 29, \"ongeacht\": 46, \"respect\": 46, \"uit\": 55, \"gevaarlijk\": 23, \"gedragen\": 23, \"als\": 55, \"zich\": 23, \"beesten\": 23, \"je\": 32, \"hebt\": 32, \"geen\": 32, \"volgers\": 32, \"ben\": 32, \"manier\": 32, \"leven\": 32, \"elke\": 32, \"eigen\": 32, \"gewoontes\": 32, \"hebben\": 51, \"andere\": 32, \"culturen\": 32, \"anders\": 17, \"om\": 17, \"denkt\": 25, \"prima\": 25, \"meningen\": 19, \"verdient\": 17, \"achtergrond\": 17, \"stom\": 25, \"alle\": 25, \"kinderen\": 25, \"school\": 25, \"alles\": 22, \"fout\": 22, \"winnen\": 32, \"jullie\": 32, \"verliezen\": 32}", "index_docs": "{\"21\": 77, \"17\": 96, \"81\": 26, \"4\": 208, \"82\": 26, \"83\": 26, \"5\": 206, \"104\": 23, \"2\": 288, \"103\": 23, \"105\": 23, \"19\": 88, \"3\": 272, \"70\": 28, \"9\": 144, \"15\": 104, \"69\": 28, \"71\": 28, \"7\": 193, \"115\": 22, \"12\": 113, \"116\": 22, \"16\": 104, \"13\": 110, \"44\": 40, \"6\": 173, \"1\": 279, \"88\": 25, \"89\": 25, \"35\": 53, \"90\": 25, \"8\": 99, \"67\": 30, \"66\": 30, \"14\": 110, \"26\": 62, \"134\": 17, \"32\": 55, \"40\": 45, \"124\": 20, \"125\": 20, \"37\": 49, \"11\": 100, \"10\": 124, \"85\": 26, \"84\": 26, \"42\": 43, \"127\": 19, \"126\": 19, \"128\": 19, \"49\": 35, \"47\": 35, \"18\": 95, \"48\": 35, \"20\": 86, \"108\": 23, \"23\": 64, \"29\": 58, \"107\": 23, \"106\": 23, \"73\": 28, \"74\": 28, \"72\": 28, \"119\": 22, \"118\": 22, \"25\": 63, \"117\": 22, \"86\": 26, \"87\": 26, \"27\": 62, \"50\": 33, \"51\": 33, \"30\": 56, \"22\": 68, \"75\": 28, \"80\": 27, \"43\": 41, \"79\": 27, \"130\": 18, \"110\": 23, \"109\": 23, \"132\": 18, \"133\": 18, \"131\": 18, \"31\": 56, \"45\": 38, \"46\": 38, \"41\": 44, \"121\": 22, \"120\": 22, \"28\": 60, \"78\": 28, \"76\": 28, \"77\": 28, \"91\": 25, \"95\": 25, \"94\": 25, \"96\": 25, \"92\": 25, \"93\": 25, \"68\": 29, \"39\": 46, \"38\": 46, \"33\": 55, \"111\": 23, \"112\": 23, \"34\": 55, \"113\": 23, \"114\": 23, \"24\": 32, \"54\": 32, \"52\": 32, \"53\": 32, \"55\": 32, \"58\": 32, \"59\": 32, \"56\": 32, \"57\": 32, \"62\": 32, \"36\": 51, \"61\": 32, \"60\": 32, \"136\": 17, \"135\": 17, \"97\": 25, \"98\": 25, \"129\": 19, \"137\": 17, \"138\": 17, \"102\": 25, \"99\": 25, \"100\": 25, \"101\": 25, \"122\": 22, \"123\": 22, \"63\": 32, \"64\": 32, \"65\": 32}", "index_word": "{\"1\": \"zijn\", \"2\": \"iedereen\", \"3\": \"is\", \"4\": \"altijd\", \"5\": \"niet\", \"6\": \"dat\", \"7\": \"mensen\", \"8\": \"ze\", \"9\": \"groep\", \"10\": \"van\", \"11\": \"verschillende\", \"12\": \"hetzelfde\", \"13\": \"en\", \"14\": \"wij\", \"15\": \"hun\", \"16\": \"het\", \"17\": \"zij\", \"18\": \"heeft\", \"19\": \"die\", \"20\": \"we\", \"21\": \"de\", \"22\": \"doen\", \"23\": \"elkaar\", \"24\": \"je\", \"25\": \"kunnen\", \"26\": \"dan\", \"27\": \"voor\", \"28\": \"niemand\", \"29\": \"te\", \"30\": \"aardig\", \"31\": \"in\", \"32\": \"doet\", \"33\": \"uit\", \"34\": \"als\", \"35\": \"onze\", \"36\": \"hebben\", \"37\": \"dingen\", \"38\": \"respect\", \"39\": \"ongeacht\", \"40\": \"een\", \"41\": \"wat\", \"42\": \"ok\\u00e9\", \"43\": \"leren\", \"44\": \"allemaal\", \"45\": \"er\", \"46\": \"uitzonderingen\", \"47\": \"sterke\", \"48\": \"zwakke\", \"49\": \"punten\", \"50\": \"zorgt\", \"51\": \"problemen\", \"52\": \"geen\", \"53\": \"volgers\", \"54\": \"hebt\", \"55\": \"ben\", \"56\": \"elke\", \"57\": \"eigen\", \"58\": \"manier\", \"59\": \"leven\", \"60\": \"culturen\", \"61\": \"andere\", \"62\": \"gewoontes\", \"63\": \"winnen\", \"64\": \"jullie\", \"65\": \"verliezen\", \"66\": \"denken\", \"67\": \"beter\", \"68\": \"afkomst\", \"69\": \"pest\", \"70\": \"zoals\", \"71\": \"ons\", \"72\": \"goede\", \"73\": \"slechte\", \"74\": \"dagen\", \"75\": \"nooit\", \"76\": \"klas\", \"77\": \"vindt\", \"78\": \"hen\", \"79\": \"iets\", \"80\": \"nieuws\", \"81\": \"krijgen\", \"82\": \"meeste\", \"83\": \"likes\", \"84\": \"mogen\", \"85\": \"houden\", \"86\": \"besten\", \"87\": \"waardeloos\", \"88\": \"cultuur\", \"89\": \"inferieur\", \"90\": \"aan\", \"91\": \"sommige\", \"92\": \"maken\", \"93\": \"fouten\", \"94\": \"maar\", \"95\": \"betekent\", \"96\": \"zo\", \"97\": \"denkt\", \"98\": \"prima\", \"99\": \"alle\", \"100\": \"kinderen\", \"101\": \"school\", \"102\": \"stom\", \"103\": \"voetbal\", \"104\": \"speelt\", \"105\": \"saai\", \"106\": \"hoeven\", \"107\": \"met\", \"108\": \"eens\", \"109\": \"vinden\", \"110\": \"leuk\", \"111\": \"gevaarlijk\", \"112\": \"gedragen\", \"113\": \"zich\", \"114\": \"beesten\", \"115\": \"maakt\", \"116\": \"interessant\", \"117\": \"samen\", \"118\": \"beste\", \"119\": \"bereiken\", \"120\": \"weten\", \"121\": \"goed\", \"122\": \"alles\", \"123\": \"fout\", \"124\": \"net\", \"125\": \"kakkerlakken\", \"126\": \"dit\", \"127\": \"pure\", \"128\": \"diefstal\", \"129\": \"meningen\", \"130\": \"slecht\", \"131\": \"proberen\", \"132\": \"meestal\", \"133\": \"best\", \"134\": \"verliezer\", \"135\": \"om\", \"136\": \"anders\", \"137\": \"verdient\", \"138\": \"achtergrond\"}", "word_index": "{\"zijn\": 1, \"iedereen\": 2, \"is\": 3, \"altijd\": 4, \"niet\": 5, \"dat\": 6, \"mensen\": 7, \"ze\": 8, \"groep\": 9, \"van\": 10, \"verschillende\": 11, \"hetzelfde\": 12, \"en\": 13, \"wij\": 14, \"hun\": 15, \"het\": 16, \"zij\": 17, \"heeft\": 18, \"die\": 19, \"we\": 20, \"de\": 21, \"doen\": 22, \"elkaar\": 23, \"je\": 24, \"kunnen\": 25, \"dan\": 26, \"voor\": 27, \"niemand\": 28, \"te\": 29, \"aardig\": 30, \"in\": 31, \"doet\": 32, \"uit\": 33, \"als\": 34, \"onze\": 35, \"hebben\": 36, \"dingen\": 37, \"respect\": 38, \"ongeacht\": 39, \"een\": 40, \"wat\": 41, \"ok\\u00e9\": 42, \"leren\": 43, \"allemaal\": 44, \"er\": 45, \"uitzonderingen\": 46, \"sterke\": 47, \"zwakke\": 48, \"punten\": 49, \"zorgt\": 50, \"problemen\": 51, \"geen\": 52, \"volgers\": 53, \"hebt\": 54, \"ben\": 55, \"elke\": 56, \"eigen\": 57, \"manier\": 58, \"leven\": 59, \"culturen\": 60, \"andere\": 61, \"gewoontes\": 62, \"winnen\": 63, \"jullie\": 64, \"verliezen\": 65, \"denken\": 66, \"beter\": 67, \"afkomst\": 68, \"pest\": 69, \"zoals\": 70, \"ons\": 71, \"goede\": 72, \"slechte\": 73, \"dagen\": 74, \"nooit\": 75, \"klas\": 76, \"vindt\": 77, \"hen\": 78, \"iets\": 79, \"nieuws\": 80, \"krijgen\": 81, \"meeste\": 82, \"likes\": 83, \"mogen\": 84, \"houden\": 85, \"besten\": 86, \"waardeloos\": 87, \"cultuur\": 88, \"inferieur\": 89, \"aan\": 90, \"sommige\": 91, \"maken\": 92, \"fouten\": 93, \"maar\": 94, \"betekent\": 95, \"zo\": 96, \"denkt\": 97, \"prima\": 98, \"alle\": 99, \"kinderen\": 100, \"school\": 101, \"stom\": 102, \"voetbal\": 103, \"speelt\": 104, \"saai\": 105, \"hoeven\": 106, \"met\": 107, \"eens\": 108, \"vinden\": 109, \"leuk\": 110, \"gevaarlijk\": 111, \"gedragen\": 112, \"zich\": 113, \"beesten\": 114, \"maakt\": 115, \"interessant\": 116, \"samen\": 117, \"beste\": 118, \"bereiken\": 119, \"weten\": 120, \"goed\": 121, \"alles\": 122, \"fout\": 123, \"net\": 124, \"kakkerlakken\": 125, \"dit\": 126, \"pure\": 127, \"diefstal\": 128, \"meningen\": 129, \"slecht\": 130, \"proberen\": 131, \"meestal\": 132, \"best\": 133, \"verliezer\": 134, \"om\": 135, \"anders\": 136, \"verdient\": 137, \"achtergrond\": 138}"}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|