timotewos commited on
Commit
9e221ce
1 Parent(s): 244f4b0

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +228 -228
vocab.json CHANGED
@@ -1,232 +1,232 @@
1
  {
2
  "[PAD]": 229,
3
  "[UNK]": 228,
4
- "|": 161,
5
- "ሀ": 219,
6
- "ሁ": 162,
7
- "ሂ": 67,
8
- "ሃ": 191,
9
- "ሄ": 125,
10
- "ህ": 80,
11
- "ሆ": 184,
12
- "ለ": 53,
13
- "ሉ": 109,
14
- "ሊ": 103,
15
- "ላ": 36,
16
- "ሌ": 183,
17
- "ል": 46,
18
- "ሎ": 139,
19
- "ሏ": 17,
20
- "ሐ": 1,
21
- "ሑ": 130,
22
- "ሓ": 227,
23
- "ሔ": 16,
24
- "ሕ": 209,
25
- "መ": 200,
26
- "ሙ": 11,
27
- "ሚ": 85,
28
- "ማ": 152,
29
- "ሜ": 35,
30
- "ም": 135,
31
- "ሞ": 173,
32
- "ሟ": 143,
33
- "ሠ": 165,
34
- "ሡ": 12,
35
- "ሣ": 22,
36
- "ሥ": 91,
37
- "ሦ": 97,
38
- "ረ": 30,
39
- "ሩ": 19,
40
- "ሪ": 29,
41
- "ራ": 79,
42
- "ሬ": 0,
43
- "ር": 101,
44
- "ሮ": 142,
45
- "ሯ": 27,
46
- "ሰ": 26,
47
- "ሱ": 132,
48
- "ሲ": 20,
49
- "ሳ": 107,
50
- "ሴ": 45,
51
- "ስ": 178,
52
- "ሶ": 167,
53
- "ሷ": 43,
54
- "ሸ": 48,
55
- "ሹ": 86,
56
- "ሺ": 62,
57
- "ሻ": 24,
58
- "ሼ": 83,
59
- "ሽ": 55,
60
- "ሾ": 168,
61
- "ቀ": 116,
62
- "ቁ": 75,
63
- "ቂ": 214,
64
- "ቃ": 192,
65
- "ቄ": 126,
66
- "ቅ": 5,
67
- "ቆ": 215,
68
- "ቋ": 28,
69
- "በ": 188,
70
- "ቡ": 98,
71
- "ቢ": 13,
72
- "ባ": 50,
73
- "ቤ": 3,
74
- "ብ": 39,
75
- "ቦ": 151,
76
- "ቧ": 124,
77
- "ቨ": 34,
78
- "ቪ": 181,
79
- "ቫ": 68,
80
- "ቭ": 102,
81
- "ቮ": 194,
82
- "ተ": 96,
83
- "ቱ": 6,
84
- "ቲ": 61,
85
- "ታ": 74,
86
- "ቴ": 153,
87
- "ት": 158,
88
- "ቶ": 222,
89
- "ቷ": 185,
90
- "ቸ": 41,
91
- "ቹ": 207,
92
- "ቺ": 15,
93
- "ቻ": 52,
94
- "ቼ": 224,
95
- "ች": 145,
96
- "ቾ": 51,
97
- "ቿ": 37,
98
- "ኀ": 147,
99
- "ኃ": 160,
100
- "ኅ": 197,
101
- "ኋ": 58,
102
- "ነ": 129,
103
- "ኑ": 204,
104
- "ኒ": 44,
105
- "ና": 104,
106
- "ኔ": 93,
107
- "ን": 180,
108
- "ኖ": 221,
109
- "ኗ": 88,
110
- "ኘ": 174,
111
- "ኙ": 100,
112
- "ኛ": 226,
113
- "ኝ": 176,
114
- "ኞ": 40,
115
- "ኟ": 89,
116
- "አ": 141,
117
- "ኡ": 206,
118
- "ኢ": 31,
119
- "ኤ": 70,
120
- "እ": 179,
121
- "ኦ": 66,
122
- "ከ": 8,
123
- "ኩ": 205,
124
- "ኪ": 32,
125
- "ካ": 123,
126
- "ኬ": 170,
127
- "ክ": 156,
128
- "ኮ": 127,
129
- "ኳ": 150,
130
- "ኸ": 225,
131
- "ኽ": 189,
132
- "ወ": 198,
133
- "ዊ": 187,
134
- "ዋ": 154,
135
- "ዌ": 69,
136
- "ው": 164,
137
- "ዎ": 108,
138
- "ዐ": 117,
139
- "ዑ": 216,
140
- "ዒ": 54,
141
- "ዓ": 56,
142
- "ዕ": 105,
143
- "ዖ": 47,
144
- "ዘ": 90,
145
- "ዙ": 202,
146
- "ዚ": 140,
147
- "ዛ": 212,
148
- "ዜ": 113,
149
- "ዝ": 171,
150
- "ዞ": 81,
151
- "ዟ": 155,
152
- "ዢ": 201,
153
- "ዣ": 131,
154
- "ዤ": 120,
155
- "ዥ": 23,
156
- "ዦ": 159,
157
- "የ": 134,
158
- "ዩ": 14,
159
- "ያ": 182,
160
- "ዬ": 138,
161
- "ይ": 213,
162
- "ዮ": 78,
163
- "ደ": 10,
164
- "ዱ": 128,
165
- "ዲ": 121,
166
- "ዳ": 190,
167
- "ዴ": 196,
168
- "ድ": 169,
169
- "ዶ": 49,
170
- "ዷ": 59,
171
- "ጀ": 157,
172
- "ጁ": 149,
173
- "ጂ": 203,
174
- "ጃ": 136,
175
- "ጄ": 7,
176
- "ጅ": 57,
177
- "ጆ": 114,
178
- "ገ": 137,
179
- "ጉ": 146,
180
- "ጊ": 65,
181
- "ጋ": 217,
182
- "ጌ": 223,
183
- "ግ": 25,
184
- "ጎ": 110,
185
- "ጓ": 64,
186
- "ጠ": 175,
187
- "ጡ": 193,
188
- "ጢ": 133,
189
- "ጣ": 92,
190
- "ጤ": 71,
191
- "ጥ": 2,
192
- "ጦ": 112,
193
- "ጧ": 95,
194
- "ጨ": 42,
195
- "ጩ": 63,
196
- "ጪ": 177,
197
- "ጫ": 106,
198
- "ጬ": 186,
199
- "ጭ": 99,
200
- "ጮ": 122,
201
- "ጲ": 195,
202
- "ጴ": 73,
203
- "ጵ": 144,
204
- "ጶ": 172,
205
- "ጸ": 166,
206
- "ጹ": 21,
207
- "ጺ": 115,
208
- "ጻ": 111,
209
- "ጽ": 33,
210
- "ጾ": 211,
211
- "ጿ": 77,
212
- "ፀ": 218,
213
- "ፁ": 163,
214
- "ፃ": 94,
215
- "ፅ": 199,
216
- "ፈ": 210,
217
- "ፉ": 118,
218
- "ፊ": 84,
219
- "ፋ": 38,
220
- "ፌ": 220,
221
- "ፍ": 119,
222
- "ፎ": 60,
223
- "ፏ": 72,
224
- "ፑ": 18,
225
- "ፒ": 148,
226
- "ፓ": 4,
227
- "ፔ": 208,
228
- "ፕ": 82,
229
- "ፖ": 76,
230
- "“": 87,
231
- "”": 9
232
  }
 
1
  {
2
  "[PAD]": 229,
3
  "[UNK]": 228,
4
+ "|": 114,
5
+ "ሀ": 225,
6
+ "ሁ": 43,
7
+ "ሂ": 24,
8
+ "ሃ": 189,
9
+ "ሄ": 32,
10
+ "ህ": 129,
11
+ "ሆ": 221,
12
+ "ለ": 152,
13
+ "ሉ": 108,
14
+ "ሊ": 69,
15
+ "ላ": 4,
16
+ "ሌ": 102,
17
+ "ል": 17,
18
+ "ሎ": 50,
19
+ "ሏ": 30,
20
+ "ሐ": 60,
21
+ "ሑ": 35,
22
+ "ሓ": 206,
23
+ "ሔ": 82,
24
+ "ሕ": 128,
25
+ "መ": 56,
26
+ "ሙ": 210,
27
+ "ሚ": 41,
28
+ "ማ": 166,
29
+ "ሜ": 180,
30
+ "ም": 40,
31
+ "ሞ": 226,
32
+ "ሟ": 194,
33
+ "ሠ": 47,
34
+ "ሡ": 149,
35
+ "ሣ": 86,
36
+ "ሥ": 99,
37
+ "ሦ": 81,
38
+ "ረ": 88,
39
+ "ሩ": 190,
40
+ "ሪ": 185,
41
+ "ራ": 135,
42
+ "ሬ": 162,
43
+ "ር": 58,
44
+ "ሮ": 48,
45
+ "ሯ": 39,
46
+ "ሰ": 105,
47
+ "ሱ": 148,
48
+ "ሲ": 144,
49
+ "ሳ": 156,
50
+ "ሴ": 95,
51
+ "ስ": 27,
52
+ "ሶ": 115,
53
+ "ሷ": 53,
54
+ "ሸ": 94,
55
+ "ሹ": 85,
56
+ "ሺ": 215,
57
+ "ሻ": 3,
58
+ "ሼ": 179,
59
+ "ሽ": 193,
60
+ "ሾ": 71,
61
+ "ቀ": 200,
62
+ "ቁ": 224,
63
+ "ቂ": 87,
64
+ "ቃ": 160,
65
+ "ቄ": 153,
66
+ "ቅ": 163,
67
+ "ቆ": 175,
68
+ "ቋ": 204,
69
+ "በ": 101,
70
+ "ቡ": 34,
71
+ "ቢ": 170,
72
+ "ባ": 122,
73
+ "ቤ": 109,
74
+ "ብ": 79,
75
+ "ቦ": 171,
76
+ "ቧ": 46,
77
+ "ቨ": 61,
78
+ "ቪ": 137,
79
+ "ቫ": 143,
80
+ "ቭ": 10,
81
+ "ቮ": 74,
82
+ "ተ": 165,
83
+ "ቱ": 51,
84
+ "ቲ": 49,
85
+ "ታ": 130,
86
+ "ቴ": 184,
87
+ "ት": 107,
88
+ "ቶ": 191,
89
+ "ቷ": 178,
90
+ "ቸ": 63,
91
+ "ቹ": 223,
92
+ "ቺ": 209,
93
+ "ቻ": 25,
94
+ "ቼ": 116,
95
+ "ች": 5,
96
+ "ቾ": 77,
97
+ "ቿ": 158,
98
+ "ኀ": 1,
99
+ "ኃ": 119,
100
+ "ኅ": 222,
101
+ "ኋ": 205,
102
+ "ነ": 14,
103
+ "ኑ": 8,
104
+ "ኒ": 211,
105
+ "ና": 83,
106
+ "ኔ": 151,
107
+ "ን": 172,
108
+ "ኖ": 201,
109
+ "ኗ": 216,
110
+ "ኘ": 33,
111
+ "ኙ": 123,
112
+ "ኛ": 169,
113
+ "ኝ": 183,
114
+ "ኞ": 97,
115
+ "ኟ": 177,
116
+ "አ": 45,
117
+ "ኡ": 145,
118
+ "ኢ": 154,
119
+ "ኤ": 13,
120
+ "እ": 75,
121
+ "ኦ": 12,
122
+ "ከ": 140,
123
+ "ኩ": 64,
124
+ "ኪ": 113,
125
+ "ካ": 36,
126
+ "ኬ": 134,
127
+ "ክ": 192,
128
+ "ኮ": 168,
129
+ "ኳ": 125,
130
+ "ኸ": 93,
131
+ "ኽ": 197,
132
+ "ወ": 72,
133
+ "ዊ": 142,
134
+ "ዋ": 65,
135
+ "ዌ": 187,
136
+ "ው": 198,
137
+ "ዎ": 213,
138
+ "ዐ": 188,
139
+ "ዑ": 212,
140
+ "ዒ": 7,
141
+ "ዓ": 139,
142
+ "ዕ": 132,
143
+ "ዖ": 124,
144
+ "ዘ": 117,
145
+ "ዙ": 100,
146
+ "ዚ": 121,
147
+ "ዛ": 181,
148
+ "ዜ": 173,
149
+ "ዝ": 15,
150
+ "ዞ": 16,
151
+ "ዟ": 161,
152
+ "ዢ": 167,
153
+ "ዣ": 217,
154
+ "ዤ": 159,
155
+ "ዥ": 186,
156
+ "ዦ": 126,
157
+ "የ": 0,
158
+ "ዩ": 67,
159
+ "ያ": 76,
160
+ "ዬ": 104,
161
+ "ይ": 37,
162
+ "ዮ": 2,
163
+ "ደ": 202,
164
+ "ዱ": 220,
165
+ "ዲ": 155,
166
+ "ዳ": 28,
167
+ "ዴ": 23,
168
+ "ድ": 31,
169
+ "ዶ": 120,
170
+ "ዷ": 92,
171
+ "ጀ": 29,
172
+ "ጁ": 207,
173
+ "ጂ": 57,
174
+ "ጃ": 208,
175
+ "ጄ": 203,
176
+ "ጅ": 103,
177
+ "ጆ": 9,
178
+ "ገ": 42,
179
+ "ጉ": 68,
180
+ "ጊ": 84,
181
+ "ጋ": 141,
182
+ "ጌ": 146,
183
+ "ግ": 118,
184
+ "ጎ": 22,
185
+ "ጓ": 90,
186
+ "ጠ": 150,
187
+ "ጡ": 62,
188
+ "ጢ": 131,
189
+ "ጣ": 19,
190
+ "ጤ": 20,
191
+ "ጥ": 106,
192
+ "ጦ": 11,
193
+ "ጧ": 52,
194
+ "ጨ": 199,
195
+ "ጩ": 55,
196
+ "ጪ": 70,
197
+ "ጫ": 73,
198
+ "ጬ": 110,
199
+ "ጭ": 127,
200
+ "ጮ": 91,
201
+ "ጲ": 196,
202
+ "ጴ": 138,
203
+ "ጵ": 219,
204
+ "ጶ": 111,
205
+ "ጸ": 59,
206
+ "ጹ": 98,
207
+ "ጺ": 195,
208
+ "ጻ": 96,
209
+ "ጽ": 21,
210
+ "ጾ": 80,
211
+ "ጿ": 26,
212
+ "ፀ": 164,
213
+ "ፁ": 133,
214
+ "ፃ": 38,
215
+ "ፅ": 18,
216
+ "ፈ": 6,
217
+ "ፉ": 218,
218
+ "ፊ": 89,
219
+ "ፋ": 112,
220
+ "ፌ": 176,
221
+ "ፍ": 54,
222
+ "ፎ": 136,
223
+ "ፏ": 227,
224
+ "ፑ": 66,
225
+ "ፒ": 78,
226
+ "ፓ": 182,
227
+ "ፔ": 147,
228
+ "ፕ": 174,
229
+ "ፖ": 214,
230
+ "“": 157,
231
+ "”": 44
232
  }