timotewos commited on
Commit
5106ebf
·
verified ·
1 Parent(s): 07b59f5

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +227 -227
vocab.json CHANGED
@@ -1,232 +1,232 @@
1
  {
2
  "[PAD]": 229,
3
  "[UNK]": 228,
4
- "|": 95,
5
- "ሀ": 116,
6
- "ሁ": 63,
7
- "ሂ": 145,
8
- "ሃ": 183,
9
- "ሄ": 53,
10
- "ህ": 37,
11
- "ሆ": 159,
12
- "ለ": 226,
13
- "ሉ": 78,
14
- "ሊ": 107,
15
- "ላ": 61,
16
  "ሌ": 72,
17
- "ል": 149,
18
- "ሎ": 22,
19
- "ሏ": 99,
20
- "ሐ": 219,
21
- "ሑ": 160,
22
- "ሓ": 196,
23
- "ሔ": 9,
24
- "ሕ": 157,
25
- "መ": 48,
26
- "ሙ": 17,
27
- "ሚ": 195,
28
- "ማ": 39,
29
- "ሜ": 127,
30
- "ም": 190,
31
- "ሞ": 222,
32
- "ሟ": 203,
33
- "ሠ": 85,
34
- "ሡ": 65,
35
- "ሣ": 79,
36
- "ሥ": 57,
37
- "ሦ": 29,
38
- "ረ": 20,
39
- "ሩ": 4,
40
- "ሪ": 7,
41
- "ራ": 15,
42
- "ሬ": 46,
43
- "ር": 100,
44
- "ሮ": 102,
45
- "ሯ": 164,
46
- "ሰ": 6,
47
- "ሱ": 64,
48
- "ሲ": 121,
49
- "ሳ": 197,
50
- "ሴ": 144,
51
- "ስ": 211,
52
- "ሶ": 131,
53
- "ሷ": 94,
54
- "ሸ": 119,
55
- "ሹ": 176,
56
- "ሺ": 166,
57
- "ሻ": 174,
58
- "ሼ": 26,
59
- "ሽ": 44,
60
- "ሾ": 153,
61
- "ቀ": 224,
62
- "ቁ": 177,
63
- "ቂ": 62,
64
- "ቃ": 191,
65
- "ቄ": 210,
66
- "ቅ": 129,
67
- "ቆ": 104,
68
- "ቋ": 214,
69
- "በ": 185,
70
- "ቡ": 120,
71
- "ቢ": 54,
72
- "ባ": 49,
73
- "ቤ": 70,
74
- "ብ": 147,
75
- "ቦ": 83,
76
- "ቧ": 77,
77
- "ቨ": 113,
78
- "ቪ": 123,
79
- "ቫ": 122,
80
- "ቭ": 223,
81
- "ቮ": 68,
82
- "ተ": 167,
83
- "ቱ": 182,
84
- "ቲ": 171,
85
- "ታ": 126,
86
- "ቴ": 106,
87
- "ት": 134,
88
- "ቶ": 59,
89
- "ቷ": 199,
90
- "ቸ": 24,
91
- "ቹ": 221,
92
- "ቺ": 8,
93
- "ቻ": 154,
94
- "ቼ": 19,
95
- "ች": 165,
96
- "ቾ": 12,
97
- "ቿ": 140,
98
- "ኀ": 208,
99
- "ኃ": 115,
100
- "ኅ": 5,
101
- "ኋ": 118,
102
- "ነ": 97,
103
- "ኑ": 58,
104
- "ኒ": 10,
105
- "ና": 227,
106
- "ኔ": 55,
107
- "ን": 148,
108
- "ኖ": 28,
109
- "ኗ": 80,
110
- "ኘ": 67,
111
- "ኙ": 96,
112
- "ኛ": 27,
113
- "ኝ": 212,
114
- "ኞ": 218,
115
- "ኟ": 117,
116
- "አ": 84,
117
- "ኡ": 41,
118
- "ኢ": 14,
119
- "ኤ": 193,
120
- "እ": 88,
121
- "ኦ": 152,
122
- "ከ": 173,
123
- "ኩ": 13,
124
- "ኪ": 2,
125
- "ካ": 109,
126
- "ኬ": 69,
127
- "ክ": 23,
128
- "ኮ": 43,
129
- "ኳ": 137,
130
- "ኸ": 25,
131
- "ኽ": 142,
132
- "ወ": 101,
133
- "ዊ": 132,
134
- "ዋ": 216,
135
- "ዌ": 112,
136
- "ው": 128,
137
- "ዎ": 42,
138
- "ዐ": 11,
139
- "ዑ": 0,
140
- "ዒ": 207,
141
- "ዓ": 3,
142
- "ዕ": 81,
143
- "ዖ": 178,
144
- "ዘ": 151,
145
- "ዙ": 133,
146
- "ዚ": 74,
147
- "ዛ": 56,
148
- "ዜ": 40,
149
- "ዝ": 47,
150
- "ዞ": 52,
151
- "ዟ": 21,
152
- "ዢ": 31,
153
- "ዣ": 181,
154
- "ዤ": 188,
155
- "ዥ": 206,
156
- "ዦ": 73,
157
- "የ": 45,
158
- "ዩ": 201,
159
- "ያ": 170,
160
- "ዬ": 33,
161
- "ይ": 146,
162
- "ዮ": 130,
163
- "ደ": 156,
164
- "ዱ": 136,
165
- "ዲ": 141,
166
- "ዳ": 213,
167
- "ዴ": 50,
168
- "ድ": 111,
169
- "ዶ": 34,
170
- "ዷ": 38,
171
- "ጀ": 198,
172
- "ጁ": 187,
173
- "ጂ": 172,
174
- "ጃ": 150,
175
- "ጄ": 105,
176
- "ጅ": 124,
177
- "ጆ": 192,
178
- "ገ": 92,
179
- "ጉ": 89,
180
- "ጊ": 35,
181
- "ጋ": 179,
182
- "ጌ": 87,
183
- "ግ": 108,
184
- "ጎ": 30,
185
- "ጓ": 202,
186
- "ጠ": 168,
187
- "ጡ": 217,
188
- "ጢ": 158,
189
- "ጣ": 194,
190
- "ጤ": 186,
191
- "ጥ": 161,
192
- "ጦ": 86,
193
- "ጧ": 91,
194
- "ጨ": 36,
195
- "ጩ": 135,
196
- "ጪ": 51,
197
- "ጫ": 138,
198
- "ጬ": 205,
199
- "ጭ": 163,
200
- "ጮ": 114,
201
- "ጲ": 66,
202
- "ጴ": 32,
203
- "ጵ": 200,
204
- "ጶ": 1,
205
- "ጸ": 90,
206
- "ጹ": 16,
207
- "ጺ": 82,
208
- "ጻ": 220,
209
- "ጽ": 75,
210
- "ጾ": 209,
211
- "ጿ": 169,
212
- "ፀ": 180,
213
- "ፁ": 143,
214
- "ፃ": 204,
215
- "ፅ": 184,
216
- "ፈ": 189,
217
- "ፉ": 110,
218
- "ፊ": 162,
219
- "ፋ": 98,
220
- "ፌ": 125,
221
- "ፍ": 155,
222
- "ፎ": 76,
223
- "ፏ": 93,
224
- "ፑ": 103,
225
- "ፒ": 215,
226
- "ፓ": 71,
227
- "ፔ": 139,
228
- "ፕ": 60,
229
- "ፖ": 18,
230
- "“": 175,
231
- "”": 225
232
  }
 
1
  {
2
  "[PAD]": 229,
3
  "[UNK]": 228,
4
+ "|": 78,
5
+ "ሀ": 3,
6
+ "ሁ": 21,
7
+ "ሂ": 103,
8
+ "ሃ": 218,
9
+ "ሄ": 111,
10
+ "ህ": 27,
11
+ "ሆ": 116,
12
+ "ለ": 105,
13
+ "ሉ": 93,
14
+ "ሊ": 144,
15
+ "ላ": 2,
16
  "ሌ": 72,
17
+ "ል": 82,
18
+ "ሎ": 92,
19
+ "ሏ": 140,
20
+ "ሐ": 121,
21
+ "ሑ": 15,
22
+ "ሓ": 170,
23
+ "ሔ": 110,
24
+ "ሕ": 17,
25
+ "መ": 203,
26
+ "ሙ": 18,
27
+ "ሚ": 146,
28
+ "ማ": 102,
29
+ "ሜ": 32,
30
+ "ም": 114,
31
+ "ሞ": 204,
32
+ "ሟ": 86,
33
+ "ሠ": 67,
34
+ "ሡ": 4,
35
+ "ሣ": 181,
36
+ "ሥ": 197,
37
+ "ሦ": 195,
38
+ "ረ": 101,
39
+ "ሩ": 202,
40
+ "ሪ": 89,
41
+ "ራ": 49,
42
+ "ሬ": 226,
43
+ "ር": 51,
44
+ "ሮ": 215,
45
+ "ሯ": 20,
46
+ "ሰ": 39,
47
+ "ሱ": 127,
48
+ "ሲ": 156,
49
+ "ሳ": 158,
50
+ "ሴ": 28,
51
+ "ስ": 40,
52
+ "ሶ": 177,
53
+ "ሷ": 25,
54
+ "ሸ": 196,
55
+ "ሹ": 200,
56
+ "ሺ": 148,
57
+ "ሻ": 188,
58
+ "ሼ": 154,
59
+ "ሽ": 19,
60
+ "ሾ": 133,
61
+ "ቀ": 165,
62
+ "ቁ": 58,
63
+ "ቂ": 186,
64
+ "ቃ": 57,
65
+ "ቄ": 213,
66
+ "ቅ": 180,
67
+ "ቆ": 50,
68
+ "ቋ": 194,
69
+ "በ": 172,
70
+ "ቡ": 168,
71
+ "ቢ": 60,
72
+ "ባ": 108,
73
+ "ቤ": 65,
74
+ "ብ": 167,
75
+ "ቦ": 34,
76
+ "ቧ": 64,
77
+ "ቨ": 220,
78
+ "ቪ": 53,
79
+ "ቫ": 137,
80
+ "ቭ": 41,
81
+ "ቮ": 56,
82
+ "ተ": 100,
83
+ "ቱ": 159,
84
+ "ቲ": 119,
85
+ "ታ": 54,
86
+ "ቴ": 122,
87
+ "ት": 151,
88
+ "ቶ": 206,
89
+ "ቷ": 79,
90
+ "ቸ": 145,
91
+ "ቹ": 207,
92
+ "ቺ": 85,
93
+ "ቻ": 1,
94
+ "ቼ": 152,
95
+ "ች": 118,
96
+ "ቾ": 190,
97
+ "ቿ": 91,
98
+ "ኀ": 38,
99
+ "ኃ": 185,
100
+ "ኅ": 126,
101
+ "ኋ": 98,
102
+ "ነ": 75,
103
+ "ኑ": 131,
104
+ "ኒ": 192,
105
+ "ና": 184,
106
+ "ኔ": 130,
107
+ "ን": 10,
108
+ "ኖ": 55,
109
+ "ኗ": 178,
110
+ "ኘ": 106,
111
+ "ኙ": 129,
112
+ "ኛ": 46,
113
+ "ኝ": 47,
114
+ "ኞ": 169,
115
+ "ኟ": 175,
116
+ "አ": 171,
117
+ "ኡ": 223,
118
+ "ኢ": 134,
119
+ "ኤ": 0,
120
+ "እ": 80,
121
+ "ኦ": 90,
122
+ "ከ": 211,
123
+ "ኩ": 45,
124
+ "ኪ": 11,
125
+ "ካ": 87,
126
+ "ኬ": 164,
127
+ "ክ": 74,
128
+ "ኮ": 63,
129
+ "ኳ": 225,
130
+ "ኸ": 109,
131
+ "ኽ": 125,
132
+ "ወ": 212,
133
+ "ዊ": 12,
134
+ "ዋ": 179,
135
+ "ዌ": 138,
136
+ "ው": 29,
137
+ "ዎ": 120,
138
+ "ዐ": 71,
139
+ "ዑ": 217,
140
+ "ዒ": 150,
141
+ "ዓ": 7,
142
+ "ዕ": 107,
143
+ "ዖ": 227,
144
+ "ዘ": 147,
145
+ "ዙ": 128,
146
+ "ዚ": 13,
147
+ "ዛ": 141,
148
+ "ዜ": 208,
149
+ "ዝ": 163,
150
+ "ዞ": 97,
151
+ "ዟ": 174,
152
+ "ዢ": 219,
153
+ "ዣ": 33,
154
+ "ዤ": 157,
155
+ "ዥ": 224,
156
+ "ዦ": 182,
157
+ "የ": 183,
158
+ "ዩ": 31,
159
+ "ያ": 62,
160
+ "ዬ": 123,
161
+ "ይ": 173,
162
+ "ዮ": 70,
163
+ "ደ": 162,
164
+ "ዱ": 112,
165
+ "ዲ": 77,
166
+ "ዳ": 66,
167
+ "ዴ": 9,
168
+ "ድ": 26,
169
+ "ዶ": 153,
170
+ "ዷ": 43,
171
+ "ጀ": 176,
172
+ "ጁ": 149,
173
+ "ጂ": 201,
174
+ "ጃ": 84,
175
+ "ጄ": 209,
176
+ "ጅ": 95,
177
+ "ጆ": 88,
178
+ "ገ": 48,
179
+ "ጉ": 5,
180
+ "ጊ": 166,
181
+ "ጋ": 76,
182
+ "ጌ": 94,
183
+ "ግ": 117,
184
+ "ጎ": 35,
185
+ "ጓ": 210,
186
+ "ጠ": 221,
187
+ "ጡ": 52,
188
+ "ጢ": 37,
189
+ "ጣ": 69,
190
+ "ጤ": 68,
191
+ "ጥ": 61,
192
+ "ጦ": 23,
193
+ "ጧ": 142,
194
+ "ጨ": 214,
195
+ "ጩ": 187,
196
+ "ጪ": 222,
197
+ "ጫ": 83,
198
+ "ጬ": 113,
199
+ "ጭ": 139,
200
+ "ጮ": 14,
201
+ "ጲ": 24,
202
+ "ጴ": 132,
203
+ "ጵ": 16,
204
+ "ጶ": 22,
205
+ "ጸ": 216,
206
+ "ጹ": 143,
207
+ "ጺ": 6,
208
+ "ጻ": 155,
209
+ "ጽ": 136,
210
+ "ጾ": 135,
211
+ "ጿ": 30,
212
+ "ፀ": 115,
213
+ "ፁ": 99,
214
+ "ፃ": 124,
215
+ "ፅ": 104,
216
+ "ፈ": 81,
217
+ "ፉ": 161,
218
+ "ፊ": 8,
219
+ "ፋ": 36,
220
+ "ፌ": 199,
221
+ "ፍ": 193,
222
+ "ፎ": 96,
223
+ "ፏ": 73,
224
+ "ፑ": 205,
225
+ "ፒ": 44,
226
+ "ፓ": 191,
227
+ "ፔ": 42,
228
+ "ፕ": 59,
229
+ "ፖ": 198,
230
+ "“": 160,
231
+ "”": 189
232
  }