codebyzeb commited on
Commit
127dad2
1 Parent(s): d0f7972

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +46 -61
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -113,68 +113,53 @@
113
  "PAD": 1,
114
  "WORD_BOUNDARY": 2,
115
  "UTT_BOUNDARY": 3,
116
- "d": 4,
117
- "": 5,
118
- "j": 6,
119
- "w": 7,
120
- "ɔ": 8,
121
- "n": 9,
122
- "t": 10,
123
- "ə": 11,
124
- "l": 12,
125
- "ʊ": 13,
126
- "k": 14,
127
- "æ": 15,
128
- "ð": 16,
129
- "ʌ": 17,
130
- "ɪ": 18,
131
- "s": 19,
132
- "ɛ": 20,
133
- "z": 21,
134
- "": 22,
135
- "ɹ": 23,
136
- "f": 24,
137
- "": 25,
138
- "ɡ": 26,
139
- "ɑ": 27,
140
- "h": 28,
141
- "p": 29,
142
- "b": 30,
143
- "i": 31,
144
- "t̠ʃ": 32,
145
- "": 33,
146
- "θ": 34,
147
- "ŋ": 35,
148
- "m": 36,
149
- "ɔɪ": 37,
150
- "": 38,
151
- "": 39,
152
- "v": 40,
153
- "ɜː": 41,
154
- "d̠ʒ": 42,
155
- "ʃ": 43,
156
- "": 44,
157
  "ʒ": 45,
158
- "ɑ̃": 46,
159
- "r": 47,
160
- "": 48,
161
- "x": 49,
162
- "ɬ": 50,
163
- "ç": 51,
164
- "e": 52,
165
- "o": 53,
166
- "ɛː": 54,
167
- "ɪː": 55,
168
- "u": 56,
169
- "q": 57,
170
- "tɕ": 58,
171
- "tʰ": 59,
172
- "ɯ": 60,
173
- "r̩": 61,
174
- "əʊ": 62,
175
- "a": 63,
176
- "ɒ": 64,
177
- "eə": 65
178
  },
179
  "unk_token": "UNK"
180
  }
 
113
  "PAD": 1,
114
  "WORD_BOUNDARY": 2,
115
  "UTT_BOUNDARY": 3,
116
+ "j": 4,
117
+ "ɛ": 5,
118
+ "h": 6,
119
+ "k": 7,
120
+ "ɑ": 8,
121
+ "m": 9,
122
+ "p": 10,
123
+ "": 11,
124
+ "n": 12,
125
+ "d": 13,
126
+ "z": 14,
127
+ "θ": 15,
128
+ "ɪ": 16,
129
+ "ŋ": 17,
130
+ "l": 18,
131
+ "": 19,
132
+ "s": 20,
133
+ "ɜː": 21,
134
+ "t": 22,
135
+ "w": 23,
136
+ "v": 24,
137
+ "ð": 25,
138
+ "æ": 26,
139
+ "ɔ": 27,
140
+ "ɹ": 28,
141
+ "ʌ": 29,
142
+ "f": 30,
143
+ "ə": 31,
144
+ "b": 32,
145
+ "": 33,
146
+ "": 34,
147
+ "": 35,
148
+ "d̠ʒ": 36,
149
+ "i": 37,
150
+ "": 38,
151
+ "": 39,
152
+ "ʊ": 40,
153
+ "ɡ": 41,
154
+ "t̠ʃ": 42,
155
+ "ɔɪ": 43,
156
+ "ʃ": 44,
157
  "ʒ": 45,
158
+ "r": 46,
159
+ "x": 47,
160
+ "ɬ": 48,
161
+ "ɑ̃": 49,
162
+ "": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  },
164
  "unk_token": "UNK"
165
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"d":4,"":5,"j":6,"w":7,"ɔ":8,"n":9,"t":10,"ə":11,"l":12,"ʊ":13,"k":14,"æ":15,"ð":16,"ʌ":17,"ɪ":18,"s":19,"ɛ":20,"z":21,"":22,"ɹ":23,"f":24,"":25,"ɡ":26,"ɑ":27,"h":28,"p":29,"b":30,"i":31,"t̠ʃ":32,"":33,"θ":34,"ŋ":35,"m":36,"ɔɪ":37,"":38,"":39,"v":40,"ɜː":41,"d̠ʒ":42,"ʃ":43,"":44,"ʒ":45,"ɑ̃":46,"r":47,"":48,"x":49,"ɬ":50,"ç":51,"e":52,"o":53,"ɛː":54,"ɪː":55,"u":56,"q":57,"tɕ":58,"tʰ":59,"ɯ":60,"r̩":61,"əʊ":62,"a":63,"ɒ":64,"eə":65}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"j":4,"ɛ":5,"h":6,"k":7,"ɑ":8,"m":9,"p":10,"":11,"n":12,"d":13,"z":14,"θ":15,"ɪ":16,"ŋ":17,"l":18,"":19,"s":20,"ɜː":21,"t":22,"w":23,"v":24,"ð":25,"æ":26,"ɔ":27,"ɹ":28,"ʌ":29,"f":30,"ə":31,"b":32,"":33,"":34,"":35,"d̠ʒ":36,"i":37,"":38,"":39,"ʊ":40,"ɡ":41,"t̠ʃ":42,"ɔɪ":43,"ʃ":44,"ʒ":45,"r":46,"x":47,"ɬ":48,"ɑ̃":49,"":50}