codebyzeb committed on
Commit
26ee18d
·
verified ·
1 Parent(s): 1cd23db

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +46 -45
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -106,52 +106,53 @@
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
- "k": 4,
110
- "y": 5,
111
- "m": 6,
112
- "": 7,
113
- "s": 8,
114
- "t": 9,
115
- "": 10,
116
- "ŋ": 11,
117
- "a": 12,
118
- "i": 13,
119
- "n": 14,
120
- "ɛ": 15,
121
- "æ": 16,
122
  "z": 17,
123
- "ɡ": 18,
124
- "r": 19,
125
- "v": 20,
126
- "u": 21,
127
- "ɾ": 22,
128
- "d": 23,
129
- "ʊ": 24,
130
- "": 25,
131
- "ɲ": 26,
132
- "e": 27,
133
- "f": 28,
134
- "o": 29,
135
- "p": 30,
136
- "ʒ": 31,
137
- "t̠ʃ": 32,
138
- "d̠ʒ": 33,
139
- "": 34,
140
- "w": 35,
141
- "": 36,
142
- "j": 37,
143
- "ə": 38,
144
- "ũ": 39,
145
- "ɐ̃": 40,
146
- "l": 41,
147
- "b": 42,
148
- "x": 43,
149
- "ɔ": 44,
150
- "ʃ": 45,
151
- "": 46,
152
- "ɛʊ": 47,
153
- "ɔɪ": 48,
154
- "": 49
 
155
  },
156
  "unk_token": "UNK"
157
  }
 
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
+ "m": 4,
110
+ "a": 5,
111
+ "": 6,
112
+ "k": 7,
113
+ "ɛ": 8,
114
+ "ɾ": 9,
115
+ "u": 10,
116
+ "b": 11,
117
+ "e": 12,
118
+ "aʊ̯": 13,
119
+ "ɡ": 14,
120
+ "ɐ": 15,
121
+ "oɪ̯": 16,
122
  "z": 17,
123
+ "i": 18,
124
+ "": 19,
125
+ "": 20,
126
+ "eʊ̯": 21,
127
+ "": 22,
128
+ "v": 23,
129
+ "": 24,
130
+ "ɐ̃ʊ̯̃": 25,
131
+ "eɪ̯": 26,
132
+ "d̠ʒ": 27,
133
+ "ẽɪ̯̃": 28,
134
+ "p": 29,
135
+ "r": 30,
136
+ "ɔ": 31,
137
+ "o": 32,
138
+ "l": 33,
139
+ "ɐ̃": 34,
140
+ "": 35,
141
+ "f": 36,
142
+ "ɲ": 37,
143
+ "": 38,
144
+ "uɪ̯": 39,
145
+ "w": 40,
146
+ "ʒ": 41,
147
+ "iʊ̯": 42,
148
+ "ʃ": 43,
149
+ "oʊ̯": 44,
150
+ "aɪ̯": 45,
151
+ "ɔɪ̯": 46,
152
+ "ɣ": 47,
153
+ "ɛɪ̯": 48,
154
+ "ɛʊ̯": 49,
155
+ "ɪ̯": 50
156
  },
157
  "unk_token": "UNK"
158
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"k":4,"y":5,"m":6,"":7,"s":8,"t":9,"":10,"ŋ":11,"a":12,"i":13,"n":14,"ɛ":15,"æ":16,"z":17,"ɡ":18,"r":19,"v":20,"u":21,"ɾ":22,"d":23,"ʊ":24,"":25,"ɲ":26,"e":27,"f":28,"o":29,"p":30,"ʒ":31,"t̠ʃ":32,"d̠ʒ":33,"":34,"w":35,"":36,"j":37,"ə":38,"ũ":39,"ɐ̃":40,"l":41,"b":42,"x":43,"ɔ":44,"ʃ":45,"":46,"ɛʊ":47,"ɔɪ":48,"":49}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"m":4,"a":5,"":6,"k":7,"ɛ":8,"ɾ":9,"u":10,"b":11,"e":12,"aʊ̯":13,"ɡ":14,"ɐ":15,"oɪ̯":16,"z":17,"i":18,"":19,"":20,"eʊ̯":21,"":22,"v":23,"":24,"ɐ̃ʊ̯̃":25,"eɪ̯":26,"d̠ʒ":27,"ẽɪ̯̃":28,"p":29,"r":30,"ɔ":31,"o":32,"l":33,"ɐ̃":34,"":35,"f":36,"ɲ":37,"":38,"uɪ̯":39,"w":40,"ʒ":41,"iʊ̯":42,"ʃ":43,"oʊ̯":44,"aɪ̯":45,"ɔɪ̯":46,"ɣ":47,"ɛɪ̯":48,"ɛʊ̯":49,"ɪ̯":50}