codebyzeb committed on
Commit
be85522
·
verified ·
1 Parent(s): 5ded8f0

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +59 -55
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -106,61 +106,65 @@
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
- "a": 4,
110
- "ʃ": 5,
111
- "m": 6,
112
- "e": 7,
113
- "h": 8,
114
- "d": 9,
115
- "s": 10,
116
- "f": 11,
117
- "t̠ʃ": 12,
118
- "l": 13,
119
- "ə": 14,
120
- "p": 15,
121
- "i": 16,
122
- "k": 17,
123
- "ɨ": 18,
124
- "n": 19,
125
- "": 20,
126
- "t": 21,
127
- "t̠ʃʲ": 22,
128
- "w": 23,
129
- "u": 24,
130
- "ts": 25,
131
- "": 26,
132
- "r": 27,
133
- "o": 28,
134
- "j": 29,
135
- "b": 30,
136
- "v": 31,
137
- "": 32,
138
- "ɔa": 33,
139
- "ɡ": 34,
140
- "z": 35,
141
- "ɾ": 36,
142
- "ea": 37,
143
- "": 38,
144
- "": 39,
145
- "ʒ": 40,
146
- "əɪ": 41,
147
- "d̠ʒ": 42,
148
- "": 43,
149
- "": 44,
150
- "": 45,
151
- "ŋ": 46,
152
- "tsʲ": 47,
153
- "": 48,
154
- "eo": 49,
155
- "d̠ʒʲ": 50,
156
- "": 51,
157
- "əʊ": 52,
158
- "": 53,
159
- "": 54,
160
- "ɾʲ": 55,
161
- "ɔ": 56,
162
- "": 57,
163
- "oɪ": 58
 
 
 
 
164
  },
165
  "unk_token": "UNK"
166
  }
 
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
+ "m": 4,
110
+ "": 5,
111
+ "": 6,
112
+ "d̠ʒ": 7,
113
+ "i": 8,
114
+ "v": 9,
115
+ "": 10,
116
+ "h": 11,
117
+ "u": 12,
118
+ "ʒ": 13,
119
+ "": 14,
120
+ "": 15,
121
+ "l": 16,
122
+ "ɾ̪": 17,
123
+ "t̠ʃ": 18,
124
+ "p": 19,
125
+ "j": 20,
126
+ "": 21,
127
+ "": 22,
128
+ "": 23,
129
+ "": 24,
130
+ "k": 25,
131
+ "w": 26,
132
+ "ɡ": 27,
133
+ "b": 28,
134
+ "t̠ʃʲ": 29,
135
+ "e̯ä": 30,
136
+ "ʃ": 31,
137
+ "ʃʲ": 32,
138
+ "ə": 33,
139
+ "o̯ä": 34,
140
+ "ɨ": 35,
141
+ "": 36,
142
+ "f": 37,
143
+ "t̪s̪": 38,
144
+ "": 39,
145
+ "əɪ": 40,
146
+ "": 41,
147
+ "tsʲ": 42,
148
+ "": 43,
149
+ "": 44,
150
+ "": 45,
151
+ "": 46,
152
+ "": 47,
153
+ "": 48,
154
+ "": 49,
155
+ "ɾʲ": 50,
156
+ "": 51,
157
+ "": 52,
158
+ "": 53,
159
+ "": 54,
160
+ "": 55,
161
+ "eo": 56,
162
+ "d̠ʒʲ": 57,
163
+ "dʲ": 58,
164
+ "pʲ": 59,
165
+ "əʊ": 60,
166
+ "fʲ": 61,
167
+ "oɪ": 62
168
  },
169
  "unk_token": "UNK"
170
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"a":4,"ʃ":5,"m":6,"e":7,"h":8,"d":9,"s":10,"f":11,"t̠ʃ":12,"l":13,"ə":14,"p":15,"i":16,"k":17,"ɨ":18,"n":19,"":20,"t":21,"t̠ʃʲ":22,"w":23,"u":24,"ts":25,"":26,"r":27,"o":28,"j":29,"b":30,"v":31,"":32,"ɔa":33,"ɡ":34,"z":35,"ɾ":36,"ea":37,"":38,"":39,"ʒ":40,"əɪ":41,"d̠ʒ":42,"":43,"":44,"":45,"ŋ":46,"tsʲ":47,"":48,"eo":49,"d̠ʒʲ":50,"":51,"əʊ":52,"":53,"":54,"ɾʲ":55,"ɔ":56,"":57,"oɪ":58}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"m":4,"":5,"":6,"d̠ʒ":7,"i":8,"v":9,"":10,"h":11,"u":12,"ʒ":13,"":14,"":15,"l":16,"ɾ̪":17,"t̠ʃ":18,"p":19,"j":20,"":21,"":22,"":23,"":24,"k":25,"w":26,"ɡ":27,"b":28,"t̠ʃʲ":29,"e̯ä":30,"ʃ":31,"ʃʲ":32,"ə":33,"o̯ä":34,"ɨ":35,"":36,"f":37,"t̪s̪":38,"":39,"əɪ":40,"":41,"tsʲ":42,"":43,"":44,"":45,"":46,"":47,"":48,"":49,"ɾʲ":50,"":51,"":52,"":53,"":54,"":55,"eo":56,"d̠ʒʲ":57,"dʲ":58,"pʲ":59,"əʊ":60,"fʲ":61,"oɪ":62}