Alex Cabrera committed on
Commit
c24ff9a
·
1 Parent(s): c52f77a
.dockerignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .git
.gitattributes DELETED
@@ -1,34 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tflite filter=lfs diff=lfs merge=lfs -text
29
- *.tgz filter=lfs diff=lfs merge=lfs -text
30
- *.wasm filter=lfs diff=lfs merge=lfs -text
31
- *.xz filter=lfs diff=lfs merge=lfs -text
32
- *.zip filter=lfs diff=lfs merge=lfs -text
33
- *.zst filter=lfs diff=lfs merge=lfs -text
34
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.zeno_cache/OUTPUTsilero_sst.pickle ADDED
Binary file (768 kB). View file
 
.zeno_cache/OUTPUTwhisper.pickle ADDED
Binary file (804 kB). View file
 
.zeno_cache/POSTDISTILLwer_msilero_sst.pickle ADDED
Binary file (63.7 kB). View file
 
.zeno_cache/POSTDISTILLwer_mwhisper.pickle ADDED
Binary file (63.7 kB). View file
 
.zeno_cache/PREDISTILLamplitude.pickle ADDED
Binary file (63.7 kB). View file
 
.zeno_cache/PREDISTILLcountry.pickle ADDED
Binary file (65 kB). View file
 
.zeno_cache/PREDISTILLlength.pickle ADDED
Binary file (55.1 kB). View file
 
.zeno_cache/folders.pickle ADDED
Binary file (28 Bytes). View file
 
.zeno_cache/reports.pickle ADDED
Binary file (7.95 kB). View file
 
.zeno_cache/slices.pickle ADDED
Binary file (2.87 kB). View file
 
.zeno_cache/view.mjs ADDED
@@ -0,0 +1,788 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ function noop() { }
2
+ function run(fn) {
3
+ return fn();
4
+ }
5
+ function blank_object() {
6
+ return Object.create(null);
7
+ }
8
+ function run_all(fns) {
9
+ fns.forEach(run);
10
+ }
11
+ function is_function(thing) {
12
+ return typeof thing === 'function';
13
+ }
14
+ function safe_not_equal(a, b) {
15
+ return a != a ? b == b : a !== b || ((a && typeof a === 'object') || typeof a === 'function');
16
+ }
17
+ let src_url_equal_anchor;
18
+ function src_url_equal(element_src, url) {
19
+ if (!src_url_equal_anchor) {
20
+ src_url_equal_anchor = document.createElement('a');
21
+ }
22
+ src_url_equal_anchor.href = url;
23
+ return element_src === src_url_equal_anchor.href;
24
+ }
25
+ function is_empty(obj) {
26
+ return Object.keys(obj).length === 0;
27
+ }
28
+
29
+ // Track which nodes are claimed during hydration. Unclaimed nodes can then be removed from the DOM
30
+ // at the end of hydration without touching the remaining nodes.
31
+ let is_hydrating = false;
32
+ function start_hydrating() {
33
+ is_hydrating = true;
34
+ }
35
+ function end_hydrating() {
36
+ is_hydrating = false;
37
+ }
38
+ function upper_bound(low, high, key, value) {
39
+ // Return first index of value larger than input value in the range [low, high)
40
+ while (low < high) {
41
+ const mid = low + ((high - low) >> 1);
42
+ if (key(mid) <= value) {
43
+ low = mid + 1;
44
+ }
45
+ else {
46
+ high = mid;
47
+ }
48
+ }
49
+ return low;
50
+ }
51
+ function init_hydrate(target) {
52
+ if (target.hydrate_init)
53
+ return;
54
+ target.hydrate_init = true;
55
+ // We know that all children have claim_order values since the unclaimed have been detached if target is not <head>
56
+ let children = target.childNodes;
57
+ // If target is <head>, there may be children without claim_order
58
+ if (target.nodeName === 'HEAD') {
59
+ const myChildren = [];
60
+ for (let i = 0; i < children.length; i++) {
61
+ const node = children[i];
62
+ if (node.claim_order !== undefined) {
63
+ myChildren.push(node);
64
+ }
65
+ }
66
+ children = myChildren;
67
+ }
68
+ /*
69
+ * Reorder claimed children optimally.
70
+ * We can reorder claimed children optimally by finding the longest subsequence of
71
+ * nodes that are already claimed in order and only moving the rest. The longest
72
+ * subsequence of nodes that are claimed in order can be found by
73
+ * computing the longest increasing subsequence of .claim_order values.
74
+ *
75
+ * This algorithm is optimal in generating the least amount of reorder operations
76
+ * possible.
77
+ *
78
+ * Proof:
79
+ * We know that, given a set of reordering operations, the nodes that do not move
80
+ * always form an increasing subsequence, since they do not move among each other
81
+ * meaning that they must be already ordered among each other. Thus, the maximal
82
+ * set of nodes that do not move form a longest increasing subsequence.
83
+ */
84
+ // Compute longest increasing subsequence
85
+ // m: subsequence length j => index k of smallest value that ends an increasing subsequence of length j
86
+ const m = new Int32Array(children.length + 1);
87
+ // Predecessor indices + 1
88
+ const p = new Int32Array(children.length);
89
+ m[0] = -1;
90
+ let longest = 0;
91
+ for (let i = 0; i < children.length; i++) {
92
+ const current = children[i].claim_order;
93
+ // Find the largest subsequence length such that it ends in a value less than our current value
94
+ // upper_bound returns first greater value, so we subtract one
95
+ // with fast path for when we are on the current longest subsequence
96
+ const seqLen = ((longest > 0 && children[m[longest]].claim_order <= current) ? longest + 1 : upper_bound(1, longest, idx => children[m[idx]].claim_order, current)) - 1;
97
+ p[i] = m[seqLen] + 1;
98
+ const newLen = seqLen + 1;
99
+ // We can guarantee that current is the smallest value. Otherwise, we would have generated a longer sequence.
100
+ m[newLen] = i;
101
+ longest = Math.max(newLen, longest);
102
+ }
103
+ // The longest increasing subsequence of nodes (initially reversed)
104
+ const lis = [];
105
+ // The rest of the nodes, nodes that will be moved
106
+ const toMove = [];
107
+ let last = children.length - 1;
108
+ for (let cur = m[longest] + 1; cur != 0; cur = p[cur - 1]) {
109
+ lis.push(children[cur - 1]);
110
+ for (; last >= cur; last--) {
111
+ toMove.push(children[last]);
112
+ }
113
+ last--;
114
+ }
115
+ for (; last >= 0; last--) {
116
+ toMove.push(children[last]);
117
+ }
118
+ lis.reverse();
119
+ // We sort the nodes being moved to guarantee that their insertion order matches the claim order
120
+ toMove.sort((a, b) => a.claim_order - b.claim_order);
121
+ // Finally, we move the nodes
122
+ for (let i = 0, j = 0; i < toMove.length; i++) {
123
+ while (j < lis.length && toMove[i].claim_order >= lis[j].claim_order) {
124
+ j++;
125
+ }
126
+ const anchor = j < lis.length ? lis[j] : null;
127
+ target.insertBefore(toMove[i], anchor);
128
+ }
129
+ }
130
+ function append(target, node) {
131
+ target.appendChild(node);
132
+ }
133
+ function append_styles(target, style_sheet_id, styles) {
134
+ const append_styles_to = get_root_for_style(target);
135
+ if (!append_styles_to.getElementById(style_sheet_id)) {
136
+ const style = element('style');
137
+ style.id = style_sheet_id;
138
+ style.textContent = styles;
139
+ append_stylesheet(append_styles_to, style);
140
+ }
141
+ }
142
+ function get_root_for_style(node) {
143
+ if (!node)
144
+ return document;
145
+ const root = node.getRootNode ? node.getRootNode() : node.ownerDocument;
146
+ if (root && root.host) {
147
+ return root;
148
+ }
149
+ return node.ownerDocument;
150
+ }
151
+ function append_stylesheet(node, style) {
152
+ append(node.head || node, style);
153
+ }
154
+ function append_hydration(target, node) {
155
+ if (is_hydrating) {
156
+ init_hydrate(target);
157
+ if ((target.actual_end_child === undefined) || ((target.actual_end_child !== null) && (target.actual_end_child.parentElement !== target))) {
158
+ target.actual_end_child = target.firstChild;
159
+ }
160
+ // Skip nodes of undefined ordering
161
+ while ((target.actual_end_child !== null) && (target.actual_end_child.claim_order === undefined)) {
162
+ target.actual_end_child = target.actual_end_child.nextSibling;
163
+ }
164
+ if (node !== target.actual_end_child) {
165
+ // We only insert if the ordering of this node should be modified or the parent node is not target
166
+ if (node.claim_order !== undefined || node.parentNode !== target) {
167
+ target.insertBefore(node, target.actual_end_child);
168
+ }
169
+ }
170
+ else {
171
+ target.actual_end_child = node.nextSibling;
172
+ }
173
+ }
174
+ else if (node.parentNode !== target || node.nextSibling !== null) {
175
+ target.appendChild(node);
176
+ }
177
+ }
178
+ function insert_hydration(target, node, anchor) {
179
+ if (is_hydrating && !anchor) {
180
+ append_hydration(target, node);
181
+ }
182
+ else if (node.parentNode !== target || node.nextSibling != anchor) {
183
+ target.insertBefore(node, anchor || null);
184
+ }
185
+ }
186
+ function detach(node) {
187
+ node.parentNode.removeChild(node);
188
+ }
189
+ function element(name) {
190
+ return document.createElement(name);
191
+ }
192
+ function text(data) {
193
+ return document.createTextNode(data);
194
+ }
195
+ function space() {
196
+ return text(' ');
197
+ }
198
+ function attr(node, attribute, value) {
199
+ if (value == null)
200
+ node.removeAttribute(attribute);
201
+ else if (node.getAttribute(attribute) !== value)
202
+ node.setAttribute(attribute, value);
203
+ }
204
+ function children(element) {
205
+ return Array.from(element.childNodes);
206
+ }
207
+ function init_claim_info(nodes) {
208
+ if (nodes.claim_info === undefined) {
209
+ nodes.claim_info = { last_index: 0, total_claimed: 0 };
210
+ }
211
+ }
212
+ function claim_node(nodes, predicate, processNode, createNode, dontUpdateLastIndex = false) {
213
+ // Try to find nodes in an order such that we lengthen the longest increasing subsequence
214
+ init_claim_info(nodes);
215
+ const resultNode = (() => {
216
+ // We first try to find an element after the previous one
217
+ for (let i = nodes.claim_info.last_index; i < nodes.length; i++) {
218
+ const node = nodes[i];
219
+ if (predicate(node)) {
220
+ const replacement = processNode(node);
221
+ if (replacement === undefined) {
222
+ nodes.splice(i, 1);
223
+ }
224
+ else {
225
+ nodes[i] = replacement;
226
+ }
227
+ if (!dontUpdateLastIndex) {
228
+ nodes.claim_info.last_index = i;
229
+ }
230
+ return node;
231
+ }
232
+ }
233
+ // Otherwise, we try to find one before
234
+ // We iterate in reverse so that we don't go too far back
235
+ for (let i = nodes.claim_info.last_index - 1; i >= 0; i--) {
236
+ const node = nodes[i];
237
+ if (predicate(node)) {
238
+ const replacement = processNode(node);
239
+ if (replacement === undefined) {
240
+ nodes.splice(i, 1);
241
+ }
242
+ else {
243
+ nodes[i] = replacement;
244
+ }
245
+ if (!dontUpdateLastIndex) {
246
+ nodes.claim_info.last_index = i;
247
+ }
248
+ else if (replacement === undefined) {
249
+ // Since we spliced before the last_index, we decrease it
250
+ nodes.claim_info.last_index--;
251
+ }
252
+ return node;
253
+ }
254
+ }
255
+ // If we can't find any matching node, we create a new one
256
+ return createNode();
257
+ })();
258
+ resultNode.claim_order = nodes.claim_info.total_claimed;
259
+ nodes.claim_info.total_claimed += 1;
260
+ return resultNode;
261
+ }
262
+ function claim_element_base(nodes, name, attributes, create_element) {
263
+ return claim_node(nodes, (node) => node.nodeName === name, (node) => {
264
+ const remove = [];
265
+ for (let j = 0; j < node.attributes.length; j++) {
266
+ const attribute = node.attributes[j];
267
+ if (!attributes[attribute.name]) {
268
+ remove.push(attribute.name);
269
+ }
270
+ }
271
+ remove.forEach(v => node.removeAttribute(v));
272
+ return undefined;
273
+ }, () => create_element(name));
274
+ }
275
+ function claim_element(nodes, name, attributes) {
276
+ return claim_element_base(nodes, name, attributes, element);
277
+ }
278
+ function claim_text(nodes, data) {
279
+ return claim_node(nodes, (node) => node.nodeType === 3, (node) => {
280
+ const dataStr = '' + data;
281
+ if (node.data.startsWith(dataStr)) {
282
+ if (node.data.length !== dataStr.length) {
283
+ return node.splitText(dataStr.length);
284
+ }
285
+ }
286
+ else {
287
+ node.data = dataStr;
288
+ }
289
+ }, () => text(data), true // Text nodes should not update last index since it is likely not worth it to eliminate an increasing subsequence of actual elements
290
+ );
291
+ }
292
+ function claim_space(nodes) {
293
+ return claim_text(nodes, ' ');
294
+ }
295
+ function set_data(text, data) {
296
+ data = '' + data;
297
+ if (text.wholeText !== data)
298
+ text.data = data;
299
+ }
300
+ function set_style(node, key, value, important) {
301
+ if (value === null) {
302
+ node.style.removeProperty(key);
303
+ }
304
+ else {
305
+ node.style.setProperty(key, value, important ? 'important' : '');
306
+ }
307
+ }
308
+
309
+ let current_component;
310
+ function set_current_component(component) {
311
+ current_component = component;
312
+ }
313
+
314
+ const dirty_components = [];
315
+ const binding_callbacks = [];
316
+ const render_callbacks = [];
317
+ const flush_callbacks = [];
318
+ const resolved_promise = Promise.resolve();
319
+ let update_scheduled = false;
320
+ function schedule_update() {
321
+ if (!update_scheduled) {
322
+ update_scheduled = true;
323
+ resolved_promise.then(flush);
324
+ }
325
+ }
326
+ function add_render_callback(fn) {
327
+ render_callbacks.push(fn);
328
+ }
329
+ // flush() calls callbacks in this order:
330
+ // 1. All beforeUpdate callbacks, in order: parents before children
331
+ // 2. All bind:this callbacks, in reverse order: children before parents.
332
+ // 3. All afterUpdate callbacks, in order: parents before children. EXCEPT
333
+ // for afterUpdates called during the initial onMount, which are called in
334
+ // reverse order: children before parents.
335
+ // Since callbacks might update component values, which could trigger another
336
+ // call to flush(), the following steps guard against this:
337
+ // 1. During beforeUpdate, any updated components will be added to the
338
+ // dirty_components array and will cause a reentrant call to flush(). Because
339
+ // the flush index is kept outside the function, the reentrant call will pick
340
+ // up where the earlier call left off and go through all dirty components. The
341
+ // current_component value is saved and restored so that the reentrant call will
342
+ // not interfere with the "parent" flush() call.
343
+ // 2. bind:this callbacks cannot trigger new flush() calls.
344
+ // 3. During afterUpdate, any updated components will NOT have their afterUpdate
345
+ // callback called a second time; the seen_callbacks set, outside the flush()
346
+ // function, guarantees this behavior.
347
+ const seen_callbacks = new Set();
348
+ let flushidx = 0; // Do *not* move this inside the flush() function
349
+ function flush() {
350
+ const saved_component = current_component;
351
+ do {
352
+ // first, call beforeUpdate functions
353
+ // and update components
354
+ while (flushidx < dirty_components.length) {
355
+ const component = dirty_components[flushidx];
356
+ flushidx++;
357
+ set_current_component(component);
358
+ update(component.$$);
359
+ }
360
+ set_current_component(null);
361
+ dirty_components.length = 0;
362
+ flushidx = 0;
363
+ while (binding_callbacks.length)
364
+ binding_callbacks.pop()();
365
+ // then, once components are updated, call
366
+ // afterUpdate functions. This may cause
367
+ // subsequent updates...
368
+ for (let i = 0; i < render_callbacks.length; i += 1) {
369
+ const callback = render_callbacks[i];
370
+ if (!seen_callbacks.has(callback)) {
371
+ // ...so guard against infinite loops
372
+ seen_callbacks.add(callback);
373
+ callback();
374
+ }
375
+ }
376
+ render_callbacks.length = 0;
377
+ } while (dirty_components.length);
378
+ while (flush_callbacks.length) {
379
+ flush_callbacks.pop()();
380
+ }
381
+ update_scheduled = false;
382
+ seen_callbacks.clear();
383
+ set_current_component(saved_component);
384
+ }
385
+ function update($$) {
386
+ if ($$.fragment !== null) {
387
+ $$.update();
388
+ run_all($$.before_update);
389
+ const dirty = $$.dirty;
390
+ $$.dirty = [-1];
391
+ $$.fragment && $$.fragment.p($$.ctx, dirty);
392
+ $$.after_update.forEach(add_render_callback);
393
+ }
394
+ }
395
+ const outroing = new Set();
396
+ function transition_in(block, local) {
397
+ if (block && block.i) {
398
+ outroing.delete(block);
399
+ block.i(local);
400
+ }
401
+ }
402
+ function mount_component(component, target, anchor, customElement) {
403
+ const { fragment, on_mount, on_destroy, after_update } = component.$$;
404
+ fragment && fragment.m(target, anchor);
405
+ if (!customElement) {
406
+ // onMount happens before the initial afterUpdate
407
+ add_render_callback(() => {
408
+ const new_on_destroy = on_mount.map(run).filter(is_function);
409
+ if (on_destroy) {
410
+ on_destroy.push(...new_on_destroy);
411
+ }
412
+ else {
413
+ // Edge case - component was destroyed immediately,
414
+ // most likely as a result of a binding initialising
415
+ run_all(new_on_destroy);
416
+ }
417
+ component.$$.on_mount = [];
418
+ });
419
+ }
420
+ after_update.forEach(add_render_callback);
421
+ }
422
+ function destroy_component(component, detaching) {
423
+ const $$ = component.$$;
424
+ if ($$.fragment !== null) {
425
+ run_all($$.on_destroy);
426
+ $$.fragment && $$.fragment.d(detaching);
427
+ // TODO null out other refs, including component.$$ (but need to
428
+ // preserve final state?)
429
+ $$.on_destroy = $$.fragment = null;
430
+ $$.ctx = [];
431
+ }
432
+ }
433
+ function make_dirty(component, i) {
434
+ if (component.$$.dirty[0] === -1) {
435
+ dirty_components.push(component);
436
+ schedule_update();
437
+ component.$$.dirty.fill(0);
438
+ }
439
+ component.$$.dirty[(i / 31) | 0] |= (1 << (i % 31));
440
+ }
441
+ function init(component, options, instance, create_fragment, not_equal, props, append_styles, dirty = [-1]) {
442
+ const parent_component = current_component;
443
+ set_current_component(component);
444
+ const $$ = component.$$ = {
445
+ fragment: null,
446
+ ctx: null,
447
+ // state
448
+ props,
449
+ update: noop,
450
+ not_equal,
451
+ bound: blank_object(),
452
+ // lifecycle
453
+ on_mount: [],
454
+ on_destroy: [],
455
+ on_disconnect: [],
456
+ before_update: [],
457
+ after_update: [],
458
+ context: new Map(options.context || (parent_component ? parent_component.$$.context : [])),
459
+ // everything else
460
+ callbacks: blank_object(),
461
+ dirty,
462
+ skip_bound: false,
463
+ root: options.target || parent_component.$$.root
464
+ };
465
+ append_styles && append_styles($$.root);
466
+ let ready = false;
467
+ $$.ctx = instance
468
+ ? instance(component, options.props || {}, (i, ret, ...rest) => {
469
+ const value = rest.length ? rest[0] : ret;
470
+ if ($$.ctx && not_equal($$.ctx[i], $$.ctx[i] = value)) {
471
+ if (!$$.skip_bound && $$.bound[i])
472
+ $$.bound[i](value);
473
+ if (ready)
474
+ make_dirty(component, i);
475
+ }
476
+ return ret;
477
+ })
478
+ : [];
479
+ $$.update();
480
+ ready = true;
481
+ run_all($$.before_update);
482
+ // `false` as a special case of no DOM component
483
+ $$.fragment = create_fragment ? create_fragment($$.ctx) : false;
484
+ if (options.target) {
485
+ if (options.hydrate) {
486
+ start_hydrating();
487
+ const nodes = children(options.target);
488
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
489
+ $$.fragment && $$.fragment.l(nodes);
490
+ nodes.forEach(detach);
491
+ }
492
+ else {
493
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
494
+ $$.fragment && $$.fragment.c();
495
+ }
496
+ if (options.intro)
497
+ transition_in(component.$$.fragment);
498
+ mount_component(component, options.target, options.anchor, options.customElement);
499
+ end_hydrating();
500
+ flush();
501
+ }
502
+ set_current_component(parent_component);
503
+ }
504
+ /**
505
+ * Base class for Svelte components. Used when dev=false.
506
+ */
507
+ class SvelteComponent {
508
+ $destroy() {
509
+ destroy_component(this, 1);
510
+ this.$destroy = noop;
511
+ }
512
+ $on(type, callback) {
513
+ const callbacks = (this.$$.callbacks[type] || (this.$$.callbacks[type] = []));
514
+ callbacks.push(callback);
515
+ return () => {
516
+ const index = callbacks.indexOf(callback);
517
+ if (index !== -1)
518
+ callbacks.splice(index, 1);
519
+ };
520
+ }
521
+ $set($$props) {
522
+ if (this.$$set && !is_empty($$props)) {
523
+ this.$$.skip_bound = true;
524
+ this.$$set($$props);
525
+ this.$$.skip_bound = false;
526
+ }
527
+ }
528
+ }
529
+
530
+ /* src/InstanceView.svelte generated by Svelte v3.49.0 */
531
+
532
+ function add_css(target) {
533
+ append_styles(target, "svelte-1qkvlix", ".label.svelte-1qkvlix{font-size:12px;color:rgba(0, 0, 0, 0.5);font-variant:small-caps}.value.svelte-1qkvlix{font-size:12px}.box.svelte-1qkvlix{padding:10px;border:0.5px solid rgb(224, 224, 224);max-width:400px}#container.svelte-1qkvlix{display:flex;flex-direction:row;flex-wrap:wrap}spectrogram canvas{z-index:0 !important}wave canvas{z-index:0 !important}wave{z-index:0 !important}");
534
+ }
535
+
536
+ // (27:4) {#if modelColumn && entry[modelColumn] !== undefined}
537
+ function create_if_block(ctx) {
538
+ let br;
539
+ let t0;
540
+ let span0;
541
+ let t1;
542
+ let t2;
543
+ let span1;
544
+ let t3_value = /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] + "";
545
+ let t3;
546
+
547
+ return {
548
+ c() {
549
+ br = element("br");
550
+ t0 = space();
551
+ span0 = element("span");
552
+ t1 = text("output:");
553
+ t2 = space();
554
+ span1 = element("span");
555
+ t3 = text(t3_value);
556
+ this.h();
557
+ },
558
+ l(nodes) {
559
+ br = claim_element(nodes, "BR", {});
560
+ t0 = claim_space(nodes);
561
+ span0 = claim_element(nodes, "SPAN", { class: true });
562
+ var span0_nodes = children(span0);
563
+ t1 = claim_text(span0_nodes, "output:");
564
+ span0_nodes.forEach(detach);
565
+ t2 = claim_space(nodes);
566
+ span1 = claim_element(nodes, "SPAN", { class: true });
567
+ var span1_nodes = children(span1);
568
+ t3 = claim_text(span1_nodes, t3_value);
569
+ span1_nodes.forEach(detach);
570
+ this.h();
571
+ },
572
+ h() {
573
+ attr(span0, "class", "label svelte-1qkvlix");
574
+ attr(span1, "class", "value svelte-1qkvlix");
575
+ },
576
+ m(target, anchor) {
577
+ insert_hydration(target, br, anchor);
578
+ insert_hydration(target, t0, anchor);
579
+ insert_hydration(target, span0, anchor);
580
+ append_hydration(span0, t1);
581
+ insert_hydration(target, t2, anchor);
582
+ insert_hydration(target, span1, anchor);
583
+ append_hydration(span1, t3);
584
+ },
585
+ p(ctx, dirty) {
586
+ if (dirty & /*entry, modelColumn*/ 3 && t3_value !== (t3_value = /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] + "")) set_data(t3, t3_value);
587
+ },
588
+ d(detaching) {
589
+ if (detaching) detach(br);
590
+ if (detaching) detach(t0);
591
+ if (detaching) detach(span0);
592
+ if (detaching) detach(t2);
593
+ if (detaching) detach(span1);
594
+ }
595
+ };
596
+ }
597
+
598
+ function create_fragment(ctx) {
599
+ let div2;
600
+ let div1;
601
+ let div0;
602
+ let audio;
603
+ let source;
604
+ let source_src_value;
605
+ let source_type_value;
606
+ let audio_src_value;
607
+ let t0;
608
+ let span0;
609
+ let t1;
610
+ let span1;
611
+ let t2_value = /*entry*/ ctx[0][/*labelColumn*/ ctx[2]] + "";
612
+ let t2;
613
+ let t3;
614
+ let if_block = /*modelColumn*/ ctx[1] && /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] !== undefined && create_if_block(ctx);
615
+
616
+ return {
617
+ c() {
618
+ div2 = element("div");
619
+ div1 = element("div");
620
+ div0 = element("div");
621
+ audio = element("audio");
622
+ source = element("source");
623
+ t0 = space();
624
+ span0 = element("span");
625
+ t1 = text("label: ");
626
+ span1 = element("span");
627
+ t2 = text(t2_value);
628
+ t3 = space();
629
+ if (if_block) if_block.c();
630
+ this.h();
631
+ },
632
+ l(nodes) {
633
+ div2 = claim_element(nodes, "DIV", { id: true, class: true });
634
+ var div2_nodes = children(div2);
635
+ div1 = claim_element(div2_nodes, "DIV", { class: true });
636
+ var div1_nodes = children(div1);
637
+ div0 = claim_element(div1_nodes, "DIV", {});
638
+ var div0_nodes = children(div0);
639
+ audio = claim_element(div0_nodes, "AUDIO", { src: true });
640
+ var audio_nodes = children(audio);
641
+ source = claim_element(audio_nodes, "SOURCE", { src: true, type: true });
642
+ audio_nodes.forEach(detach);
643
+ div0_nodes.forEach(detach);
644
+ t0 = claim_space(div1_nodes);
645
+ span0 = claim_element(div1_nodes, "SPAN", { class: true });
646
+ var span0_nodes = children(span0);
647
+ t1 = claim_text(span0_nodes, "label: ");
648
+ span0_nodes.forEach(detach);
649
+ span1 = claim_element(div1_nodes, "SPAN", { class: true });
650
+ var span1_nodes = children(span1);
651
+ t2 = claim_text(span1_nodes, t2_value);
652
+ span1_nodes.forEach(detach);
653
+ t3 = claim_space(div1_nodes);
654
+ if (if_block) if_block.l(div1_nodes);
655
+ div1_nodes.forEach(detach);
656
+ div2_nodes.forEach(detach);
657
+ this.h();
658
+ },
659
+ h() {
660
+ if (!src_url_equal(source.src, source_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) attr(source, "src", source_src_value);
661
+ attr(source, "type", source_type_value = "audio/" + /*entry*/ ctx[0][/*idColumn*/ ctx[4]].split(".").at(-1));
662
+ audio.controls = true;
663
+ if (!src_url_equal(audio.src, audio_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) attr(audio, "src", audio_src_value);
664
+ set_style(div0, "display", `flex`, false);
665
+ attr(span0, "class", "label svelte-1qkvlix");
666
+ attr(span1, "class", "value svelte-1qkvlix");
667
+ attr(div1, "class", "box svelte-1qkvlix");
668
+ attr(div2, "id", "container");
669
+ attr(div2, "class", "svelte-1qkvlix");
670
+ },
671
+ m(target, anchor) {
672
+ insert_hydration(target, div2, anchor);
673
+ append_hydration(div2, div1);
674
+ append_hydration(div1, div0);
675
+ append_hydration(div0, audio);
676
+ append_hydration(audio, source);
677
+ append_hydration(div1, t0);
678
+ append_hydration(div1, span0);
679
+ append_hydration(span0, t1);
680
+ append_hydration(div1, span1);
681
+ append_hydration(span1, t2);
682
+ append_hydration(div1, t3);
683
+ if (if_block) if_block.m(div1, null);
684
+ },
685
+ p(ctx, [dirty]) {
686
+ if (dirty & /*entry, dataColumn*/ 9 && !src_url_equal(source.src, source_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) {
687
+ attr(source, "src", source_src_value);
688
+ }
689
+
690
+ if (dirty & /*entry, idColumn*/ 17 && source_type_value !== (source_type_value = "audio/" + /*entry*/ ctx[0][/*idColumn*/ ctx[4]].split(".").at(-1))) {
691
+ attr(source, "type", source_type_value);
692
+ }
693
+
694
+ if (dirty & /*entry, dataColumn*/ 9 && !src_url_equal(audio.src, audio_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) {
695
+ attr(audio, "src", audio_src_value);
696
+ }
697
+
698
+ if (dirty & /*entry, labelColumn*/ 5 && t2_value !== (t2_value = /*entry*/ ctx[0][/*labelColumn*/ ctx[2]] + "")) set_data(t2, t2_value);
699
+
700
+ if (/*modelColumn*/ ctx[1] && /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] !== undefined) {
701
+ if (if_block) {
702
+ if_block.p(ctx, dirty);
703
+ } else {
704
+ if_block = create_if_block(ctx);
705
+ if_block.c();
706
+ if_block.m(div1, null);
707
+ }
708
+ } else if (if_block) {
709
+ if_block.d(1);
710
+ if_block = null;
711
+ }
712
+ },
713
+ i: noop,
714
+ o: noop,
715
+ d(detaching) {
716
+ if (detaching) detach(div2);
717
+ if (if_block) if_block.d();
718
+ }
719
+ };
720
+ }
721
+
722
+ function instance($$self, $$props, $$invalidate) {
723
+ let { entry } = $$props;
724
+ let { options } = $$props;
725
+ let { modelColumn } = $$props;
726
+ let { labelColumn } = $$props;
727
+ let { dataColumn } = $$props;
728
+ let { idColumn } = $$props;
729
+
730
+ $$self.$$set = $$props => {
731
+ if ('entry' in $$props) $$invalidate(0, entry = $$props.entry);
732
+ if ('options' in $$props) $$invalidate(5, options = $$props.options);
733
+ if ('modelColumn' in $$props) $$invalidate(1, modelColumn = $$props.modelColumn);
734
+ if ('labelColumn' in $$props) $$invalidate(2, labelColumn = $$props.labelColumn);
735
+ if ('dataColumn' in $$props) $$invalidate(3, dataColumn = $$props.dataColumn);
736
+ if ('idColumn' in $$props) $$invalidate(4, idColumn = $$props.idColumn);
737
+ };
738
+
739
+ return [entry, modelColumn, labelColumn, dataColumn, idColumn, options];
740
+ }
741
+
742
+ class InstanceView extends SvelteComponent {
743
+ constructor(options) {
744
+ super();
745
+
746
+ init(
747
+ this,
748
+ options,
749
+ instance,
750
+ create_fragment,
751
+ safe_not_equal,
752
+ {
753
+ entry: 0,
754
+ options: 5,
755
+ modelColumn: 1,
756
+ labelColumn: 2,
757
+ dataColumn: 3,
758
+ idColumn: 4
759
+ },
760
+ add_css
761
+ );
762
+ }
763
+ }
764
+
765
+ function getInstance(
766
+ div,
767
+ options,
768
+ entry,
769
+ modelColumn,
770
+ labelColumn,
771
+ dataColumn,
772
+ idColumn
773
+ ) {
774
+ new InstanceView({
775
+ target: div,
776
+ props: {
777
+ entry: entry,
778
+ options: options,
779
+ modelColumn: modelColumn,
780
+ labelColumn: labelColumn,
781
+ dataColumn: dataColumn,
782
+ idColumn: idColumn,
783
+ },
784
+ hydrate: true,
785
+ });
786
+ }
787
+
788
+ export { getInstance };
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.8
5
+
6
+ RUN useradd -m -u 1000 user
7
+ USER user
8
+ # Set home to the user's home directory
9
+ ENV HOME=/home/user \
10
+ PATH=/home/user/.local/bin:$PATH
11
+ WORKDIR $HOME/app
12
+ # Copy the current directory contents into the container at $HOME/app setting the owner to the user
13
+ COPY --chown=user . $HOME/app
14
+ ADD --chown=user ./.zeno_cache $HOME/app/.zeno_cache
15
+ RUN chown user:user -R $HOME/app
16
+
17
+ COPY ./requirements.txt /code/requirements.txt
18
+
19
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
20
+
21
+
22
+ CMD ["zeno", "config.toml"]
config.toml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ view = "audio-transcription"
2
+ functions = "./functions/"
3
+ models = ["silero_sst", "whisper"]
4
+ metadata = "metadata.csv"
5
+ data_path = "/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/"
6
+ data_column = "id"
7
+ id_column = "id"
8
+ label_column = "label"
9
+ port = 7860
10
+ host = "0.0.0.0"
11
+ editable = false
functions/audio_characteristics.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import librosa
4
+ import numpy as np
5
+ from zeno import ZenoOptions, distill, DistillReturn
6
+
7
+
8
@distill
def amplitude(df, ops: ZenoOptions):
    """Distill the mean absolute amplitude of each audio file referenced by ``df``.

    Every value in the ``ops.data_column`` column is joined onto
    ``ops.data_path`` and loaded with ``librosa.load``. The mean of the
    absolute sample values is reported as a Python ``float``, one per row.

    Returns:
        DistillReturn whose ``distill_output`` is the list of mean amplitudes.
    """

    def mean_abs(path):
        # librosa.load returns (samples, sample_rate); only the samples matter here.
        samples, _ = librosa.load(path)
        return float(np.abs(samples).mean())

    mean_amplitudes = [
        mean_abs(os.path.join(ops.data_path, name)) for name in df[ops.data_column]
    ]
    return DistillReturn(distill_output=mean_amplitudes)
16
+
17
+
18
@distill
def length(df, ops: ZenoOptions):
    """Distill the length, in samples, of each audio file referenced by ``df``.

    The length is ``len()`` of the sample array that ``librosa.load`` returns
    when called with its default arguments on the file at
    ``ops.data_path`` joined with the row's ``ops.data_column`` value.

    Returns:
        DistillReturn whose ``distill_output`` is the list of sample counts.
    """
    sample_counts = []
    for name in df[ops.data_column]:
        samples, _ = librosa.load(os.path.join(ops.data_path, name))
        sample_counts.append(len(samples))
    return DistillReturn(distill_output=sample_counts)
functions/model.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import os
3
+
4
+ import pandas as pd
5
+ import torch
6
+ import whisper
7
+ from jiwer import wer
8
+
9
+ from zeno import (
10
+ ZenoOptions,
11
+ distill,
12
+ metric,
13
+ model,
14
+ DistillReturn,
15
+ ModelReturn,
16
+ MetricReturn,
17
+ )
18
+
19
+
20
@model
def load_model(model_path):
    """Build the prediction function for the model named by ``model_path``.

    Two models are supported, selected by substring match on ``model_path``:

    * ``"sst"``: the English Silero speech-to-text model, loaded on CPU via
      ``torch.hub`` from ``snakers4/silero-models``.
    * ``"whisper"``: the ``"tiny"`` Whisper checkpoint.

    Args:
        model_path: Model name; must contain ``"sst"`` or ``"whisper"``.

    Returns:
        A ``pred(df, ops)`` callable that transcribes every file listed in
        ``df[ops.data_column]`` (resolved against ``ops.data_path``) and
        returns a ``ModelReturn`` holding one transcript per row.

    Raises:
        ValueError: If ``model_path`` matches neither supported model.
            Without this, the function would silently return ``None``.
    """
    if "sst" in model_path:
        device = torch.device("cpu")
        stt_model, decoder, utils = torch.hub.load(
            repo_or_dir="snakers4/silero-models",
            model="silero_stt",
            language="en",
            device=device,
        )
        # Only the batch reader and the input-preparation helper are needed.
        (read_batch, _, _, prepare_model_input) = utils

        def pred(df, ops: ZenoOptions):
            files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]
            batch = prepare_model_input(read_batch(files), device=device)
            return ModelReturn(
                model_output=[decoder(x.cpu()) for x in stt_model(batch)]
            )

        return pred

    if "whisper" in model_path:
        whisper_model = whisper.load_model("tiny")

        def pred(df, ops: ZenoOptions):
            files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]
            transcripts = [whisper_model.transcribe(f)["text"] for f in files]
            return ModelReturn(model_output=transcripts)

        return pred

    raise ValueError(
        f"Unsupported model {model_path!r}: expected a name containing "
        "'sst' or 'whisper'"
    )
50
+
51
+
52
@distill
def country(df, ops: ZenoOptions):
    """Distill the country part of each row's ``birthplace`` value.

    The country is taken to be the text after the last ``", "`` in the
    ``birthplace`` string. Rows whose birthplace is missing (NaN) yield an
    empty string.

    The previous implementation only inspected the row labelled ``0`` and
    returned a single-element list regardless of how many rows ``df`` had.
    This version computes one value per row, so the output always has
    ``len(df)`` entries.

    Returns:
        DistillReturn whose ``distill_output`` is a list of country strings,
        one per row of ``df``.
    """
    # Replace NaN up front so the .str accessor sees only strings.
    birthplaces = df["birthplace"].fillna("").astype(str)
    # .str[-1] picks the last piece of each row's split result.
    countries = birthplaces.str.split(", ").str[-1]
    return DistillReturn(distill_output=countries.tolist())
57
+
58
+
59
@distill
def wer_m(df, ops: ZenoOptions):
    """Distill the word error rate of each row.

    For every row, ``jiwer.wer`` is called with the value in
    ``ops.label_column`` as the first argument and the value in
    ``ops.output_column`` as the second.

    Returns:
        DistillReturn whose ``distill_output`` is the result of the row-wise
        ``DataFrame.apply``.
    """

    def row_wer(row):
        return wer(row[ops.label_column], row[ops.output_column])

    per_row = df.apply(row_wer, axis=1)
    return DistillReturn(distill_output=per_row)
66
+
67
+
68
@metric
def avg_wer(df, ops: ZenoOptions):
    """Metric: the mean of the distilled ``wer_m`` column.

    The column name is looked up in ``ops.distill_columns["wer_m"]``. When the
    mean is null or NaN, the metric is reported as ``0`` instead.

    Returns:
        MetricReturn carrying the mean word error rate, or ``0``.
    """
    wer_column = ops.distill_columns["wer_m"]
    mean_wer = df[wer_column].mean()
    is_missing = pd.isnull(mean_wer) or math.isnan(mean_wer)
    return MetricReturn(metric=0 if is_missing else mean_wer)
jupyter_accent.ipynb ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "%load_ext autoreload\n",
10
+ "%autoreload 2\n",
11
+ "\n",
12
+ "from zeno import zeno\n",
13
+ "import math\n",
14
+ "import os\n",
15
+ "import pandas as pd"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "code",
20
+ "execution_count": 9,
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "df = pd.read_csv(\"metadata.csv\")"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 10,
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "df.set_index('id', inplace=True, drop=False)"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 12,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "ename": "ValueError",
43
+ "evalue": "'id' is both an index level and a column label, which is ambiguous.",
44
+ "output_type": "error",
45
+ "traceback": [
46
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
47
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
48
+ "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df\u001b[39m.\u001b[39;49mgroupby(\u001b[39m'\u001b[39;49m\u001b[39mid\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
49
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/frame.py:8402\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 8399\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mYou have to supply one of \u001b[39m\u001b[39m'\u001b[39m\u001b[39mby\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39mlevel\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 8400\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_axis_number(axis)\n\u001b[0;32m-> 8402\u001b[0m \u001b[39mreturn\u001b[39;00m DataFrameGroupBy(\n\u001b[1;32m 8403\u001b[0m obj\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[1;32m 8404\u001b[0m keys\u001b[39m=\u001b[39;49mby,\n\u001b[1;32m 8405\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[1;32m 8406\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[1;32m 8407\u001b[0m as_index\u001b[39m=\u001b[39;49mas_index,\n\u001b[1;32m 8408\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[1;32m 8409\u001b[0m group_keys\u001b[39m=\u001b[39;49mgroup_keys,\n\u001b[1;32m 8410\u001b[0m squeeze\u001b[39m=\u001b[39;49msqueeze,\n\u001b[1;32m 8411\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[1;32m 8412\u001b[0m dropna\u001b[39m=\u001b[39;49mdropna,\n\u001b[1;32m 8413\u001b[0m )\n",
50
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:965\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 962\u001b[0m \u001b[39mif\u001b[39;00m grouper \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 963\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgroupby\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgrouper\u001b[39;00m \u001b[39mimport\u001b[39;00m get_grouper\n\u001b[0;32m--> 965\u001b[0m grouper, exclusions, obj \u001b[39m=\u001b[39m get_grouper(\n\u001b[1;32m 966\u001b[0m obj,\n\u001b[1;32m 967\u001b[0m keys,\n\u001b[1;32m 968\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[1;32m 969\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[1;32m 970\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[1;32m 971\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[1;32m 972\u001b[0m mutated\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmutated,\n\u001b[1;32m 973\u001b[0m dropna\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdropna,\n\u001b[1;32m 974\u001b[0m )\n\u001b[1;32m 976\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj \u001b[39m=\u001b[39m obj\n\u001b[1;32m 977\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39m_get_axis_number(axis)\n",
51
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/groupby/grouper.py:878\u001b[0m, in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 876\u001b[0m \u001b[39mif\u001b[39;00m gpr \u001b[39min\u001b[39;00m obj:\n\u001b[1;32m 877\u001b[0m \u001b[39mif\u001b[39;00m validate:\n\u001b[0;32m--> 878\u001b[0m obj\u001b[39m.\u001b[39;49m_check_label_or_level_ambiguity(gpr, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[1;32m 879\u001b[0m in_axis, name, gpr \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m, gpr, obj[gpr]\n\u001b[1;32m 880\u001b[0m \u001b[39mif\u001b[39;00m gpr\u001b[39m.\u001b[39mndim \u001b[39m!=\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 881\u001b[0m \u001b[39m# non-unique columns; raise here to get the name in the\u001b[39;00m\n\u001b[1;32m 882\u001b[0m \u001b[39m# exception message\u001b[39;00m\n",
52
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/generic.py:1797\u001b[0m, in \u001b[0;36mNDFrame._check_label_or_level_ambiguity\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1789\u001b[0m label_article, label_type \u001b[39m=\u001b[39m (\n\u001b[1;32m 1790\u001b[0m (\u001b[39m\"\u001b[39m\u001b[39ma\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mcolumn\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mif\u001b[39;00m axis \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m \u001b[39melse\u001b[39;00m (\u001b[39m\"\u001b[39m\u001b[39man\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 1791\u001b[0m )\n\u001b[1;32m 1793\u001b[0m msg \u001b[39m=\u001b[39m (\n\u001b[1;32m 1794\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m is both \u001b[39m\u001b[39m{\u001b[39;00mlevel_article\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m{\u001b[39;00mlevel_type\u001b[39m}\u001b[39;00m\u001b[39m level and \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1795\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mlabel_article\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m{\u001b[39;00mlabel_type\u001b[39m}\u001b[39;00m\u001b[39m label, which is ambiguous.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1796\u001b[0m )\n\u001b[0;32m-> 1797\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(msg)\n",
53
+ "\u001b[0;31mValueError\u001b[0m: 'id' is both an index level and a column label, which is ambiguous."
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "df.groupby('id')"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "zeno({\n",
68
+ " \"metadata\": df[0:10],\n",
69
+ " \"view\": \"audio-transcription\",\n",
70
+ " \"data_path\": \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/\",\n",
71
+ " \"label_column\": \"label\",\n",
72
+ " \"data_column\": \"id\"\n",
73
+ "})"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "import torch\n",
83
+ "import whisper\n",
84
+ "from jiwer import wer\n",
85
+ "from zeno import ZenoOptions, distill, metric, model\n",
86
+ "import numpy as np\n",
87
+ "from zeno import ZenoOptions, distill"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": null,
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "@model\n",
97
+ "def load_model(model_path):\n",
98
+ " if \"sst\" in model_path:\n",
99
+ " device = torch.device(\"cpu\")\n",
100
+ " model, decoder, utils = torch.hub.load(\n",
101
+ " repo_or_dir=\"snakers4/silero-models\",\n",
102
+ " model=\"silero_stt\",\n",
103
+ " language=\"en\",\n",
104
+ " device=device,\n",
105
+ " )\n",
106
+ " (read_batch, _, _, prepare_model_input) = utils\n",
107
+ "\n",
108
+ " def pred(df, ops: ZenoOptions):\n",
109
+ " files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
110
+ " input = prepare_model_input(read_batch(files), device=device)\n",
111
+ " return [decoder(x.cpu()) for x in model(input)]\n",
112
+ "\n",
113
+ " return pred\n",
114
+ "\n",
115
+ " elif \"whisper\" in model_path:\n",
116
+ " model = whisper.load_model(\"tiny\")\n",
117
+ "\n",
118
+ " def pred(df, ops: ZenoOptions):\n",
119
+ " files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
120
+ " outs = []\n",
121
+ " for f in files:\n",
122
+ " outs.append(model.transcribe(f)[\"text\"])\n",
123
+ " return outs\n",
124
+ "\n",
125
+ " return pred\n",
126
+ "\n",
127
+ "\n",
128
+ "@distill\n",
129
+ "def country(df, ops: ZenoOptions):\n",
130
+ " if df[\"0birthplace\"][0] == df[\"0birthplace\"][0]:\n",
131
+ " return df[\"0birthplace\"].str.split(\", \")[-1][-1]\n",
132
+ " return \"\"\n",
133
+ "\n",
134
+ "\n",
135
+ "@distill\n",
136
+ "def wer_m(df, ops: ZenoOptions):\n",
137
+ " return df.apply(lambda x: wer(x[ops.label_column], x[ops.output_column]), axis=1)\n",
138
+ "\n",
139
+ "\n",
140
+ "@metric\n",
141
+ "def avg_wer(df, ops: ZenoOptions):\n",
142
+ " avg = df[ops.distill_columns[\"wer_m\"]].mean()\n",
143
+ " if math.isnan(avg):\n",
144
+ " return 0\n",
145
+ " return avg\n",
146
+ "\n",
147
+ "# @distill\n",
148
+ "# def amplitude(df, ops: ZenoOptions):\n",
149
+ "# files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
150
+ "# amps = []\n",
151
+ "# for audio in files:\n",
152
+ "# y, _ = librosa.load(audio)\n",
153
+ "# amps.append(float(np.abs(y).mean()))\n",
154
+ "# return amps\n",
155
+ "\n",
156
+ "\n",
157
+ "# @distill\n",
158
+ "# def length(df, ops: ZenoOptions):\n",
159
+ "# files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
160
+ "# amps = []\n",
161
+ "# for audio in files:\n",
162
+ "# y, _ = librosa.load(audio)\n",
163
+ "# amps.append(len(y))\n",
164
+ "# return amps"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "code",
169
+ "execution_count": null,
170
+ "metadata": {
171
+ "tags": []
172
+ },
173
+ "outputs": [],
174
+ "source": [
175
+ "zeno({\n",
176
+ " \"metadata\": df,\n",
177
+ " \"functions\": [load_model, country, wer_m, avg_wer],\n",
178
+ " \"view\": \"audio-transcription\",\n",
179
+ " \"models\": [\"silero_sst\", \"whisper\"],\n",
180
+ " \"data_path\": \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/\",\n",
181
+ " \"data_column\": \"id\",\n",
182
+ " \"label_column\": \"label\",\n",
183
+ " \"samples\": 10,\n",
184
+ "})\n",
185
+ "# metadata = \"metadata.csv\"\n",
186
+ "# # data_path = \"https://zenoml.s3.amazonaws.com/accents/\""
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": []
195
+ }
196
+ ],
197
+ "metadata": {
198
+ "kernelspec": {
199
+ "display_name": "Python 3 (ipykernel)",
200
+ "language": "python",
201
+ "name": "python3"
202
+ },
203
+ "language_info": {
204
+ "codemirror_mode": {
205
+ "name": "ipython",
206
+ "version": 3
207
+ },
208
+ "file_extension": ".py",
209
+ "mimetype": "text/x-python",
210
+ "name": "python",
211
+ "nbconvert_exporter": "python",
212
+ "pygments_lexer": "ipython3",
213
+ "version": "3.8.12"
214
+ },
215
+ "vscode": {
216
+ "interpreter": {
217
+ "hash": "59d606a796fde3c997548ee5ab3f3009081de8aa2fb58c2406e58b3c7613e786"
218
+ }
219
+ }
220
+ },
221
+ "nbformat": 4,
222
+ "nbformat_minor": 4
223
+ }
latest_silero_models.yml ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pre-trained STT models
2
+ stt_models:
3
+ en:
4
+ latest:
5
+ meta:
6
+ name: "en_v6"
7
+ sample: "https://models.silero.ai/examples/en_sample.wav"
8
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
9
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
10
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
11
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
12
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
13
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
14
+ v6:
15
+ meta:
16
+ name: "en_v6"
17
+ sample: "https://models.silero.ai/examples/en_sample.wav"
18
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
19
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
20
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
21
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
22
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
23
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
24
+ v5:
25
+ meta:
26
+ name: "en_v5"
27
+ sample: "https://models.silero.ai/examples/en_sample.wav"
28
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
29
+ jit: "https://models.silero.ai/models/en/en_v5.jit"
30
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
31
+ onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
32
+ jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
33
+ jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
34
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
35
+ v4_0:
36
+ meta:
37
+ name: "en_v4_0"
38
+ sample: "https://models.silero.ai/examples/en_sample.wav"
39
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
40
+ jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
41
+ onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
42
+ v3:
43
+ meta:
44
+ name: "en_v3"
45
+ sample: "https://models.silero.ai/examples/en_sample.wav"
46
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
47
+ jit: "https://models.silero.ai/models/en/en_v3_jit.model"
48
+ onnx: "https://models.silero.ai/models/en/en_v3.onnx"
49
+ jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
50
+ jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
51
+ jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
52
+ onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
53
+ jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
54
+ jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
55
+ onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
56
+ v2:
57
+ meta:
58
+ name: "en_v2"
59
+ sample: "https://models.silero.ai/examples/en_sample.wav"
60
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
61
+ jit: "https://models.silero.ai/models/en/en_v2_jit.model"
62
+ onnx: "https://models.silero.ai/models/en/en_v2.onnx"
63
+ tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
64
+ v1:
65
+ meta:
66
+ name: "en_v1"
67
+ sample: "https://models.silero.ai/examples/en_sample.wav"
68
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
69
+ jit: "https://models.silero.ai/models/en/en_v1_jit.model"
70
+ onnx: "https://models.silero.ai/models/en/en_v1.onnx"
71
+ tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
72
+ de:
73
+ latest:
74
+ meta:
75
+ name: "de_v1"
76
+ sample: "https://models.silero.ai/examples/de_sample.wav"
77
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
78
+ jit: "https://models.silero.ai/models/de/de_v1_jit.model"
79
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
80
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
81
+ v1:
82
+ meta:
83
+ name: "de_v1"
84
+ sample: "https://models.silero.ai/examples/de_sample.wav"
85
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
86
+ jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
87
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
88
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
89
+ v3:
90
+ meta:
91
+ name: "de_v3"
92
+ sample: "https://models.silero.ai/examples/de_sample.wav"
93
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
94
+ jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
95
+ v4:
96
+ meta:
97
+ name: "de_v4"
98
+ sample: "https://models.silero.ai/examples/de_sample.wav"
99
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
100
+ jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
101
+ onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
102
+ es:
103
+ latest:
104
+ meta:
105
+ name: "es_v1"
106
+ sample: "https://models.silero.ai/examples/es_sample.wav"
107
+ labels: "https://models.silero.ai/models/es/es_v1_labels.json"
108
+ jit: "https://models.silero.ai/models/es/es_v1_jit.model"
109
+ onnx: "https://models.silero.ai/models/es/es_v1.onnx"
110
+ tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
111
+ ua:
112
+ latest:
113
+ meta:
114
+ name: "ua_v3"
115
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
116
+ credits:
117
+ datasets:
118
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
119
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
120
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
121
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
122
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
123
+ v3:
124
+ meta:
125
+ name: "ua_v3"
126
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
127
+ credits:
128
+ datasets:
129
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
130
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
131
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
132
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
133
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
134
+ v1:
135
+ meta:
136
+ name: "ua_v1"
137
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
138
+ credits:
139
+ datasets:
140
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
141
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
142
+ jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
143
+ jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
144
+ tts_models:
145
+ ru:
146
+ v3_1_ru:
147
+ latest:
148
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
149
+ package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
150
+ sample_rate: [8000, 24000, 48000]
151
+ ru_v3:
152
+ latest:
153
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
154
+ package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
155
+ sample_rate: [8000, 24000, 48000]
156
+ aidar_v2:
157
+ latest:
158
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
159
+ package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
160
+ sample_rate: [8000, 16000]
161
+ aidar_8khz:
162
+ latest:
163
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
164
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
165
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
166
+ sample_rate: 8000
167
+ v1:
168
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
169
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
170
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
171
+ sample_rate: 8000
172
+ aidar_16khz:
173
+ latest:
174
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
175
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
176
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
177
+ sample_rate: 16000
178
+ v1:
179
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
180
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
181
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
182
+ sample_rate: 16000
183
+ baya_v2:
184
+ latest:
185
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
186
+ package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
187
+ sample_rate: [8000, 16000]
188
+ baya_8khz:
189
+ latest:
190
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
191
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
192
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
193
+ sample_rate: 8000
194
+ v1:
195
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
196
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
197
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
198
+ sample_rate: 8000
199
+ baya_16khz:
200
+ latest:
201
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
202
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
203
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
204
+ sample_rate: 16000
205
+ v1:
206
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
207
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
208
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
209
+ sample_rate: 16000
210
+ irina_v2:
211
+ latest:
212
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
213
+ package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
214
+ sample_rate: [8000, 16000]
215
+ irina_8khz:
216
+ latest:
217
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
218
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
219
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
220
+ sample_rate: 8000
221
+ v1:
222
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
223
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
224
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
225
+ sample_rate: 8000
226
+ irina_16khz:
227
+ latest:
228
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
229
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
230
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
231
+ sample_rate: 16000
232
+ v1:
233
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
234
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
235
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
236
+ sample_rate: 16000
237
+ kseniya_v2:
238
+ latest:
239
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
240
+ package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
241
+ sample_rate: [8000, 16000]
242
+ kseniya_8khz:
243
+ latest:
244
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
245
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
246
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
247
+ sample_rate: 8000
248
+ v1:
249
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
250
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
251
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
252
+ sample_rate: 8000
253
+ kseniya_16khz:
254
+ latest:
255
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
256
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
257
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
258
+ sample_rate: 16000
259
+ v1:
260
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
261
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
262
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
263
+ sample_rate: 16000
264
+ natasha_v2:
265
+ latest:
266
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
267
+ package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
268
+ sample_rate: [8000, 16000]
269
+ natasha_8khz:
270
+ latest:
271
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
272
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
273
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
274
+ sample_rate: 8000
275
+ v1:
276
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
277
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
278
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
279
+ sample_rate: 8000
280
+ natasha_16khz:
281
+ latest:
282
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
283
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
284
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
285
+ sample_rate: 16000
286
+ v1:
287
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
288
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
289
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
290
+ sample_rate: 16000
291
+ ruslan_v2:
292
+ latest:
293
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
294
+ package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
295
+ sample_rate: [8000, 16000]
296
+ ruslan_8khz:
297
+ latest:
298
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
299
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
300
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
301
+ sample_rate: 8000
302
+ v1:
303
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
304
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
305
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
306
+ sample_rate: 8000
307
+ ruslan_16khz:
308
+ latest:
309
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
310
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
311
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
312
+ sample_rate: 16000
313
+ v1:
314
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
315
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
316
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
317
+ sample_rate: 16000
318
+ en:
319
+ v3_en:
320
+ latest:
321
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
322
+ package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
323
+ sample_rate: [8000, 24000, 48000]
324
+ v3_en_indic:
325
+ latest:
326
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
327
+ package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
328
+ sample_rate: [8000, 24000, 48000]
329
+ lj_v2:
330
+ latest:
331
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
332
+ package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
333
+ sample_rate: [8000, 16000]
334
+ lj_8khz:
335
+ latest:
336
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
337
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
338
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
339
+ sample_rate: 8000
340
+ v1:
341
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
342
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
343
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
344
+ sample_rate: 8000
345
+ lj_16khz:
346
+ latest:
347
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
348
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
349
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
350
+ sample_rate: 16000
351
+ v1:
352
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
353
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
354
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
355
+ sample_rate: 16000
356
+ de:
357
+ v3_de:
358
+ latest:
359
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
360
+ package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
361
+ sample_rate: [8000, 24000, 48000]
362
+ thorsten_v2:
363
+ latest:
364
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
365
+ package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
366
+ sample_rate: [8000, 16000]
367
+ thorsten_8khz:
368
+ latest:
369
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
370
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
371
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
372
+ sample_rate: 8000
373
+ v1:
374
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
375
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
376
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
377
+ sample_rate: 8000
378
+ thorsten_16khz:
379
+ latest:
380
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
381
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
382
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
383
+ sample_rate: 16000
384
+ v1:
385
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
386
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
387
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
388
+ sample_rate: 16000
389
+ es:
390
+ v3_es:
391
+ latest:
392
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
393
+ package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
394
+ sample_rate: [8000, 24000, 48000]
395
+ tux_v2:
396
+ latest:
397
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
398
+ package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
399
+ sample_rate: [8000, 16000]
400
+ tux_8khz:
401
+ latest:
402
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
403
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
404
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
405
+ sample_rate: 8000
406
+ v1:
407
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
408
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
409
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
410
+ sample_rate: 8000
411
+ tux_16khz:
412
+ latest:
413
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
414
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
415
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
416
+ sample_rate: 16000
417
+ v1:
418
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
419
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
420
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
421
+ sample_rate: 16000
422
+ fr:
423
+ v3_fr:
424
+ latest:
425
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
426
+ package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
427
+ sample_rate: [8000, 24000, 48000]
428
+ gilles_v2:
429
+ latest:
430
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
431
+ package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
432
+ sample_rate: [8000, 16000]
433
+ gilles_8khz:
434
+ latest:
435
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
436
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
437
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
438
+ sample_rate: 8000
439
+ v1:
440
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
441
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
442
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
443
+ sample_rate: 8000
444
+ gilles_16khz:
445
+ latest:
446
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
447
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
448
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
449
+ sample_rate: 16000
450
+ v1:
451
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
452
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
453
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
454
+ sample_rate: 16000
455
+ ba:
456
+ aigul_v2:
457
+ latest:
458
+ example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
459
+ package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
460
+ sample_rate: [8000, 16000]
461
+ language_name: 'bashkir'
462
+ xal:
463
+ v3_xal:
464
+ latest:
465
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
466
+ package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
467
+ sample_rate: [8000, 24000, 48000]
468
+ erdni_v2:
469
+ latest:
470
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
471
+ package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
472
+ sample_rate: [8000, 16000]
473
+ language_name: 'kalmyk'
474
+ tt:
475
+ v3_tt:
476
+ latest:
477
+ example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
478
+ package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
479
+ sample_rate: [8000, 24000, 48000]
480
+ dilyara_v2:
481
+ latest:
482
+ example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
483
+ package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
484
+ sample_rate: [8000, 16000]
485
+ language_name: 'tatar'
486
+ uz:
487
+ v3_uz:
488
+ latest:
489
+ example: 'Tanishganimdan xursandman.'
490
+ package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
491
+ sample_rate: [8000, 24000, 48000]
492
+ dilnavoz_v2:
493
+ latest:
494
+ example: 'Tanishganimdan xursandman.'
495
+ package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
496
+ sample_rate: [8000, 16000]
497
+ language_name: 'uzbek'
498
+ ua:
499
+ v3_ua:
500
+ latest:
501
+ example: 'К+отики - пухн+асті жив+отики.'
502
+ package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
503
+ sample_rate: [8000, 24000, 48000]
504
+ mykyta_v2:
505
+ latest:
506
+ example: 'К+отики - пухн+асті жив+отики.'
507
+ package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
508
+ sample_rate: [8000, 24000, 48000]
509
+ language_name: 'ukrainian'
510
+ indic:
511
+ v3_indic:
512
+ latest:
513
+ example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
514
+ package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
515
+ sample_rate: [8000, 24000, 48000]
516
+ multi:
517
+ multi_v2:
518
+ latest:
519
+ package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
520
+ sample_rate: [8000, 16000]
521
+ speakers:
522
+ aidar:
523
+ lang: 'ru'
524
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
525
+ baya:
526
+ lang: 'ru'
527
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
528
+ kseniya:
529
+ lang: 'ru'
530
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
531
+ irina:
532
+ lang: 'ru'
533
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
534
+ ruslan:
535
+ lang: 'ru'
536
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
537
+ natasha:
538
+ lang: 'ru'
539
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
540
+ thorsten:
541
+ lang: 'de'
542
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
543
+ tux:
544
+ lang: 'es'
545
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
546
+ gilles:
547
+ lang: 'fr'
548
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
549
+ lj:
550
+ lang: 'en'
551
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
552
+ dilyara:
553
+ lang: 'tt'
554
+ example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
555
+ te_models:
556
+ latest:
557
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
558
+ languages: ['en', 'de', 'ru', 'es']
559
+ punct: '.,-!?—'
560
+ v2:
561
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
562
+ languages: ['en', 'de', 'ru', 'es']
563
+ punct: '.,-!?—'
metadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
process_metadata.ipynb ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 19,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 20,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "df = pd.read_csv(\"./speakers_all.csv\")"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 21,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "df['id'] = df['filename'].apply(lambda x: x + \".wav\")\n",
28
+ "df = df[df['file_missing?'] == False]"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 22,
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "data": {
38
+ "text/html": [
39
+ "<div>\n",
40
+ "<style scoped>\n",
41
+ " .dataframe tbody tr th:only-of-type {\n",
42
+ " vertical-align: middle;\n",
43
+ " }\n",
44
+ "\n",
45
+ " .dataframe tbody tr th {\n",
46
+ " vertical-align: top;\n",
47
+ " }\n",
48
+ "\n",
49
+ " .dataframe thead th {\n",
50
+ " text-align: right;\n",
51
+ " }\n",
52
+ "</style>\n",
53
+ "<table border=\"1\" class=\"dataframe\">\n",
54
+ " <thead>\n",
55
+ " <tr style=\"text-align: right;\">\n",
56
+ " <th></th>\n",
57
+ " <th>age</th>\n",
58
+ " <th>age_onset</th>\n",
59
+ " <th>birthplace</th>\n",
60
+ " <th>filename</th>\n",
61
+ " <th>native_language</th>\n",
62
+ " <th>sex</th>\n",
63
+ " <th>speakerid</th>\n",
64
+ " <th>country</th>\n",
65
+ " <th>file_missing?</th>\n",
66
+ " <th>Unnamed: 9</th>\n",
67
+ " <th>Unnamed: 10</th>\n",
68
+ " <th>Unnamed: 11</th>\n",
69
+ " <th>id</th>\n",
70
+ " </tr>\n",
71
+ " </thead>\n",
72
+ " <tbody>\n",
73
+ " <tr>\n",
74
+ " <th>32</th>\n",
75
+ " <td>27.0</td>\n",
76
+ " <td>9.0</td>\n",
77
+ " <td>virginia, south africa</td>\n",
78
+ " <td>afrikaans1</td>\n",
79
+ " <td>afrikaans</td>\n",
80
+ " <td>female</td>\n",
81
+ " <td>1</td>\n",
82
+ " <td>south africa</td>\n",
83
+ " <td>False</td>\n",
84
+ " <td>NaN</td>\n",
85
+ " <td>NaN</td>\n",
86
+ " <td>NaN</td>\n",
87
+ " <td>afrikaans1.wav</td>\n",
88
+ " </tr>\n",
89
+ " <tr>\n",
90
+ " <th>33</th>\n",
91
+ " <td>40.0</td>\n",
92
+ " <td>5.0</td>\n",
93
+ " <td>pretoria, south africa</td>\n",
94
+ " <td>afrikaans2</td>\n",
95
+ " <td>afrikaans</td>\n",
96
+ " <td>male</td>\n",
97
+ " <td>2</td>\n",
98
+ " <td>south africa</td>\n",
99
+ " <td>False</td>\n",
100
+ " <td>NaN</td>\n",
101
+ " <td>NaN</td>\n",
102
+ " <td>NaN</td>\n",
103
+ " <td>afrikaans2.wav</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>34</th>\n",
107
+ " <td>43.0</td>\n",
108
+ " <td>4.0</td>\n",
109
+ " <td>pretoria, transvaal, south africa</td>\n",
110
+ " <td>afrikaans3</td>\n",
111
+ " <td>afrikaans</td>\n",
112
+ " <td>male</td>\n",
113
+ " <td>418</td>\n",
114
+ " <td>south africa</td>\n",
115
+ " <td>False</td>\n",
116
+ " <td>NaN</td>\n",
117
+ " <td>NaN</td>\n",
118
+ " <td>NaN</td>\n",
119
+ " <td>afrikaans3.wav</td>\n",
120
+ " </tr>\n",
121
+ " <tr>\n",
122
+ " <th>35</th>\n",
123
+ " <td>26.0</td>\n",
124
+ " <td>8.0</td>\n",
125
+ " <td>pretoria, south africa</td>\n",
126
+ " <td>afrikaans4</td>\n",
127
+ " <td>afrikaans</td>\n",
128
+ " <td>male</td>\n",
129
+ " <td>1159</td>\n",
130
+ " <td>south africa</td>\n",
131
+ " <td>False</td>\n",
132
+ " <td>NaN</td>\n",
133
+ " <td>NaN</td>\n",
134
+ " <td>NaN</td>\n",
135
+ " <td>afrikaans4.wav</td>\n",
136
+ " </tr>\n",
137
+ " <tr>\n",
138
+ " <th>36</th>\n",
139
+ " <td>19.0</td>\n",
140
+ " <td>6.0</td>\n",
141
+ " <td>cape town, south africa</td>\n",
142
+ " <td>afrikaans5</td>\n",
143
+ " <td>afrikaans</td>\n",
144
+ " <td>male</td>\n",
145
+ " <td>1432</td>\n",
146
+ " <td>south africa</td>\n",
147
+ " <td>False</td>\n",
148
+ " <td>NaN</td>\n",
149
+ " <td>NaN</td>\n",
150
+ " <td>NaN</td>\n",
151
+ " <td>afrikaans5.wav</td>\n",
152
+ " </tr>\n",
153
+ " </tbody>\n",
154
+ "</table>\n",
155
+ "</div>"
156
+ ],
157
+ "text/plain": [
158
+ " age age_onset birthplace filename \\\n",
159
+ "32 27.0 9.0 virginia, south africa afrikaans1 \n",
160
+ "33 40.0 5.0 pretoria, south africa afrikaans2 \n",
161
+ "34 43.0 4.0 pretoria, transvaal, south africa afrikaans3 \n",
162
+ "35 26.0 8.0 pretoria, south africa afrikaans4 \n",
163
+ "36 19.0 6.0 cape town, south africa afrikaans5 \n",
164
+ "\n",
165
+ " native_language sex speakerid country file_missing? \\\n",
166
+ "32 afrikaans female 1 south africa False \n",
167
+ "33 afrikaans male 2 south africa False \n",
168
+ "34 afrikaans male 418 south africa False \n",
169
+ "35 afrikaans male 1159 south africa False \n",
170
+ "36 afrikaans male 1432 south africa False \n",
171
+ "\n",
172
+ " Unnamed: 9 Unnamed: 10 Unnamed: 11 id \n",
173
+ "32 NaN NaN NaN afrikaans1.wav \n",
174
+ "33 NaN NaN NaN afrikaans2.wav \n",
175
+ "34 NaN NaN NaN afrikaans3.wav \n",
176
+ "35 NaN NaN NaN afrikaans4.wav \n",
177
+ "36 NaN NaN NaN afrikaans5.wav "
178
+ ]
179
+ },
180
+ "execution_count": 22,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "df.head()"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": 23,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": [
195
+ "df['label'] = \"Please call Stella. Ask her to bring these things with her from the store: Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob. We also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.\""
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": 24,
201
+ "metadata": {},
202
+ "outputs": [
203
+ {
204
+ "data": {
205
+ "text/plain": [
206
+ "Index(['age', 'age_onset', 'birthplace', 'filename', 'native_language', 'sex',\n",
207
+ " 'speakerid', 'country', 'file_missing?', 'Unnamed: 9', 'Unnamed: 10',\n",
208
+ " 'Unnamed: 11', 'id', 'label'],\n",
209
+ " dtype='object')"
210
+ ]
211
+ },
212
+ "execution_count": 24,
213
+ "metadata": {},
214
+ "output_type": "execute_result"
215
+ }
216
+ ],
217
+ "source": [
218
+ "df.columns"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": 25,
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "df = df.drop(\"Unnamed: 9\", axis=1)\n",
228
+ "df = df.drop(\"Unnamed: 10\", axis=1)\n",
229
+ "df = df.drop(\"Unnamed: 11\", axis=1)\n",
230
+ "df = df.drop(\"file_missing?\", axis=1)\n",
231
+ "df = df.drop(\"filename\", axis=1)"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": 26,
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": [
240
+ "df.loc[df['sex'] == 'famale', 'sex'] = 'female'"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": 27,
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
249
+ "import pycountry_convert as pc\n",
250
+ "\n",
251
+ "def country_to_continent(country_name):\n",
252
+ " try:\n",
253
+ " country_alpha2 = pc.country_name_to_country_alpha2(country_name, cn_name_format=pc.COUNTRY_NAME_FORMAT_LOWER)\n",
254
+ " country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)\n",
255
+ " country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)\n",
256
+ " return country_continent_name\n",
257
+ " except:\n",
258
+ " return None\n",
259
+ "\n",
260
+ "df[\"continent\"] = df[\"country\"].map(lambda x: country_to_continent(x))"
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": 28,
266
+ "metadata": {},
267
+ "outputs": [
268
+ {
269
+ "data": {
270
+ "text/plain": [
271
+ "False 1647\n",
272
+ "True 493\n",
273
+ "Name: continent, dtype: int64"
274
+ ]
275
+ },
276
+ "execution_count": 28,
277
+ "metadata": {},
278
+ "output_type": "execute_result"
279
+ }
280
+ ],
281
+ "source": [
282
+ "df[\"continent\"].isnull().value_counts()"
283
+ ]
284
+ },
285
+ {
286
+ "cell_type": "code",
287
+ "execution_count": 29,
288
+ "metadata": {},
289
+ "outputs": [],
290
+ "source": [
291
+ "df = df.drop([1544, 1771])"
292
+ ]
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "execution_count": 30,
297
+ "metadata": {},
298
+ "outputs": [],
299
+ "source": [
300
+ "df.to_csv(\"metadata.csv\", index=False)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "markdown",
305
+ "metadata": {},
306
+ "source": [
307
+ "## Whisper"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": 5,
313
+ "metadata": {},
314
+ "outputs": [
315
+ {
316
+ "name": "stderr",
317
+ "output_type": "stream",
318
+ "text": [
319
+ "100%|███████████████████████████████████████| 139M/139M [00:04<00:00, 30.3MiB/s]\n"
320
+ ]
321
+ }
322
+ ],
323
+ "source": [
324
+ "import whisper\n",
325
+ "model = whisper.load_model(\"base\")"
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": 8,
331
+ "metadata": {},
332
+ "outputs": [
333
+ {
334
+ "name": "stderr",
335
+ "output_type": "stream",
336
+ "text": [
337
+ "/opt/anaconda3/lib/python3.8/site-packages/whisper/transcribe.py:78: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
338
+ " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n"
339
+ ]
340
+ },
341
+ {
342
+ "ename": "TypeError",
343
+ "evalue": "expected np.ndarray (got list)",
344
+ "output_type": "error",
345
+ "traceback": [
346
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
347
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
348
+ "\u001b[0;32m/var/folders/tq/kqg2ct9d123gd0wmshf2bd3r0000gp/T/ipykernel_157/3894641212.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtranscribe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
349
+ "\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/whisper/transcribe.py\u001b[0m in \u001b[0;36mtranscribe\u001b[0;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, **decode_options)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0mdecode_options\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"fp16\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mmel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlog_mel_spectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdecode_options\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"language\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
350
+ "\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/whisper/audio.py\u001b[0m in \u001b[0;36mlog_mel_spectrogram\u001b[0;34m(audio, n_mels)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0maudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_audio\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 112\u001b[0;31m \u001b[0maudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0mwindow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhann_window\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mN_FFT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
351
+ "\u001b[0;31mTypeError\u001b[0m: expected np.ndarray (got list)"
352
+ ]
353
+ }
354
+ ],
355
+ "source": [
356
+ "result = model.transcribe([\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\", \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"])"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": 7,
362
+ "metadata": {},
363
+ "outputs": [
364
+ {
365
+ "data": {
366
+ "text/plain": [
367
+ "' Please call Stella, ask her to bring these things with her from the store. 6 spoons of fresh snow peas, 5 thick slabs of blue cheese and maybe a snack for her brother Bob. We also need a small plastic snake and a big twig frog for the kids. She can scoop these things into free-rate bags and we will go meet a wind state train station.'"
368
+ ]
369
+ },
370
+ "execution_count": 7,
371
+ "metadata": {},
372
+ "output_type": "execute_result"
373
+ }
374
+ ],
375
+ "source": [
376
+ "result[\"text\"]"
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "markdown",
381
+ "metadata": {},
382
+ "source": []
383
+ }
384
+ ],
385
+ "metadata": {
386
+ "kernelspec": {
387
+ "display_name": "Python 3.8.12 ('base')",
388
+ "language": "python",
389
+ "name": "python3"
390
+ },
391
+ "language_info": {
392
+ "codemirror_mode": {
393
+ "name": "ipython",
394
+ "version": 3
395
+ },
396
+ "file_extension": ".py",
397
+ "mimetype": "text/x-python",
398
+ "name": "python",
399
+ "nbconvert_exporter": "python",
400
+ "pygments_lexer": "ipython3",
401
+ "version": "3.8.12"
402
+ },
403
+ "orig_nbformat": 4,
404
+ "vscode": {
405
+ "interpreter": {
406
+ "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
407
+ }
408
+ }
409
+ },
410
+ "nbformat": 4,
411
+ "nbformat_minor": 2
412
+ }
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ zenoml>=0.4.6
2
+ inspiredco
3
+ sentence_transformers
speakers_all.csv ADDED
The diff for this file is too large to render. See raw diff