Spaces:
Running
Running
Delete Silero.js
Browse files
Silero.js
DELETED
@@ -1,155 +0,0 @@
|
|
1 |
-
/**
 * Async wrapper around an ONNX Runtime (web) inference session for a
 * Silero-style VAD model. Keeps the model's recurrent state (`_state`) and
 * the rolling audio context (`_context`) carried between consecutive chunks.
 *
 * NOTE(review): relies on a global `ort` (onnxruntime-web) being in scope —
 * confirm the runtime is loaded before constructing.
 */
class OnnxWrapper {
    /**
     * @param {string} path - URL/path of the .onnx model file.
     * @param {boolean} [force_onnx_cpu=true] - Restrict execution to the
     *     'wasm' (CPU) provider when true; otherwise prefer 'webgl'.
     */
    constructor(path, force_onnx_cpu = true) {
        console.log(`Initializing OnnxWrapper with path: ${path}`);
        // Session creation is asynchronous; consumers must await ready()
        // (call() does this internally) before running inference.
        this.sessionReady = this.initSession(path, force_onnx_cpu);
        this.resetStates();
        this.sample_rates = [8000, 16000];
    }

    /** Resolves once the ONNX session has been created. */
    async ready() {
        console.log('Waiting for OnnxWrapper session to be ready');
        await this.sessionReady;
        console.log('OnnxWrapper session is ready');
    }

    /**
     * Creates the ONNX inference session and stores it on `this.session`.
     * @param {string} path - Model location passed to InferenceSession.create.
     * @param {boolean} force_onnx_cpu - Use only the 'wasm' provider if true.
     */
    async initSession(path, force_onnx_cpu) {
        console.log(`Initializing ONNX session with force_onnx_cpu: ${force_onnx_cpu}`);
        const options = {
            executionProviders: force_onnx_cpu ? ['wasm'] : ['webgl', 'wasm'],
            graphOptimizationLevel: 'all',
            executionMode: 'sequential',
            enableCpuMemArena: true,
            enableMemPattern: true,
            extra: {
                session: {
                    // Single-threaded: keeps wasm execution deterministic.
                    intra_op_num_threads: 1,
                    inter_op_num_threads: 1,
                }
            }
        };

        this.session = await ort.InferenceSession.create(path, options);
        console.log('ONNX session created successfully');
    }

    /**
     * Normalizes an audio chunk to batched form and a supported sample rate.
     * Wraps a 1-D chunk into a single-row batch, and decimates multiples of
     * 16 kHz down to 16 kHz by naive subsampling (no anti-aliasing filter).
     * @param {number[]|number[][]} x - Audio samples (mono or batched rows).
     * @param {number} sr - Sample rate of `x`.
     * @returns {[number[][], number]} The validated [batch, sampleRate] pair.
     * @throws {Error} On >2 dimensions, unsupported rate, or too-short chunk.
     */
    _validate_input(x, sr) {
        if (!Array.isArray(x[0])) {
            x = [x];
        }
        if (x.length > 2) {
            throw new Error(`Too many dimensions for input audio chunk ${x.length}`);
        }
        if (sr !== 16000 && (sr % 16000 === 0)) {
            // Keep every `step`-th sample to reach 16 kHz.
            const step = Math.floor(sr / 16000);
            x = x.map(row => row.filter((_, i) => i % step === 0));
            sr = 16000;
        }
        if (!this.sample_rates.includes(sr)) {
            throw new Error(`Supported sampling rates: ${this.sample_rates} (or multiply of 16000)`);
        }
        if (sr / x[0].length > 31.25) {
            throw new Error("Input audio chunk is too short");
        }
        return [x, sr];
    }

    /**
     * Clears the recurrent model state and the carried chunk context.
     * @param {number} [batch_size=1] - Number of rows in upcoming batches.
     */
    resetStates(batch_size = 1) {
        console.log(`Resetting states with batch_size: ${batch_size}`);
        // Model state tensor is [2, batch_size, 128]; stored flat per layer.
        this._state = Array(2).fill(0).map(() => Array(batch_size * 128).fill(0));
        this._context = [];
        this._last_sr = 0;
        this._last_batch_size = 0;
    }

    /**
     * Runs one inference step on a fixed-size chunk.
     * @param {number[]|number[][]} x - 512 samples/row @16 kHz, 256 @8 kHz.
     * @param {number} sr - Sample rate (8000/16000, or a multiple of 16000).
     * @returns {Promise<number[][]>} Model output, one row per batch row.
     * @throws {Error} If the chunk length does not match the sample rate.
     */
    async call(x, sr) {
        console.log(`Calling model with input shape: [${x.length}, ${x[0].length}], sample rate: ${sr}`);
        await this.ready();
        [x, sr] = this._validate_input(x, sr);
        const num_samples = sr === 16000 ? 512 : 256;

        if (x[0].length !== num_samples) {
            throw new Error(`Provided number of samples is ${x[0].length} (Supported values: 256 for 8000 sample rate, 512 for 16000)`);
        }

        const batch_size = x.length;
        const context_size = sr === 16000 ? 64 : 32;

        // Reset state on first use or whenever the stream shape changes.
        if (!this._last_batch_size) {
            this.resetStates(batch_size);
        }
        if (this._last_sr && this._last_sr !== sr) {
            this.resetStates(batch_size);
        }
        if (this._last_batch_size && this._last_batch_size !== batch_size) {
            this.resetStates(batch_size);
        }
        if (this._context.length === 0) {
            this._context = Array(batch_size * context_size).fill(0);
        }

        // Prepend each row's carried context from the previous chunk.
        x = x.map((row, i) => [...this._context.slice(i * context_size, (i + 1) * context_size), ...row]);

        if (sr === 8000 || sr === 16000) {
            const inputTensor = new ort.Tensor('float32', x.flat(), [batch_size, x[0].length]);
            const stateTensor = new ort.Tensor('float32', this._state.flat(), [2, batch_size, 128]);
            const srTensor = new ort.Tensor('int64', [sr], []);

            const feeds = {
                input: inputTensor,
                state: stateTensor,
                sr: srTensor
            };

            const results = await this.session.run(feeds);
            const outputData = results.output.data;
            const stateData = results.stateN.data;

            // Split the flat updated state back into its two layers.
            this._state = Array(2).fill(0).map((_, i) =>
                Array.from(stateData.slice(i * batch_size * 128, (i + 1) * batch_size * 128))
            );

            const outputShape = results.output.dims;
            const out = Array(outputShape[0]).fill(0).map((_, i) =>
                Array.from(outputData.slice(i * outputShape[1], (i + 1) * outputShape[1]))
            );

            // Carry the tail of each (context + chunk) row into the next call.
            this._context = x.map(row => row.slice(-context_size)).flat();
            this._last_sr = sr;
            this._last_batch_size = batch_size;

            console.log(`Model call completed, output shape: [${out.length}, ${out[0].length}]`);
            return out;
        } else {
            // Unreachable after _validate_input; kept as a defensive guard.
            throw new Error(`Unsupported sample rate: ${sr}. Supported rates are 8000 and 16000.`);
        }
    }

    /**
     * Runs the model over a full signal chunk by chunk, zero-padding the
     * tail so the length divides evenly into model-sized chunks.
     * @param {number[]|number[][]} x - Full audio signal (mono or batched).
     * @param {number} sr - Sample rate of `x`.
     * @returns {Promise<number[][]>} Per-row concatenation of chunk outputs.
     */
    async audio_forward(x, sr) {
        console.log(`Running audio_forward with input shape: [${x.length}, ${x[0].length}], sample rate: ${sr}`);
        const outs = [];
        [x, sr] = this._validate_input(x, sr);
        this.resetStates();
        const num_samples = sr === 16000 ? 512 : 256;

        if (x[0].length % num_samples !== 0) {
            const pad_num = num_samples - (x[0].length % num_samples);
            x = x.map(row => [...row, ...Array(pad_num).fill(0)]);
        }

        for (let i = 0; i < x[0].length; i += num_samples) {
            const wavs_batch = x.map(row => row.slice(i, i + num_samples));
            const out_chunk = await this.call(wavs_batch, sr);
            outs.push(out_chunk);
        }

        console.log(`audio_forward completed, output shape: [${outs.length}, ${outs[0].length}]`);
        return outs.reduce((acc, curr) => acc.map((row, i) => [...row, ...curr[i]]));
    }

    /** Releases the underlying ONNX session, if one was created. */
    close() {
        console.log('Closing OnnxWrapper session');
        // FIX: initSession is async — close() before it resolves (or after
        // it failed) left this.session undefined and .release() threw a
        // TypeError. Optional chaining makes close() safe at any time.
        this.session?.release();
    }
}

export default OnnxWrapper;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|