Spaces:

atlury
/

digitalhuman

Running

App Files Community

atlury committed on Sep 15, 2024

Commit

20497dc

verified ·

1 Parent(s): c10b84f

Delete VoiceActivityDetector.js

Browse files

Files changed (1) hide show

VoiceActivityDetector.js +0 -109

VoiceActivityDetector.js DELETED Viewed

@@ -1,109 +0,0 @@
-import OnnxWrapper from './Silero.js';
-const modelPath = "silero_vad.onnx";  // Make sure this path is correct
/**
 * Streaming voice-activity detector.
 *
 * Feeds fixed-size audio frames to an `OnnxWrapper` model and turns the
 * per-frame speech probability into `{ start }` / `{ end }` events using two
 * thresholds (hysteresis) and a minimum-silence requirement.
 *
 * State kept between calls:
 *  - `triggered`     – true while inside a detected speech segment
 *  - `tempEnd`       – sample position where the current silence run began (0 = none)
 *  - `currentSample` – total number of samples passed to `apply` so far
 */
export class VadDetector {
    /**
     * @param {number} startThreshold - A frame whose probability is >= this value opens a speech segment.
     * @param {number} endThreshold - A frame whose probability is < this value counts as silence while in speech.
     * @param {number} samplingRate - Sampling rate in Hz; only 8000 and 16000 are accepted.
     * @param {number} minSilenceDurationMs - Silence that must elapse before a segment end is reported.
     * @param {number} speechPadMs - Padding applied to reported start (subtracted) and end (added) positions.
     * @throws {Error} If `samplingRate` is neither 8000 nor 16000.
     */
    constructor(startThreshold, endThreshold, samplingRate, minSilenceDurationMs, speechPadMs) {
        if (samplingRate !== 8000 && samplingRate !== 16000) {
            throw new Error("Does not support sampling rates other than [8000, 16000]");
        }
        this.model = new OnnxWrapper(modelPath);
        this.startThreshold = startThreshold;
        this.endThreshold = endThreshold;
        this.samplingRate = samplingRate;
        // Millisecond durations are converted once into sample counts.
        this.minSilenceSamples = samplingRate * minSilenceDurationMs / 1000;
        this.speechPadSamples = samplingRate * speechPadMs / 1000;
        this.reset();
        console.log(`VadDetector initialized with: startThreshold=${startThreshold}, endThreshold=${endThreshold}, samplingRate=${samplingRate}`);
    }

    /**
     * Clears the model's internal states and the detector's segment-tracking state.
     */
    reset() {
        this.model.resetStates();
        this.triggered = false;
        this.tempEnd = 0;
        this.currentSample = 0;
        console.log('VadDetector reset');
    }

    /**
     * Processes one audio frame and reports a speech boundary if one was crossed.
     *
     * The frame is zero-padded or truncated to the length passed to the model
     * (512 samples at 16000 Hz, 256 otherwise). `currentSample` always advances
     * by the length of `data` as received, before padding/truncation.
     *
     * @param {ArrayLike<number>} data - Audio samples for this frame.
     * @param {boolean} [returnSeconds=false] - When truthy, positions are reported in
     *     seconds rounded to one decimal place instead of sample indices.
     * @returns {Promise<{start?: number, end?: number}>} `{ start }` when a segment
     *     opens, `{ end }` when enough silence has elapsed to close it, otherwise `{}`.
     * @throws {Error} If the model call fails or returns an unexpected shape. The
     *     underlying error is available as `error.cause`.
     */
    async apply(data, returnSeconds = false) {
        console.log(`Applying VAD to data of length ${data.length}`);
        const receivedLength = data.length;
        this.currentSample += receivedLength;
        const frameLength = this.samplingRate === 16000 ? 512 : 256;

        if (receivedLength < frameLength) {
            console.warn(`Input data length (${receivedLength}) is less than required (${frameLength}). Padding with zeros.`);
        } else if (receivedLength > frameLength) {
            console.warn(`Input data length (${receivedLength}) is greater than required (${frameLength}). Truncating.`);
        }
        // Build the model input in one pass: copy what is available, zero-fill the rest.
        const frame = Array.from(
            { length: frameLength },
            (_, i) => (i < receivedLength ? data[i] : 0),
        );
        const x = [frame];

        let speechProb;
        try {
            console.log(`Calling model with input shape: [${x.length}, ${x[0].length}], sample rate: ${this.samplingRate}`);
            const result = await this.model.call(x, this.samplingRate);
            if (result && Array.isArray(result) && result[0] && result[0][0] !== undefined) {
                speechProb = result[0][0];
                console.log(`Speech probability: ${speechProb}`);
            } else {
                throw new Error("Unexpected response from model");
            }
        } catch (e) {
            console.error("Error in VadDetector.apply:", e);
            // Keep the historical message but retain the original error (and its stack) as `cause`.
            throw new Error("Error calling the model: " + e, { cause: e });
        }

        // Formats a boundary event in samples or (rounded) seconds.
        const boundary = (key, sample) => ({
            [key]: returnSeconds
                ? Number((sample / this.samplingRate).toFixed(1))
                : sample,
        });

        const isSpeech = speechProb >= this.startThreshold;

        // Speech resumed before the silence run was long enough: cancel the pending end.
        if (isSpeech && this.tempEnd !== 0) {
            this.tempEnd = 0;
        }

        if (isSpeech && !this.triggered) {
            this.triggered = true;
            const speechStart = Math.max(this.currentSample - this.speechPadSamples, 0);
            console.log(`Speech start detected at sample ${speechStart}`);
            return boundary('start', speechStart);
        }

        if (speechProb < this.endThreshold && this.triggered) {
            console.log(`Potential speech end at sample ${this.currentSample}`);
            if (this.tempEnd === 0) {
                // First silent frame of this run: remember where the silence began.
                this.tempEnd = this.currentSample;
            }
            if (this.currentSample - this.tempEnd < this.minSilenceSamples) {
                console.log('Silence duration too short, continuing');
                return {};
            }
            const speechEnd = this.tempEnd + this.speechPadSamples;
            console.log(`Speech end confirmed at sample ${speechEnd}`);
            this.tempEnd = 0;
            this.triggered = false;
            return boundary('end', speechEnd);
        }

        // Probabilities between the two thresholds (or silence outside a segment) change nothing.
        return {};
    }

    /**
     * Resets the detector and then closes the underlying model.
     * @returns {Promise<void>}
     */
    async close() {
        this.reset();
        await this.model.close();
    }
}