csukuangfj committed on
Commit
a9a963e
·
1 Parent(s): 1da156f

update model

Browse files
Files changed (2) hide show
  1. app-asr.js +55 -31
  2. index.html +53 -13
app-asr.js CHANGED
@@ -5,7 +5,6 @@
5
  const startBtn = document.getElementById('startBtn');
6
  const stopBtn = document.getElementById('stopBtn');
7
  const clearBtn = document.getElementById('clearBtn');
8
- const hint = document.getElementById('hint');
9
  const soundClips = document.getElementById('sound-clips');
10
 
11
  let textArea = document.getElementById('results');
@@ -16,7 +15,7 @@ let resultList = [];
16
  clearBtn.onclick = function() {
17
  resultList = [];
18
  textArea.value = getDisplayResult();
19
- textArea.scrollTop = textArea.scrollHeight; // auto scroll
20
  };
21
 
22
  function getDisplayResult() {
@@ -37,11 +36,39 @@ function getDisplayResult() {
37
  return ans;
38
  }
39
 
40
-
41
  Module = {};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  Module.onRuntimeInitialized = function() {
43
  console.log('inited!');
44
- hint.innerText = 'Model loaded! Please click start';
45
 
46
  startBtn.disabled = false;
47
 
@@ -53,11 +80,11 @@ let audioCtx;
53
  let mediaStream;
54
 
55
  let expectedSampleRate = 16000;
56
- let recordSampleRate; // the sampleRate of the microphone
57
- let recorder = null; // the microphone
58
- let leftchannel = []; // TODO: Use a single channel
59
 
60
- let recordingLength = 0; // number of samples so far
61
 
62
  let recognizer = null;
63
  let recognizer_stream = null;
@@ -66,11 +93,11 @@ if (navigator.mediaDevices.getUserMedia) {
66
  console.log('getUserMedia supported.');
67
 
68
  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
69
- const constraints = {audio: true};
70
 
71
  let onSuccess = function(stream) {
72
  if (!audioCtx) {
73
- audioCtx = new AudioContext({sampleRate: 16000});
74
  }
75
  console.log(audioCtx);
76
  recordSampleRate = audioCtx.sampleRate;
@@ -120,7 +147,6 @@ if (navigator.mediaDevices.getUserMedia) {
120
  result = recognizer.getResult(recognizer_stream).text;
121
  }
122
 
123
-
124
  if (result.length > 0 && lastResult != result) {
125
  lastResult = result;
126
  }
@@ -134,7 +160,7 @@ if (navigator.mediaDevices.getUserMedia) {
134
  }
135
 
136
  textArea.value = getDisplayResult();
137
- textArea.scrollTop = textArea.scrollHeight; // auto scroll
138
 
139
  let buf = new Int16Array(samples.length);
140
  for (var i = 0; i < samples.length; ++i) {
@@ -221,9 +247,8 @@ if (navigator.mediaDevices.getUserMedia) {
221
  };
222
  };
223
 
224
- let onError = function(err) {
225
- console.log('The following error occured: ' + err);
226
- };
227
 
228
  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
229
  } else {
@@ -231,7 +256,6 @@ if (navigator.mediaDevices.getUserMedia) {
231
  alert('getUserMedia not supported on your browser!');
232
  }
233
 
234
-
235
  // this function is copied/modified from
236
  // https://gist.github.com/meziantou/edb7217fddfbb70e899e
237
  function flatten(listOfSamples) {
@@ -257,22 +281,22 @@ function toWav(samples) {
257
 
258
  // http://soundfile.sapp.org/doc/WaveFormat/
259
  // F F I R
260
- view.setUint32(0, 0x46464952, true); // chunkID
261
- view.setUint32(4, 36 + samples.length * 2, true); // chunkSize
262
  // E V A W
263
- view.setUint32(8, 0x45564157, true); // format
264
- //
265
  // t m f
266
- view.setUint32(12, 0x20746d66, true); // subchunk1ID
267
- view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
268
- view.setUint32(20, 1, true); // audioFormat, 1 for PCM
269
- view.setUint16(22, 1, true); // numChannels: 1 channel
270
- view.setUint32(24, expectedSampleRate, true); // sampleRate
271
- view.setUint32(28, expectedSampleRate * 2, true); // byteRate
272
- view.setUint16(32, 2, true); // blockAlign
273
- view.setUint16(34, 16, true); // bitsPerSample
274
- view.setUint32(36, 0x61746164, true); // Subchunk2ID
275
- view.setUint32(40, samples.length * 2, true); // subchunk2Size
276
 
277
  let offset = 44;
278
  for (let i = 0; i < samples.length; ++i) {
@@ -280,7 +304,7 @@ function toWav(samples) {
280
  offset += 2;
281
  }
282
 
283
- return new Blob([view], {type: 'audio/wav'});
284
  }
285
 
286
  // this function is copied from
 
5
  const startBtn = document.getElementById('startBtn');
6
  const stopBtn = document.getElementById('stopBtn');
7
  const clearBtn = document.getElementById('clearBtn');
 
8
  const soundClips = document.getElementById('sound-clips');
9
 
10
  let textArea = document.getElementById('results');
 
15
// Clear button: discard every accumulated result and refresh the transcript
// textarea so it shows the (now empty) result list.
clearBtn.onclick = () => {
  resultList = [];

  const displayText = getDisplayResult();
  textArea.value = displayText;
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
};
20
 
21
  function getDisplayResult() {
 
36
  return ans;
37
  }
38
 
 
39
  Module = {};
40
+
41
// Returns the location of a file requested by the Emscripten loader by
// prefixing `path` with `scriptDirectory`; each request is logged.
// See https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.locateFile = (path, scriptDirectory = '') => {
  console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);

  const resolvedLocation = scriptDirectory + path;
  return resolvedLocation;
};
46
+
47
// Status callback: receives a progress string and mirrors it into the
// #status element.
// See https://emscripten.org/docs/api_reference/module.html
// NOTE(review): the strings 'Running...' and '' are assumed to be emitted by
// the Emscripten-generated loader -- confirm against the generated JS.
//
// - Non-empty status: #status is shown with the text and every .tab-content
//   element gets the `loading` class.
// - Empty status: #status is hidden and the `loading` class is removed from
//   every .tab-content element.
//
// @param {string} status - progress text; '' means loading has finished.
Module.setStatus = function(status) {
  console.log(`status ${status}`);

  // Replace the generic 'Running...' text with a more descriptive message.
  const message = status === 'Running...'
      ? 'Model downloaded. Initializing recognizer...'
      : status;
  const isLoading = message !== '';

  const statusElement = document.getElementById('status');
  statusElement.textContent = message;
  statusElement.style.display = isLoading ? 'block' : 'none';

  // Add the `loading` class while loading, remove it once done.
  document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
    tabContentElement.classList.toggle('loading', isLoading);
  });
};
69
+
70
  Module.onRuntimeInitialized = function() {
71
  console.log('inited!');
 
72
 
73
  startBtn.disabled = false;
74
 
 
80
  let mediaStream;
81
 
82
  let expectedSampleRate = 16000;
83
+ let recordSampleRate; // the sampleRate of the microphone
84
+ let recorder = null; // the microphone
85
+ let leftchannel = []; // TODO: Use a single channel
86
 
87
+ let recordingLength = 0; // number of samples so far
88
 
89
  let recognizer = null;
90
  let recognizer_stream = null;
 
93
  console.log('getUserMedia supported.');
94
 
95
  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
96
+ const constraints = {audio : true};
97
 
98
  let onSuccess = function(stream) {
99
  if (!audioCtx) {
100
+ audioCtx = new AudioContext({sampleRate : 16000});
101
  }
102
  console.log(audioCtx);
103
  recordSampleRate = audioCtx.sampleRate;
 
147
  result = recognizer.getResult(recognizer_stream).text;
148
  }
149
 
 
150
  if (result.length > 0 && lastResult != result) {
151
  lastResult = result;
152
  }
 
160
  }
161
 
162
  textArea.value = getDisplayResult();
163
+ textArea.scrollTop = textArea.scrollHeight; // auto scroll
164
 
165
  let buf = new Int16Array(samples.length);
166
  for (var i = 0; i < samples.length; ++i) {
 
247
  };
248
  };
249
 
250
// Rejection handler for getUserMedia(): logs why microphone access failed.
let onError = function(err) {
  console.log(`The following error occurred: ${err}`);
};
 
252
 
253
  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
254
  } else {
 
256
  alert('getUserMedia not supported on your browser!');
257
  }
258
 
 
259
  // this function is copied/modified from
260
  // https://gist.github.com/meziantou/edb7217fddfbb70e899e
261
  function flatten(listOfSamples) {
 
281
 
282
  // http://soundfile.sapp.org/doc/WaveFormat/
283
  // F F I R
284
+ view.setUint32(0, 0x46464952, true); // chunkID
285
+ view.setUint32(4, 36 + samples.length * 2, true); // chunkSize
286
  // E V A W
287
+ view.setUint32(8, 0x45564157, true); // format
288
+ //
289
  // t m f
290
+ view.setUint32(12, 0x20746d66, true); // subchunk1ID
291
+ view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
292
+ view.setUint32(20, 1, true); // audioFormat, 1 for PCM
293
+ view.setUint16(22, 1, true); // numChannels: 1 channel
294
+ view.setUint32(24, expectedSampleRate, true); // sampleRate
295
+ view.setUint32(28, expectedSampleRate * 2, true); // byteRate
296
+ view.setUint16(32, 2, true); // blockAlign
297
+ view.setUint16(34, 16, true); // bitsPerSample
298
+ view.setUint32(36, 0x61746164, true); // Subchunk2ID
299
+ view.setUint32(40, samples.length * 2, true); // subchunk2Size
300
 
301
  let offset = 44;
302
  for (let i = 0; i < samples.length; ++i) {
 
304
  offset += 2;
305
  }
306
 
307
+ return new Blob([ view ], {type : 'audio/wav'});
308
  }
309
 
310
  // this function is copied from
index.html CHANGED
@@ -11,30 +11,70 @@
11
  textarea {
12
  width:100%;
13
  }
 
 
 
14
  </style>
15
  </head>
16
 
17
- <body>
18
  <h1>
19
  Next-gen Kaldi + WebAssembly<br/>
20
  ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
21
  (with Zipformer)
22
  </h1>
23
 
24
- <div>
25
- <span id="hint">Loading model ... ...</span>
26
- <br/>
27
- <br/>
28
- <button id="startBtn" disabled>Start</button>
29
- <button id="stopBtn" disabled>Stop</button>
30
- <button id="clearBtn">Clear</button>
31
- <br/>
32
- <br/>
33
- <textarea id="results" rows="10" readonly></textarea>
 
 
 
 
 
 
 
 
 
 
 
34
  </div>
35
 
36
- <section flex="1" overflow="auto" id="sound-clips">
37
- </section>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  <script src="sherpa-onnx-asr.js"></script>
40
  <script src="app-asr.js"></script>
 
11
  textarea {
12
  width:100%;
13
  }
14
+ .loading {
15
+ display: none !important;
16
+ }
17
  </style>
18
  </head>
19
 
20
+ <body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
21
  <h1>
22
  Next-gen Kaldi + WebAssembly<br/>
23
  ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
24
  (with Zipformer)
25
  </h1>
26
 
27
+ <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
28
+ <div id="status">Loading...</div>
29
+
30
+ <div id="singleAudioContent" class="tab-content loading">
31
+ <div style="display: flex; gap: 1.5rem;">
32
+ <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;">
33
+ <button id="startBtn" disabled>Start</button>
34
+ <button id="stopBtn" disabled>Stop</button>
35
+ <button id="clearBtn">Clear</button>
36
+ </div>
37
+ </div>
38
+
39
+ <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
40
+ <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; color: #6c757d;">Transcript</div>
41
+ <textarea id="results" rows="10" placeholder="Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea>
42
+ </div>
43
+ </div>
44
+
45
+ <section flex="1" overflow="auto" id="sound-clips">
46
+ </section>
47
+
48
  </div>
49
 
50
+ <!-- Footer Section -->
51
+ <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
52
+ <h3>Description</h3>
53
+ <ul>
54
+ <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
55
+ <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
56
+ <ul>
57
+ <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
58
+ <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
59
+ <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
60
+ </ul>
61
+ </ul>
62
+ <h3>About This Demo</h3>
63
+ <ul>
64
+ <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
65
+ <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
66
+ </ul>
67
+ <h3>Latest Update</h3>
68
+ <ul>
69
+ <li>Update UI.</li>
70
+ <li>First working version.</li>
71
+ </ul>
72
+
73
+ <h3>Acknowledgement</h3>
74
+ <ul>
75
+ <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
76
+ </ul>
77
+ </div>
78
 
79
  <script src="sherpa-onnx-asr.js"></script>
80
  <script src="app-asr.js"></script>