tomer9080 committed on
Commit
a8fb4f9
·
1 Parent(s): 2faff30

temperature removed, added multilingual

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. static/client.html +16 -4
  3. unified_socket_server.py +10 -4
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  *.ipynb
2
  old_demo_code/
3
  htokenf.txt
4
- __pycache__/
 
 
1
  *.ipynb
2
  old_demo_code/
3
  htokenf.txt
4
+ __pycache__/
5
+ .vscode/
static/client.html CHANGED
@@ -238,7 +238,13 @@
238
 
239
  <div class="config-group">
240
  <label for="chunkSize">Chunk Size (ms):</label>
241
- <input type="number" id="chunkSize" value="300" min="100" max="2000" step="100">
 
 
 
 
 
 
242
  </div>
243
 
244
  <div class="config-group">
@@ -247,8 +253,14 @@
247
  </div>
248
 
249
  <div class="config-group">
250
- <label for="temperature">Temperature:</label>
251
- <input type="number" id="temperature" value="0.0" min="0.0" max="1.0" step="0.1">
 
 
 
 
 
 
252
  </div>
253
  </div>
254
 
@@ -357,7 +369,7 @@
357
  model_size: document.getElementById('modelSize').value,
358
  chunk_size: parseInt(document.getElementById('chunkSize').value),
359
  beam_size: parseInt(document.getElementById('beamSize').value),
360
- temperature: parseFloat(document.getElementById('temperature').value)
361
  };
362
 
363
  log('Starting transcription session...');
 
238
 
239
  <div class="config-group">
240
  <label for="chunkSize">Chunk Size (ms):</label>
241
+ <select id="chunkSize">
242
+ <option value="40">40</option>
243
+ <option value="100">100</option>
244
+ <option value="200">200</option>
245
+ <option value="300" selected>300</option>
246
+ <option value="1000">1000</option>
247
+ </select>
248
  </div>
249
 
250
  <div class="config-group">
 
253
  </div>
254
 
255
  <div class="config-group">
256
+ <label for="language">Language:</label>
257
+ <select id="language">
258
+ <option value="en" selected>English</option>
259
+ <option value="fr">French</option>
260
+ <option value="es">Spanish</option>
261
+ <option value="de">German</option>
262
+ <option value="pt">Portuguese</option>
263
+ </select>
264
  </div>
265
  </div>
266
 
 
369
  model_size: document.getElementById('modelSize').value,
370
  chunk_size: parseInt(document.getElementById('chunkSize').value),
371
  beam_size: parseInt(document.getElementById('beamSize').value),
372
+ language: document.getElementById('language').value
373
  };
374
 
375
  log('Starting transcription session...');
unified_socket_server.py CHANGED
@@ -126,7 +126,7 @@ class UnifiedTranscriptionServer:
126
  logger.info(f"Received config from {client_id}: {config}")
127
 
128
  # Validate config
129
- required_fields = ['model_size', 'chunk_size', 'beam_size', 'temperature']
130
  for field in required_fields:
131
  if field not in config:
132
  await ws.send_str(json.dumps({"error": f"Missing required field: {field}"}))
@@ -138,9 +138,16 @@ class UnifiedTranscriptionServer:
138
 
139
  logger.info(f"Loading model {model_size} for client {client_id}")
140
 
 
 
 
 
 
 
 
141
  # Try to use whisper_stream, fallback to regular whisper
142
  try:
143
- model = load_streaming_model_correct(model_size, chunk_size)
144
  client['first_chunk'] = True
145
  if torch.cuda.is_available():
146
  model = model.to("cuda")
@@ -236,12 +243,11 @@ class UnifiedTranscriptionServer:
236
  if hasattr(model, 'decode') and 'use_streaming' not in client:
237
  # Using whisper_stream
238
  decoding_options = DecodingOptions(
239
- language="en",
240
  gran=(config['chunk_size'] // 20),
241
  single_frame_mel=True,
242
  without_timestamps=True,
243
  beam_size=config['beam_size'],
244
- temperature=config['temperature'],
245
  stream_decode=True,
246
  use_ca_kv_cache=True,
247
  look_ahead_blocks=model.extra_gran_blocks
 
126
  logger.info(f"Received config from {client_id}: {config}")
127
 
128
  # Validate config
129
+ required_fields = ['model_size', 'chunk_size', 'beam_size', 'language']
130
  for field in required_fields:
131
  if field not in config:
132
  await ws.send_str(json.dumps({"error": f"Missing required field: {field}"}))
 
138
 
139
  logger.info(f"Loading model {model_size} for client {client_id}")
140
 
141
+ # Check - if language is other than english, throw an error.
142
+ # Only large-v2 300msec is available.
143
+ if multilingual := config['language'] != "en":
144
+ if model_size != "large-v2" or chunk_size != 300:
145
+ await ws.send_str(json.dumps({"error": f"Running multilingual transcription is available for now only on large-v2 model using chunk size of 300ms."}))
146
+ return
147
+
148
  # Try to use whisper_stream, fallback to regular whisper
149
  try:
150
+ model = load_streaming_model_correct(model_size, chunk_size, multilingual)
151
  client['first_chunk'] = True
152
  if torch.cuda.is_available():
153
  model = model.to("cuda")
 
243
  if hasattr(model, 'decode') and 'use_streaming' not in client:
244
  # Using whisper_stream
245
  decoding_options = DecodingOptions(
246
+ language=config['language'],
247
  gran=(config['chunk_size'] // 20),
248
  single_frame_mel=True,
249
  without_timestamps=True,
250
  beam_size=config['beam_size'],
 
251
  stream_decode=True,
252
  use_ca_kv_cache=True,
253
  look_ahead_blocks=model.extra_gran_blocks