AshDavid12 committed on
Commit
228afd7
·
1 Parent(s): 5b3e211

added asyncio to thread

Browse files
Files changed (1) hide show
  1. infer.py +4 -6
infer.py CHANGED
@@ -126,14 +126,14 @@ async def read_root():
126
  import tempfile
127
 
128
 
129
- def transcribe_core_ws(audio_file):
130
 
131
  ret = {'segments': []}
132
 
133
  try:
134
  # Transcribe the entire audio file
135
  logging.debug(f"Initiating model transcription for file: {audio_file}")
136
- segs, _ = model.transcribe(audio_file, language='he', word_timestamps=True)
137
  logging.info('Transcription completed successfully.')
138
  except Exception as e:
139
  logging.error(f"Error during transcription: {e}")
@@ -231,7 +231,6 @@ async def websocket_transcribe(websocket: WebSocket):
231
  # Receive the next chunk of PCM audio data
232
  logging.info("in try before recive ")
233
  audio_chunk = await asyncio.wait_for(websocket.receive_bytes(), timeout=10.0)
234
- logging.info(f"type of audio chunk : {type(audio_chunk)}")
235
 
236
  logging.info("after recieve")
237
  sys.stdout.flush()
@@ -241,7 +240,6 @@ async def websocket_transcribe(websocket: WebSocket):
241
 
242
  # Accumulate the raw PCM data into the buffer
243
  pcm_audio_buffer.extend(audio_chunk)
244
- print(f"type of pcm buffer: {type(pcm_audio_buffer)}")
245
  print(f"len of pcm buffer: {len(pcm_audio_buffer)}")
246
  logging.info("after buffer extend")
247
 
@@ -274,7 +272,7 @@ async def websocket_transcribe(websocket: WebSocket):
274
  temp_wav_file.flush()
275
 
276
  if not validate_wav_file(temp_wav_file.name):
277
- logging.error(f"Invalid WAV file created: {temp_wav_file.name}, type of file {type(temp_wav_file.name)}")
278
  await websocket.send_json({"error": "Invalid WAV file created."})
279
  return
280
 
@@ -290,7 +288,7 @@ async def websocket_transcribe(websocket: WebSocket):
290
 
291
  with open(temp_wav_file.name, 'rb') as audio_file:
292
  audio_data = audio_file.read()
293
- partial_result = transcribe_core_ws(audio_data)
294
  segments.extend(partial_result['segments'])
295
 
296
  # Clear the buffer after transcription
 
126
  import tempfile
127
 
128
 
129
+ async def transcribe_core_ws(audio_file):
130
 
131
  ret = {'segments': []}
132
 
133
  try:
134
  # Transcribe the entire audio file
135
  logging.debug(f"Initiating model transcription for file: {audio_file}")
136
+ segs, _ = await asyncio.to_thread(model.transcribe,audio_file, language='he', word_timestamps=True)
137
  logging.info('Transcription completed successfully.')
138
  except Exception as e:
139
  logging.error(f"Error during transcription: {e}")
 
231
  # Receive the next chunk of PCM audio data
232
  logging.info("in try before recive ")
233
  audio_chunk = await asyncio.wait_for(websocket.receive_bytes(), timeout=10.0)
 
234
 
235
  logging.info("after recieve")
236
  sys.stdout.flush()
 
240
 
241
  # Accumulate the raw PCM data into the buffer
242
  pcm_audio_buffer.extend(audio_chunk)
 
243
  print(f"len of pcm buffer: {len(pcm_audio_buffer)}")
244
  logging.info("after buffer extend")
245
 
 
272
  temp_wav_file.flush()
273
 
274
  if not validate_wav_file(temp_wav_file.name):
275
+ logging.error(f"Invalid WAV file created: {temp_wav_file.name}")
276
  await websocket.send_json({"error": "Invalid WAV file created."})
277
  return
278
 
 
288
 
289
  with open(temp_wav_file.name, 'rb') as audio_file:
290
  audio_data = audio_file.read()
291
+ partial_result = await asyncio.to_thread(transcribe_core_ws,audio_data)
292
  segments.extend(partial_result['segments'])
293
 
294
  # Clear the buffer after transcription