AshDavid12 committed on
Commit
e9d738a
·
1 Parent(s): 7c685fa

process audio back array dimension error

Browse files
Files changed (1) hide show
  1. infer.py +13 -13
infer.py CHANGED
@@ -141,7 +141,7 @@ async def websocket_endpoint(websocket: WebSocket):
141
  async def process_audio_stream(websocket: WebSocket):
142
  """Continuously receive audio chunks and initiate transcription tasks."""
143
  sampling_rate = 16000
144
- min_chunk_size = 1 # in seconds
145
  audio_buffer = np.array([], dtype=np.float32)
146
 
147
  transcription_task = None
@@ -160,10 +160,10 @@ async def process_audio_stream(websocket: WebSocket):
160
  total_bytes_received += chunk_size
161
  #logger.debug(f"Received chunk {chunk_counter}: {chunk_size} bytes")
162
 
163
- #audio_chunk = process_received_audio(data)
164
  #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
165
 
166
- audio_buffer = np.concatenate((audio_buffer, data))
167
  #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
168
  except Exception as e:
169
  logger.error(f"Error receiving data: {e}")
@@ -238,16 +238,16 @@ def sync_transcribe_audio(audio_data):
238
  logger.error(f"Transcription error: {e}")
239
  return {}
240
 
241
- # def process_received_audio(data):
242
- # """Convert received bytes into normalized float32 NumPy array."""
243
- # #logger.debug(f"Processing received audio data of size {len(data)} bytes")
244
- # audio_int16 = np.frombuffer(data, dtype=np.int16)
245
- # #logger.debug(f"Converted to int16 NumPy array with {len(audio_int16)} samples")
246
- #
247
- # audio_float32 = audio_int16.astype(np.float32) / 32768.0 # Normalize to [-1, 1]
248
- # #logger.debug(f"Normalized audio data to float32 with {len(audio_float32)} samples")
249
- #
250
- # return audio_float32
251
 
252
 
253
 
 
141
  async def process_audio_stream(websocket: WebSocket):
142
  """Continuously receive audio chunks and initiate transcription tasks."""
143
  sampling_rate = 16000
144
+ min_chunk_size = 5 # in seconds
145
  audio_buffer = np.array([], dtype=np.float32)
146
 
147
  transcription_task = None
 
160
  total_bytes_received += chunk_size
161
  #logger.debug(f"Received chunk {chunk_counter}: {chunk_size} bytes")
162
 
163
+ audio_chunk = process_received_audio(data)
164
  #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
165
 
166
+ audio_buffer = np.concatenate((audio_buffer, audio_chunk))
167
  #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
168
  except Exception as e:
169
  logger.error(f"Error receiving data: {e}")
 
238
  logger.error(f"Transcription error: {e}")
239
  return {}
240
 
241
def process_received_audio(data) -> np.ndarray:
    """Convert received bytes into a normalized float32 NumPy array.

    The payload is interpreted as a flat sequence of signed 16-bit
    integers in native byte order. Each sample is divided by 32768 so the
    result lies in the half-open range [-1.0, 1.0).

    Args:
        data: Bytes-like object holding the raw int16 samples.

    Returns:
        A one-dimensional ``np.float32`` array with one element per
        2-byte sample in ``data``; empty when ``data`` is empty.

    Raises:
        ValueError: Propagated from ``np.frombuffer`` when the length of
            ``data`` is not a multiple of 2 bytes.
    """
    # frombuffer creates a read-only view over the payload; astype below
    # makes the writable float32 copy that is handed back to the caller.
    audio_int16 = np.frombuffer(data, dtype=np.int16)

    # 32768 == 2**15, the magnitude of the most negative int16 value, so
    # -32768 maps exactly to -1.0 and 32767 to just under 1.0.
    audio_float32 = audio_int16.astype(np.float32) / 32768.0

    return audio_float32
251
 
252
 
253