Spaces:
Sleeping
Sleeping
AshDavid12
committed on
Commit
·
e9d738a
1
Parent(s):
7c685fa
process audio back array dimension error
Browse files
infer.py
CHANGED
@@ -141,7 +141,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
141 |
async def process_audio_stream(websocket: WebSocket):
|
142 |
"""Continuously receive audio chunks and initiate transcription tasks."""
|
143 |
sampling_rate = 16000
|
144 |
-
min_chunk_size =
|
145 |
audio_buffer = np.array([], dtype=np.float32)
|
146 |
|
147 |
transcription_task = None
|
@@ -160,10 +160,10 @@ async def process_audio_stream(websocket: WebSocket):
|
|
160 |
total_bytes_received += chunk_size
|
161 |
#logger.debug(f"Received chunk {chunk_counter}: {chunk_size} bytes")
|
162 |
|
163 |
-
|
164 |
#logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
|
165 |
|
166 |
-
audio_buffer = np.concatenate((audio_buffer,
|
167 |
#logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
|
168 |
except Exception as e:
|
169 |
logger.error(f"Error receiving data: {e}")
|
@@ -238,16 +238,16 @@ def sync_transcribe_audio(audio_data):
|
|
238 |
logger.error(f"Transcription error: {e}")
|
239 |
return {}
|
240 |
|
241 |
-
|
242 |
-
|
243 |
-
#
|
244 |
-
|
245 |
-
#
|
246 |
-
|
247 |
-
|
248 |
-
#
|
249 |
-
|
250 |
-
|
251 |
|
252 |
|
253 |
|
|
|
141 |
async def process_audio_stream(websocket: WebSocket):
|
142 |
"""Continuously receive audio chunks and initiate transcription tasks."""
|
143 |
sampling_rate = 16000
|
144 |
+
min_chunk_size = 5 # in seconds
|
145 |
audio_buffer = np.array([], dtype=np.float32)
|
146 |
|
147 |
transcription_task = None
|
|
|
160 |
total_bytes_received += chunk_size
|
161 |
#logger.debug(f"Received chunk {chunk_counter}: {chunk_size} bytes")
|
162 |
|
163 |
+
audio_chunk = process_received_audio(data)
|
164 |
#logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
|
165 |
|
166 |
+
audio_buffer = np.concatenate((audio_buffer, audio_chunk))
|
167 |
#logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
|
168 |
except Exception as e:
|
169 |
logger.error(f"Error receiving data: {e}")
|
|
|
238 |
logger.error(f"Transcription error: {e}")
|
239 |
return {}
|
240 |
|
241 |
+
def process_received_audio(data):
|
242 |
+
"""Convert received bytes into normalized float32 NumPy array."""
|
243 |
+
#logger.debug(f"Processing received audio data of size {len(data)} bytes")
|
244 |
+
audio_int16 = np.frombuffer(data, dtype=np.int16)
|
245 |
+
#logger.debug(f"Converted to int16 NumPy array with {len(audio_int16)} samples")
|
246 |
+
|
247 |
+
audio_float32 = audio_int16.astype(np.float32) / 32768.0 # Normalize to [-1, 1]
|
248 |
+
#logger.debug(f"Normalized audio data to float32 with {len(audio_float32)} samples")
|
249 |
+
|
250 |
+
return audio_float32
|
251 |
|
252 |
|
253 |
|