AshDavid12 committed
Commit da37a77 · 1 Parent(s): d7b2452

added tmp dir for wav file

Files changed (2):
  1. client.py (+1, -1)
  2. infer.py (+30, -12)
client.py CHANGED
@@ -39,7 +39,7 @@ async def send_audio(websocket):
                 break  # End of file
 
             await websocket.send(pcm_chunk)  # Send raw PCM data chunk
-            print(f"Sent PCM chunk of size {len(pcm_chunk)} bytes.")
+            #print(f"Sent PCM chunk of size {len(pcm_chunk)} bytes.")
             await asyncio.sleep(0.01)  # Simulate real-time sending
 
         else:
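For context, here is a minimal sketch of the client-side send loop this hunk sits in, reconstructed from the diff alone. The send_audio name, the chunked websocket.send call, the commented-out print, and the asyncio.sleep(0.01) pacing come from the diff; the input file, chunk size, endpoint URL, use of the websockets library, and the simplified loop structure are assumptions for illustration, not the project's actual client.py.

import asyncio
import wave

import websockets  # assumed client library; the diff only shows `await websocket.send(...)`

AUDIO_PATH = "input.wav"                         # hypothetical source file
CHUNK_FRAMES = 1024                              # hypothetical chunk size
SERVER_URL = "ws://localhost:8000/transcribe"    # hypothetical endpoint


async def send_audio(websocket):
    # Stream raw PCM frames from a local WAV file over the WebSocket
    with wave.open(AUDIO_PATH, "rb") as src:
        while True:
            pcm_chunk = src.readframes(CHUNK_FRAMES)
            if not pcm_chunk:
                break  # End of file

            await websocket.send(pcm_chunk)  # Send raw PCM data chunk
            # print(f"Sent PCM chunk of size {len(pcm_chunk)} bytes.")  # silenced, as in this commit
            await asyncio.sleep(0.01)  # Simulate real-time sending


async def main():
    async with websockets.connect(SERVER_URL) as websocket:
        await send_audio(websocket)


if __name__ == "__main__":
    asyncio.run(main())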
infer.py CHANGED
@@ -197,6 +197,12 @@ async def websocket_transcribe(websocket: WebSocket):
         channels = 1  # Mono
         sample_width = 2  # 2 bytes per sample (16-bit audio)
 
+        # Ensure the /tmp directory exists
+        tmp_directory = "/tmp"
+        if not os.path.exists(tmp_directory):
+            logging.info(f"Creating /tmp directory: {tmp_directory}")
+            os.makedirs(tmp_directory)
+
         while True:
             try:
                 # Receive the next chunk of PCM audio data
@@ -217,19 +223,27 @@ async def websocket_transcribe(websocket: WebSocket):
                 if accumulated_audio_time >= min_transcription_time:
                     logging.info("Buffered enough audio time, starting transcription.")
 
-                    # Create a temporary WAV file from the accumulated PCM data
-                    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
-                        with wave.open(temp_wav_file.name, 'wb') as wav_file:
-                            wav_file.setnchannels(channels)
-                            wav_file.setsampwidth(sample_width)
-                            wav_file.setframerate(sample_rate)
-                            wav_file.writeframes(pcm_audio_buffer)
+                    # Create a temporary WAV file in /tmp for transcription
+                    temp_wav_path = os.path.join(tmp_directory, f"temp_audio_{last_transcribed_time}.wav")
+                    with wave.open(temp_wav_path, 'wb') as wav_file:
+                        wav_file.setnchannels(channels)
+                        wav_file.setsampwidth(sample_width)
+                        wav_file.setframerate(sample_rate)
+                        wav_file.writeframes(pcm_audio_buffer)
+
+                    logging.info(f"Temporary WAV file created at {temp_wav_path} for transcription.")
 
-                        logging.info(f"Temporary WAV file created at {temp_wav_file.name} for transcription.")
+                    # Log to confirm that the file exists and has the expected size
+                    if os.path.exists(temp_wav_path):
+                        file_size = os.path.getsize(temp_wav_path)
+                        logging.info(f"Temporary WAV file size: {file_size} bytes.")
+                    else:
+                        logging.error(f"Temporary WAV file {temp_wav_path} does not exist.")
+                        raise Exception(f"Temporary WAV file {temp_wav_path} not found.")
 
-                    # Call the transcription function with the WAV file
-                    partial_result, last_transcribed_time = transcribe_core_ws(temp_wav_file.name, last_transcribed_time)
-                    processed_segments.extend(partial_result['new_segments'])
+                    # Call the transcription function with the WAV file path
+                    partial_result, last_transcribed_time = transcribe_core_ws(temp_wav_path, last_transcribed_time)
+                    processed_segments.extend(partial_result['new_segments'])
 
                     # Clear the buffer after transcription
                     pcm_audio_buffer.clear()
@@ -243,6 +257,11 @@ async def websocket_transcribe(websocket: WebSocket):
                     logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
                     await websocket.send_json(response)
 
+                    # Optionally delete the temporary WAV file after processing
+                    if os.path.exists(temp_wav_path):
+                        os.remove(temp_wav_path)
+                        logging.info(f"Temporary WAV file {temp_wav_path} removed.")
+
             except WebSocketDisconnect:
                 logging.info("WebSocket connection closed by the client.")
                 break
@@ -254,7 +273,6 @@ async def websocket_transcribe(websocket: WebSocket):
     finally:
         logging.info("Cleaning up and closing WebSocket connection.")
 
-
 from fastapi.responses import FileResponse
 
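The server-side change can be exercised in isolation. Below is a minimal sketch of the round trip this commit introduces: write the buffered PCM bytes into a WAV file under /tmp, hand the path to the transcription function, then remove the file. transcribe_core_ws belongs to the project and is not shown in this diff, so a stub stands in for it; the sample_rate value, the standalone wrapper function, and the smoke test at the bottom are assumptions, while channels=1, sample_width=2, the /tmp path, and the temp_audio_{last_transcribed_time}.wav naming mirror the diff.

import logging
import os
import wave

logging.basicConfig(level=logging.INFO)


def transcribe_core_ws(wav_path, last_transcribed_time):
    """Stub standing in for the real transcription function referenced in the diff."""
    return {"new_segments": []}, last_transcribed_time


def flush_buffer_to_tmp_wav(pcm_audio_buffer, last_transcribed_time,
                            sample_rate=16000, channels=1, sample_width=2):
    # Ensure the /tmp directory exists (usually true on Linux; the commit guards anyway)
    tmp_directory = "/tmp"
    os.makedirs(tmp_directory, exist_ok=True)

    # Wrap the raw PCM bytes in a WAV container so the transcriber can read a file path
    temp_wav_path = os.path.join(tmp_directory, f"temp_audio_{last_transcribed_time}.wav")
    with wave.open(temp_wav_path, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_audio_buffer)
    logging.info("Wrote %d bytes to %s", os.path.getsize(temp_wav_path), temp_wav_path)

    # Transcribe from the file path, then remove the temporary file
    partial_result, last_transcribed_time = transcribe_core_ws(temp_wav_path, last_transcribed_time)
    if os.path.exists(temp_wav_path):
        os.remove(temp_wav_path)
    return partial_result, last_transcribed_time


if __name__ == "__main__":
    # One second of 16 kHz, 16-bit mono silence as a smoke test
    result, t = flush_buffer_to_tmp_wav(bytearray(16000 * 2), last_transcribed_time=0.0)
    print(result, t)

Compared with the previous tempfile.NamedTemporaryFile approach, an explicit /tmp path presumably makes the file's location and lifetime easier to log and verify, which matches the existence and size checks the commit adds.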