AshDavid12 committed on
Commit
ea326b2
·
1 Parent(s): aba4a2e

more logs; also removed the new-segments part from core

Browse files
Files changed (1) hide show
  1. infer.py +21 -24
infer.py CHANGED
@@ -133,7 +133,7 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
133
  """
134
  logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
135
 
136
- ret = {'new_segments': []}
137
  new_last_transcribed_time = last_transcribed_time
138
 
139
  try:
@@ -148,26 +148,19 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
148
  # Track the new segments and update the last transcribed time
149
  for s in segs:
150
  logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
 
151
 
152
- # Only process segments that start after the last transcribed time
153
- if s.start >= last_transcribed_time:
154
- logging.info(f"New segment found starting at {s.start} seconds.")
155
- words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
 
 
 
156
 
157
- seg = {
158
- 'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
159
- 'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
160
- 'no_speech_prob': s.no_speech_prob, 'words': words
161
- }
162
- logging.info(f'Adding new transcription segment: {seg}')
163
- ret['new_segments'].append(seg)
164
-
165
- # Update the last transcribed time to the end of the current segment
166
- new_last_transcribed_time = max(new_last_transcribed_time, s.end)
167
- logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
168
 
169
  #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
170
- return ret, new_last_transcribed_time
171
 
172
 
173
  import tempfile
@@ -218,7 +211,7 @@ async def websocket_transcribe(websocket: WebSocket):
218
  logging.info("WebSocket connection established successfully.")
219
 
220
  try:
221
- processed_segments = [] # Keeps track of the segments already transcribed
222
  accumulated_audio_time = 0 # Track the total audio duration accumulated
223
  last_transcribed_time = 0.0
224
  min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
@@ -244,6 +237,8 @@ async def websocket_transcribe(websocket: WebSocket):
244
  # Receive the next chunk of PCM audio data
245
  logging.info("in try before recive ")
246
  audio_chunk = await websocket.receive_bytes()
 
 
247
  logging.info("after recieve")
248
  sys.stdout.flush()
249
  if not audio_chunk:
@@ -252,6 +247,9 @@ async def websocket_transcribe(websocket: WebSocket):
252
 
253
  # Accumulate the raw PCM data into the buffer
254
  pcm_audio_buffer.extend(audio_chunk)
 
 
 
255
 
256
  # Validate the PCM data after each chunk
257
  if not validate_pcm_data(pcm_audio_buffer, sample_rate, channels, sample_width):
@@ -282,7 +280,7 @@ async def websocket_transcribe(websocket: WebSocket):
282
  temp_wav_file.flush()
283
 
284
  if not validate_wav_file(temp_wav_file.name):
285
- logging.error(f"Invalid WAV file created: {temp_wav_file.name}")
286
  await websocket.send_json({"error": "Invalid WAV file created."})
287
  return
288
 
@@ -297,9 +295,9 @@ async def websocket_transcribe(websocket: WebSocket):
297
  raise Exception(f"Temporary WAV file {temp_wav_file.name} not found.")
298
 
299
  # Call the transcription function with the WAV file path
300
- partial_result, last_transcribed_time = transcribe_core_ws(temp_wav_file.name,
301
  last_transcribed_time)
302
- processed_segments.extend(partial_result['new_segments'])
303
 
304
  # Clear the buffer after transcription
305
  pcm_audio_buffer.clear()
@@ -307,10 +305,9 @@ async def websocket_transcribe(websocket: WebSocket):
307
 
308
  # Send the transcription result back to the client with both new and all processed segments
309
  response = {
310
- "new_segments": partial_result['new_segments'],
311
- "processed_segments": processed_segments
312
  }
313
- logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
314
  await websocket.send_json(response)
315
 
316
  # Optionally delete the temporary WAV file after processing
 
133
  """
134
  logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
135
 
136
+ ret = {'segments': []}
137
  new_last_transcribed_time = last_transcribed_time
138
 
139
  try:
 
148
  # Track the new segments and update the last transcribed time
149
  for s in segs:
150
  logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
151
+ words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
152
 
153
+ seg = {
154
+ 'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
155
+ 'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
156
+ 'no_speech_prob': s.no_speech_prob, 'words': words
157
+ }
158
+ logging.info(f'Adding new transcription segment: {seg}')
159
+ ret['segments'].append(seg)
160
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
163
+ return ret
164
 
165
 
166
  import tempfile
 
211
  logging.info("WebSocket connection established successfully.")
212
 
213
  try:
214
+ segments = [] # Keeps track of the segments already transcribed
215
  accumulated_audio_time = 0 # Track the total audio duration accumulated
216
  last_transcribed_time = 0.0
217
  min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
 
237
  # Receive the next chunk of PCM audio data
238
  logging.info("in try before recive ")
239
  audio_chunk = await websocket.receive_bytes()
240
+ logging.info(f"type of audio chunk : {type(audio_chunk)}")
241
+
242
  logging.info("after recieve")
243
  sys.stdout.flush()
244
  if not audio_chunk:
 
247
 
248
  # Accumulate the raw PCM data into the buffer
249
  pcm_audio_buffer.extend(audio_chunk)
250
+ print(f"type of pcm buffer: {type(pcm_audio_buffer)}")
251
+ print(f"len of pcm buffer: {len(pcm_audio_buffer)}")
252
+ logging.info("after buffer extend")
253
 
254
  # Validate the PCM data after each chunk
255
  if not validate_pcm_data(pcm_audio_buffer, sample_rate, channels, sample_width):
 
280
  temp_wav_file.flush()
281
 
282
  if not validate_wav_file(temp_wav_file.name):
283
+ logging.error(f"Invalid WAV file created: {temp_wav_file.name}, type of file {type(temp_wav_file.name)}")
284
  await websocket.send_json({"error": "Invalid WAV file created."})
285
  return
286
 
 
295
  raise Exception(f"Temporary WAV file {temp_wav_file.name} not found.")
296
 
297
  # Call the transcription function with the WAV file path
298
+ partial_result = transcribe_core_ws(temp_wav_file.name,
299
  last_transcribed_time)
300
+ segments.extend(partial_result['segments'])
301
 
302
  # Clear the buffer after transcription
303
  pcm_audio_buffer.clear()
 
305
 
306
  # Send the transcription result back to the client with both new and all processed segments
307
  response = {
308
+ "segments": segments
 
309
  }
310
+ logging.info(f"Sending {len(partial_result['segments'])} segments to the client.")
311
  await websocket.send_json(response)
312
 
313
  # Optionally delete the temporary WAV file after processing