Spaces:
Sleeping
Sleeping
AshDavid12
committed on
Commit
·
ea326b2
1
Parent(s):
aba4a2e
Add more logging; also remove the new-segments tracking from the core transcription function
Browse files
infer.py
CHANGED
@@ -133,7 +133,7 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
|
|
133 |
"""
|
134 |
logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
|
135 |
|
136 |
-
ret = {'
|
137 |
new_last_transcribed_time = last_transcribed_time
|
138 |
|
139 |
try:
|
@@ -148,26 +148,19 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
|
|
148 |
# Track the new segments and update the last transcribed time
|
149 |
for s in segs:
|
150 |
logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
|
|
|
151 |
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
156 |
|
157 |
-
seg = {
|
158 |
-
'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
|
159 |
-
'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
|
160 |
-
'no_speech_prob': s.no_speech_prob, 'words': words
|
161 |
-
}
|
162 |
-
logging.info(f'Adding new transcription segment: {seg}')
|
163 |
-
ret['new_segments'].append(seg)
|
164 |
-
|
165 |
-
# Update the last transcribed time to the end of the current segment
|
166 |
-
new_last_transcribed_time = max(new_last_transcribed_time, s.end)
|
167 |
-
logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
|
168 |
|
169 |
#logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
|
170 |
-
return ret
|
171 |
|
172 |
|
173 |
import tempfile
|
@@ -218,7 +211,7 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
218 |
logging.info("WebSocket connection established successfully.")
|
219 |
|
220 |
try:
|
221 |
-
|
222 |
accumulated_audio_time = 0 # Track the total audio duration accumulated
|
223 |
last_transcribed_time = 0.0
|
224 |
min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
|
@@ -244,6 +237,8 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
244 |
# Receive the next chunk of PCM audio data
|
245 |
logging.info("in try before recive ")
|
246 |
audio_chunk = await websocket.receive_bytes()
|
|
|
|
|
247 |
logging.info("after recieve")
|
248 |
sys.stdout.flush()
|
249 |
if not audio_chunk:
|
@@ -252,6 +247,9 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
252 |
|
253 |
# Accumulate the raw PCM data into the buffer
|
254 |
pcm_audio_buffer.extend(audio_chunk)
|
|
|
|
|
|
|
255 |
|
256 |
# Validate the PCM data after each chunk
|
257 |
if not validate_pcm_data(pcm_audio_buffer, sample_rate, channels, sample_width):
|
@@ -282,7 +280,7 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
282 |
temp_wav_file.flush()
|
283 |
|
284 |
if not validate_wav_file(temp_wav_file.name):
|
285 |
-
logging.error(f"Invalid WAV file created: {temp_wav_file.name}")
|
286 |
await websocket.send_json({"error": "Invalid WAV file created."})
|
287 |
return
|
288 |
|
@@ -297,9 +295,9 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
297 |
raise Exception(f"Temporary WAV file {temp_wav_file.name} not found.")
|
298 |
|
299 |
# Call the transcription function with the WAV file path
|
300 |
-
partial_result
|
301 |
last_transcribed_time)
|
302 |
-
|
303 |
|
304 |
# Clear the buffer after transcription
|
305 |
pcm_audio_buffer.clear()
|
@@ -307,10 +305,9 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
307 |
|
308 |
# Send the transcription result back to the client with both new and all processed segments
|
309 |
response = {
|
310 |
-
"
|
311 |
-
"processed_segments": processed_segments
|
312 |
}
|
313 |
-
logging.info(f"Sending {len(partial_result['
|
314 |
await websocket.send_json(response)
|
315 |
|
316 |
# Optionally delete the temporary WAV file after processing
|
|
|
133 |
"""
|
134 |
logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
|
135 |
|
136 |
+
ret = {'segments': []}
|
137 |
new_last_transcribed_time = last_transcribed_time
|
138 |
|
139 |
try:
|
|
|
148 |
# Track the new segments and update the last transcribed time
|
149 |
for s in segs:
|
150 |
logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
|
151 |
+
words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
|
152 |
|
153 |
+
seg = {
|
154 |
+
'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
|
155 |
+
'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
|
156 |
+
'no_speech_prob': s.no_speech_prob, 'words': words
|
157 |
+
}
|
158 |
+
logging.info(f'Adding new transcription segment: {seg}')
|
159 |
+
ret['segments'].append(seg)
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
#logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
|
163 |
+
return ret
|
164 |
|
165 |
|
166 |
import tempfile
|
|
|
211 |
logging.info("WebSocket connection established successfully.")
|
212 |
|
213 |
try:
|
214 |
+
segments = [] # Keeps track of the segments already transcribed
|
215 |
accumulated_audio_time = 0 # Track the total audio duration accumulated
|
216 |
last_transcribed_time = 0.0
|
217 |
min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
|
|
|
237 |
# Receive the next chunk of PCM audio data
|
238 |
logging.info("in try before recive ")
|
239 |
audio_chunk = await websocket.receive_bytes()
|
240 |
+
logging.info(f"type of audio chunk : {type(audio_chunk)}")
|
241 |
+
|
242 |
logging.info("after recieve")
|
243 |
sys.stdout.flush()
|
244 |
if not audio_chunk:
|
|
|
247 |
|
248 |
# Accumulate the raw PCM data into the buffer
|
249 |
pcm_audio_buffer.extend(audio_chunk)
|
250 |
+
print(f"type of pcm buffer: {type(pcm_audio_buffer)}")
|
251 |
+
print(f"len of pcm buffer: {len(pcm_audio_buffer)}")
|
252 |
+
logging.info("after buffer extend")
|
253 |
|
254 |
# Validate the PCM data after each chunk
|
255 |
if not validate_pcm_data(pcm_audio_buffer, sample_rate, channels, sample_width):
|
|
|
280 |
temp_wav_file.flush()
|
281 |
|
282 |
if not validate_wav_file(temp_wav_file.name):
|
283 |
+
logging.error(f"Invalid WAV file created: {temp_wav_file.name}, type of file {type(temp_wav_file.name)}")
|
284 |
await websocket.send_json({"error": "Invalid WAV file created."})
|
285 |
return
|
286 |
|
|
|
295 |
raise Exception(f"Temporary WAV file {temp_wav_file.name} not found.")
|
296 |
|
297 |
# Call the transcription function with the WAV file path
|
298 |
+
partial_result = transcribe_core_ws(temp_wav_file.name,
|
299 |
last_transcribed_time)
|
300 |
+
segments.extend(partial_result['segments'])
|
301 |
|
302 |
# Clear the buffer after transcription
|
303 |
pcm_audio_buffer.clear()
|
|
|
305 |
|
306 |
# Send the transcription result back to the client with both new and all processed segments
|
307 |
response = {
|
308 |
+
"segments": segments
|
|
|
309 |
}
|
310 |
+
logging.info(f"Sending {len(partial_result['segments'])} segments to the client.")
|
311 |
await websocket.send_json(response)
|
312 |
|
313 |
# Optionally delete the temporary WAV file after processing
|