Spaces:
Sleeping
Sleeping
AshDavid12
committed on
Commit
·
228afd7
1
Parent(s):
5b3e211
added asyncio to thread
Browse files
infer.py
CHANGED
@@ -126,14 +126,14 @@ async def read_root():
|
|
126 |
import tempfile
|
127 |
|
128 |
|
129 |
-
def transcribe_core_ws(audio_file):
|
130 |
|
131 |
ret = {'segments': []}
|
132 |
|
133 |
try:
|
134 |
# Transcribe the entire audio file
|
135 |
logging.debug(f"Initiating model transcription for file: {audio_file}")
|
136 |
-
segs, _ = model.transcribe
|
137 |
logging.info('Transcription completed successfully.')
|
138 |
except Exception as e:
|
139 |
logging.error(f"Error during transcription: {e}")
|
@@ -231,7 +231,6 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
231 |
# Receive the next chunk of PCM audio data
|
232 |
logging.info("in try before recive ")
|
233 |
audio_chunk = await asyncio.wait_for(websocket.receive_bytes(), timeout=10.0)
|
234 |
-
logging.info(f"type of audio chunk : {type(audio_chunk)}")
|
235 |
|
236 |
logging.info("after recieve")
|
237 |
sys.stdout.flush()
|
@@ -241,7 +240,6 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
241 |
|
242 |
# Accumulate the raw PCM data into the buffer
|
243 |
pcm_audio_buffer.extend(audio_chunk)
|
244 |
-
print(f"type of pcm buffer: {type(pcm_audio_buffer)}")
|
245 |
print(f"len of pcm buffer: {len(pcm_audio_buffer)}")
|
246 |
logging.info("after buffer extend")
|
247 |
|
@@ -274,7 +272,7 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
274 |
temp_wav_file.flush()
|
275 |
|
276 |
if not validate_wav_file(temp_wav_file.name):
|
277 |
-
logging.error(f"Invalid WAV file created: {temp_wav_file.name}
|
278 |
await websocket.send_json({"error": "Invalid WAV file created."})
|
279 |
return
|
280 |
|
@@ -290,7 +288,7 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
290 |
|
291 |
with open(temp_wav_file.name, 'rb') as audio_file:
|
292 |
audio_data = audio_file.read()
|
293 |
-
partial_result = transcribe_core_ws
|
294 |
segments.extend(partial_result['segments'])
|
295 |
|
296 |
# Clear the buffer after transcription
|
|
|
126 |
import tempfile
|
127 |
|
128 |
|
129 |
+
async def transcribe_core_ws(audio_file):
|
130 |
|
131 |
ret = {'segments': []}
|
132 |
|
133 |
try:
|
134 |
# Transcribe the entire audio file
|
135 |
logging.debug(f"Initiating model transcription for file: {audio_file}")
|
136 |
+
segs, _ = await asyncio.to_thread(model.transcribe,audio_file, language='he', word_timestamps=True)
|
137 |
logging.info('Transcription completed successfully.')
|
138 |
except Exception as e:
|
139 |
logging.error(f"Error during transcription: {e}")
|
|
|
231 |
# Receive the next chunk of PCM audio data
|
232 |
logging.info("in try before recive ")
|
233 |
audio_chunk = await asyncio.wait_for(websocket.receive_bytes(), timeout=10.0)
|
|
|
234 |
|
235 |
logging.info("after recieve")
|
236 |
sys.stdout.flush()
|
|
|
240 |
|
241 |
# Accumulate the raw PCM data into the buffer
|
242 |
pcm_audio_buffer.extend(audio_chunk)
|
|
|
243 |
print(f"len of pcm buffer: {len(pcm_audio_buffer)}")
|
244 |
logging.info("after buffer extend")
|
245 |
|
|
|
272 |
temp_wav_file.flush()
|
273 |
|
274 |
if not validate_wav_file(temp_wav_file.name):
|
275 |
+
logging.error(f"Invalid WAV file created: {temp_wav_file.name}")
|
276 |
await websocket.send_json({"error": "Invalid WAV file created."})
|
277 |
return
|
278 |
|
|
|
288 |
|
289 |
with open(temp_wav_file.name, 'rb') as audio_file:
|
290 |
audio_data = audio_file.read()
|
291 |
+
partial_result = await asyncio.to_thread(transcribe_core_ws,audio_data)
|
292 |
segments.extend(partial_result['segments'])
|
293 |
|
294 |
# Clear the buffer after transcription
|