aviadr1 committed on
Commit
dd0871e
·
2 Parent(s): 861eb71 228afd7

Merge branch 'main' of https://huggingface.co/spaces/Gigaverse/ivrit-ai-streaming

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. client.py +104 -29
  3. downloaded_audio.wav +0 -1
  4. infer.py +148 -99
  5. pyproject.toml +7 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.wav
2
+ *.ogg
client.py CHANGED
@@ -1,36 +1,111 @@
1
  import asyncio
2
  import json
 
 
3
 
4
  import websockets
5
  import requests
6
  import ssl
 
 
 
 
 
7
 
8
  # Parameters for reading and sending the audio
9
  AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  async def send_audio(websocket):
12
- buffer_size = 1024*512 # Buffer audio chunks up to 512KB before sending
13
- audio_buffer = bytearray()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
16
- if response.status_code == 200:
17
- print("Starting to stream audio file...")
 
 
 
 
 
 
 
 
18
 
19
- for chunk in response.iter_content(chunk_size=1024): # Stream in chunks
20
- if chunk:
21
- audio_buffer.extend(chunk)
22
- #print(f"Received audio chunk of size {len(chunk)} bytes.")
 
 
 
 
 
23
 
24
- # Send buffered audio data once it's large enough
25
- if len(audio_buffer) >= buffer_size:
26
- await websocket.send(audio_buffer)
27
- #print(f"Sent {len(audio_buffer)} bytes of audio data.")
28
- audio_buffer.clear()
29
- await asyncio.sleep(0.01)
30
 
31
- print("Finished sending audio.")
32
- else:
33
- print(f"Failed to download audio file. Status code: {response.status_code}")
34
 
35
  async def receive_transcription(websocket):
36
  while True:
@@ -38,17 +113,17 @@ async def receive_transcription(websocket):
38
  transcription = await websocket.recv() # Receive transcription from the server
39
  print(f"Transcription: {transcription}")
40
  transcription = json.loads(transcription)
41
- download_url = transcription.get('download_url')
42
- if download_url:
43
- print(f"Download URL: {download_url}")
44
- # Download the audio file
45
- response = requests.get(download_url)
46
- if response.status_code == 200:
47
- with open("downloaded_audio.wav", "wb") as f:
48
- f.write(response.content)
49
- print("File downloaded successfully")
50
- else:
51
- print(f"Failed to download file. Status code: {response.status_code}")
52
  except Exception as e:
53
  print(f"Error receiving transcription: {e}")
54
  break
 
1
  import asyncio
2
  import json
3
+ import logging
4
+ import wave
5
 
6
  import websockets
7
  import requests
8
  import ssl
9
+ import sys
10
+
11
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
12
+ handlers=[logging.StreamHandler(sys.stdout)], force=True)
13
+ logger = logging.getLogger(__name__)
14
 
15
  # Parameters for reading and sending the audio
16
  AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
17
 
18
+ from pydub import AudioSegment
19
+
20
+
21
# Convert and resample audio before writing it to WAV
def convert_to_mono_16k(audio_file_path):
    """Load a WAV file and return it as a mono, 16 kHz pydub AudioSegment.

    :param audio_file_path: Path to the source WAV file.
    :return: AudioSegment downmixed to 1 channel and resampled to 16000 Hz.
    :raises Exception: Re-raises any pydub/decoding failure after logging it
        (e.g. missing file or ffmpeg decode error).
    """
    logger.info(f"Starting audio conversion to mono and resampling to 16kHz for file: {audio_file_path}")
    try:
        # pydub delegates the actual decoding to ffmpeg under the hood.
        audio_segment = AudioSegment.from_file(audio_file_path, format="wav")
        # Downmix to one channel and resample to the rate the server expects.
        audio_segment = audio_segment.set_channels(1).set_frame_rate(16000)
        logger.info("Audio conversion to mono and 16kHz completed successfully.")
    except Exception as e:
        logger.error(f"Error during audio conversion: {e}")
        # Bare raise preserves the original traceback (raise e would truncate it).
        raise
    return audio_segment
40
+
41
+
42
async def send_audio(websocket, audio_file_path='test_copy.wav'):
    """Convert a local WAV file to mono/16 kHz PCM and stream it over the websocket.

    A JSON metadata message (sample rate, channels, sample width) is sent
    first so the server can interpret the raw bytes, then the PCM data is
    sent in small chunks with a short pause to simulate real-time capture.

    :param websocket: An open websocket connection to the transcription server.
    :param audio_file_path: WAV file to stream (default 'test_copy.wav',
        matching the previous hard-coded behavior).
    """
    buffer_size = 1024 * 16  # Send smaller chunks (16KB) for real-time processing
    logger.info("Converting the audio to mono and 16kHz.")

    try:
        converted_audio = convert_to_mono_16k(audio_file_path)
    except Exception as e:
        logger.error(f"Failed to convert audio: {e}")
        return

    # The server needs the PCM parameters before it can decode the raw bytes.
    metadata = {
        'sample_rate': 16000,  # Resampled rate
        'channels': 1,         # Converted to mono
        'sampwidth': 2         # Assuming 16-bit audio
    }
    await websocket.send(json.dumps(metadata))
    logger.info(f"Sent metadata: {metadata}")

    try:
        raw_data = converted_audio.raw_data
        logger.info(f"Starting to send raw PCM audio data. Total data size: {len(raw_data)} bytes.")

        for i in range(0, len(raw_data), buffer_size):
            pcm_chunk = raw_data[i:i + buffer_size]
            await websocket.send(pcm_chunk)  # Send raw PCM data chunk
            await asyncio.sleep(0.01)  # Simulate real-time sending

        logger.info("Completed sending all audio data.")
    except Exception as e:
        logger.error(f"Error while sending audio data: {e}")
 
 
 
 
108
 
 
 
 
109
 
110
async def receive_transcription(websocket):
    """Continuously receive transcription messages and print them.

    Each message is printed raw and then parsed as JSON. The loop exits on
    the first receive/parse error (including the connection closing).

    :param websocket: An open websocket connection to the transcription server.
    """
    while True:
        try:
            transcription = await websocket.recv()  # Receive transcription from the server
            print(f"Transcription: {transcription}")
            transcription = json.loads(transcription)
        except Exception as e:
            print(f"Error receiving transcription: {e}")
            break
downloaded_audio.wav DELETED
@@ -1 +0,0 @@
1
- {"error":"File not found"}
 
 
infer.py CHANGED
@@ -21,7 +21,7 @@ import asyncio
21
  from model import segment_to_dict
22
 
23
  # Configure logging
24
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s: %(message)s',
25
  handlers=[logging.StreamHandler(sys.stdout)], force=True)
26
  logger = logging.getLogger(__name__)
27
  #logging.getLogger("asyncio").setLevel(logging.DEBUG)
@@ -184,24 +184,14 @@ async def read_root():
184
  import tempfile
185
 
186
 
 
187
 
188
- def transcribe_core_ws(audio_file, last_transcribed_time):
189
- """
190
- Transcribe the audio file and return only the segments that have not been processed yet.
191
-
192
- :param audio_file: Path to the growing audio file.
193
- :param last_transcribed_time: The last time (in seconds) that was transcribed.
194
- :return: Newly transcribed segments and the updated last transcribed time.
195
- """
196
- logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
197
-
198
- ret = {'new_segments': []}
199
- new_last_transcribed_time = last_transcribed_time
200
 
201
  try:
202
  # Transcribe the entire audio file
203
  logging.debug(f"Initiating model transcription for file: {audio_file}")
204
- segs, _ = model.transcribe(audio_file, language='he', word_timestamps=True)
205
  logging.info('Transcription completed successfully.')
206
  except Exception as e:
207
  logging.error(f"Error during transcription: {e}")
@@ -210,31 +200,62 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
210
  # Track the new segments and update the last transcribed time
211
  for s in segs:
212
  logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
 
213
 
214
- # Only process segments that start after the last transcribed time
215
- if s.start >= last_transcribed_time:
216
- logging.info(f"New segment found starting at {s.start} seconds.")
217
- words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
 
 
 
218
 
219
- seg = {
220
- 'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
221
- 'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
222
- 'no_speech_prob': s.no_speech_prob, 'words': words
223
- }
224
- logging.info(f'Adding new transcription segment: {seg}')
225
- ret['new_segments'].append(seg)
226
-
227
- # Update the last transcribed time to the end of the current segment
228
- new_last_transcribed_time = max(new_last_transcribed_time, s.end)
229
- logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
230
 
231
  #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
232
- return ret, new_last_transcribed_time
233
 
234
 
235
  import tempfile
236
 
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  @app.websocket("/wtranscribe")
239
  async def websocket_transcribe(websocket: WebSocket):
240
  logging.info("New WebSocket connection request received.")
@@ -242,77 +263,111 @@ async def websocket_transcribe(websocket: WebSocket):
242
  logging.info("WebSocket connection established successfully.")
243
 
244
  try:
245
- processed_segments = [] # Keeps track of the segments already transcribed
246
- accumulated_audio_size = 0 # Track how much audio data has been buffered
247
  accumulated_audio_time = 0 # Track the total audio duration accumulated
248
  last_transcribed_time = 0.0
249
- #min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
250
-
251
- # A temporary file to store the growing audio data
252
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
253
- logging.info(f"Temporary audio file created at {temp_audio_file.name}")
254
- #temp_audio_filename = os.path.basename(temp_audio_file.name)
255
- output_directory = "/tmp"
256
- os.makedirs(output_directory, exist_ok=True)
257
- chunk_counter = 0
258
-
259
- while True:
260
- try:
261
- # Receive the next chunk of audio data
262
- audio_chunk = await websocket.receive_bytes()
263
- if not audio_chunk:
264
- logging.warning("Received empty audio chunk, skipping processing hey.")
265
- continue
266
-
267
-
268
- # Create a new file for the chunk
269
- chunk_filename = os.path.join(output_directory, f"audio_chunk_{chunk_counter}.wav")
270
- chunk_counter += 1
271
-
272
- with wave.open(chunk_filename, 'wb') as wav_file:
273
- wav_file.setnchannels(1) # Mono channel
274
- wav_file.setsampwidth(2) # 2 bytes per sample (16-bit audio)
275
- wav_file.setframerate(16000) # 16 kHz sample rate
276
- wav_file.writeframes(audio_chunk)
277
-
278
- # with open(chunk_filename, 'wb') as audio_file:
279
- # audio_file.write(audio_chunk)
280
-
281
- # Write audio chunk to file and accumulate size and time
282
- temp_audio_file.write(audio_chunk)
283
- temp_audio_file.flush()
284
- accumulated_audio_size += len(audio_chunk)
285
-
286
- # Estimate the duration of the chunk based on its size (e.g., 16kHz audio)
287
- chunk_duration = len(audio_chunk) / (16000 * 2) # Assuming 16kHz mono WAV (2 bytes per sample)
288
- accumulated_audio_time += chunk_duration
289
- logging.info(f"Received and buffered {len(audio_chunk)} bytes, total buffered: {accumulated_audio_size} bytes, total time: {accumulated_audio_time:.2f} seconds")
290
-
291
- # Transcribe when enough time (audio) is accumulated (e.g., at least 5 seconds of audio)
292
- #if accumulated_audio_time >= min_transcription_time:
293
- #logging.info("Buffered enough audio time, starting transcription.")
294
-
295
-
296
- # Call the transcription function with the last processed time
297
- partial_result, last_transcribed_time = transcribe_core_ws(temp_audio_file.name, last_transcribed_time)
298
- accumulated_audio_time = 0 # Reset the accumulated audio time
299
- processed_segments.extend(partial_result['new_segments'])
300
-
301
- # Reset the accumulated audio size after transcription
302
- accumulated_audio_size = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
  # Send the transcription result back to the client with both new and all processed segments
305
  response = {
306
- "new_segments": partial_result['new_segments'],
307
- "processed_segments": processed_segments,
308
- "download_url": f"https://gigaverse-ivrit-ai-streaming.hf.space/download_audio/{os.path.basename(chunk_filename)}"
309
  }
310
- logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
311
  await websocket.send_json(response)
312
 
313
- except WebSocketDisconnect:
314
- logging.info("WebSocket connection closed by the client.")
315
- break
 
 
 
 
 
316
 
317
  except Exception as e:
318
  logging.error(f"Unexpected error during WebSocket transcription: {e}")
@@ -459,12 +514,6 @@ async def download_audio(filename: str):
459
  #
460
 
461
 
462
-
463
-
464
-
465
-
466
-
467
-
468
  # @app.websocket("/wtranscribe")
469
  # async def websocket_transcribe(websocket: WebSocket):
470
  # logging.info("New WebSocket connection request received.")
 
21
  from model import segment_to_dict
22
 
23
  # Configure logging
24
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
25
  handlers=[logging.StreamHandler(sys.stdout)], force=True)
26
  logger = logging.getLogger(__name__)
27
  #logging.getLogger("asyncio").setLevel(logging.DEBUG)
 
184
  import tempfile
185
 
186
 
187
async def transcribe_core_ws(audio_file):
    """Transcribe a complete audio file and return all segments.

    Runs the blocking model transcription in a worker thread via
    asyncio.to_thread so the event loop stays responsive.

    :param audio_file: Path (or file-like input) accepted by model.transcribe.
    :return: dict with a 'segments' list; each segment includes word-level
        timestamps and probabilities.
    :raises Exception: Re-raises any transcription failure after logging it.
    """
    ret = {'segments': []}

    try:
        # Transcribe the entire audio file
        logging.debug(f"Initiating model transcription for file: {audio_file}")
        # model.transcribe is synchronous and compute-bound; off-load it.
        segs, _ = await asyncio.to_thread(model.transcribe, audio_file, language='he', word_timestamps=True)
        logging.info('Transcription completed successfully.')
    except Exception as e:
        logging.error(f"Error during transcription: {e}")
        # NOTE(review): without re-raising, `segs` below would be unbound and
        # the loop would fail with a NameError masking the real error.
        raise

    # Collect every segment together with its word-level details.
    for s in segs:
        logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
        words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]

        seg = {
            'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
            'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
            'no_speech_prob': s.no_speech_prob, 'words': words
        }
        logging.info(f'Adding new transcription segment: {seg}')
        ret['segments'].append(seg)

    return ret
216
 
217
 
218
  import tempfile
219
 
220
 
221
# Function to verify if the PCM data is valid
def validate_pcm_data(pcm_audio_buffer, sample_rate, channels, sample_width):
    """Validates the PCM data buffer to ensure it conforms to the expected format.

    :param pcm_audio_buffer: bytes/bytearray of raw interleaved PCM samples.
    :param sample_rate: Samples per second (e.g. 16000).
    :param channels: Number of interleaved channels (1 = mono).
    :param sample_width: Bytes per sample (2 = 16-bit audio).
    :return: False when the buffer is empty, True otherwise; a warning is
        logged if the buffer is not aligned to a whole frame.
    """
    logging.info(f"Validating PCM data: total size = {len(pcm_audio_buffer)} bytes.")

    actual_sample_size = len(pcm_audio_buffer)
    if actual_sample_size == 0:
        logging.error("Received PCM data is empty.")
        return False

    bytes_per_second = sample_rate * channels * sample_width
    logging.info(f"Expected sample size per second: {bytes_per_second} bytes.")

    # PCM validity requires whole frames (channels * sample_width bytes each).
    # The previous check demanded a multiple of a full SECOND of audio, which
    # flagged every normal partial-second streamed chunk as suspect.
    frame_size = channels * sample_width
    if actual_sample_size % frame_size != 0:
        logging.warning(
            f"PCM data size {actual_sample_size} is not a multiple of the frame size ({frame_size} bytes). Data may be corrupted or incomplete.")

    return True
241
+
242
+
243
# Function to validate if the created WAV file is valid
def validate_wav_file(wav_file_path):
    """Validates if the WAV file was created correctly and can be opened.

    :param wav_file_path: Path of the WAV file to inspect.
    :return: True when the file opens and its header is readable;
        False for a malformed, truncated, or missing file.
    """
    try:
        with wave.open(wav_file_path, 'rb') as wav_file:
            sample_rate = wav_file.getframerate()
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            logging.info(
                f"WAV file details - Sample Rate: {sample_rate}, Channels: {channels}, Sample Width: {sample_width}")
        return True
    except (wave.Error, EOFError, OSError) as e:
        # wave.Error alone misses a missing/unreadable file (OSError) and a
        # truncated header (EOFError); a validator should return False for
        # those too instead of raising.
        logging.error(f"Error reading WAV file: {e}")
        return False
257
+
258
+
259
  @app.websocket("/wtranscribe")
260
  async def websocket_transcribe(websocket: WebSocket):
261
  logging.info("New WebSocket connection request received.")
 
263
  logging.info("WebSocket connection established successfully.")
264
 
265
  try:
266
+ segments = [] # Keeps track of the segments already transcribed
 
267
  accumulated_audio_time = 0 # Track the total audio duration accumulated
268
  last_transcribed_time = 0.0
269
+ min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
270
+
271
+ # A buffer to store raw PCM audio data
272
+ pcm_audio_buffer = bytearray()
273
+ logging.info("im here, is it failing?.")
274
+
275
+ # Metadata for the incoming PCM data (sample rate, channels, and sample width should be consistent)
276
+ sample_rate = 16000 # 16kHz
277
+ channels = 1 # Mono
278
+ sample_width = 2 # 2 bytes per sample (16-bit audio)
279
+
280
+ # Ensure the /tmp directory exists
281
+ tmp_directory = "/tmp"
282
+ if not os.path.exists(tmp_directory):
283
+ logging.info(f"Creating /tmp directory: {tmp_directory}")
284
+ os.makedirs(tmp_directory)
285
+ logging.info("im here, is it failing?2.")
286
+ while True:
287
+ logging.info("in while true")
288
+ try:
289
+ # Receive the next chunk of PCM audio data
290
+ logging.info("in try before recive ")
291
+ audio_chunk = await asyncio.wait_for(websocket.receive_bytes(), timeout=10.0)
292
+
293
+ logging.info("after recieve")
294
+ sys.stdout.flush()
295
+ if not audio_chunk:
296
+ logging.warning("Received empty audio chunk, skipping processing.")
297
+ continue
298
+
299
+ # Accumulate the raw PCM data into the buffer
300
+ pcm_audio_buffer.extend(audio_chunk)
301
+ print(f"len of pcm buffer: {len(pcm_audio_buffer)}")
302
+ logging.info("after buffer extend")
303
+
304
+ # Validate the PCM data after each chunk
305
+ if not validate_pcm_data(pcm_audio_buffer, sample_rate, channels, sample_width):
306
+ logging.error("Invalid PCM data received. Aborting transcription.")
307
+ await websocket.send_json({"error": "Invalid PCM data received."})
308
+ return
309
+
310
+ # Estimate the duration of the chunk based on its size
311
+ chunk_duration = len(audio_chunk) / (sample_rate * channels * sample_width)
312
+ accumulated_audio_time += chunk_duration
313
+ logging.info(
314
+ f"Received and buffered {len(audio_chunk)} bytes, total buffered: {len(pcm_audio_buffer)} bytes, total time: {accumulated_audio_time:.2f} seconds")
315
+
316
+ # Transcribe when enough time (audio) is accumulated (e.g., at least 5 seconds of audio)
317
+ if accumulated_audio_time >= min_transcription_time:
318
+ logging.info("Buffered enough audio time, starting transcription.")
319
+
320
+ # Create a temporary WAV file in /tmp for transcription
321
+
322
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, dir="/tmp") as temp_wav_file:
323
+ logging.info(f"Temporary audio file created at {temp_wav_file.name}")
324
+
325
+ with wave.open(temp_wav_file.name, 'wb') as wav_file:
326
+ wav_file.setnchannels(channels)
327
+ wav_file.setsampwidth(sample_width)
328
+ wav_file.setframerate(sample_rate)
329
+ wav_file.writeframes(pcm_audio_buffer)
330
+ temp_wav_file.flush()
331
+
332
+ if not validate_wav_file(temp_wav_file.name):
333
+ logging.error(f"Invalid WAV file created: {temp_wav_file.name}")
334
+ await websocket.send_json({"error": "Invalid WAV file created."})
335
+ return
336
+
337
+ logging.info(f"Temporary WAV file created at {temp_wav_file.name} for transcription.")
338
+
339
+ # Log to confirm that the file exists and has the expected size
340
+ if os.path.exists(temp_wav_file.name):
341
+ file_size = os.path.getsize(temp_wav_file.name)
342
+ logging.info(f"Temporary WAV file size: {file_size} bytes.")
343
+ else:
344
+ logging.error(f"Temporary WAV file {temp_wav_file.name} does not exist.")
345
+ raise Exception(f"Temporary WAV file {temp_wav_file.name} not found.")
346
+
347
+ with open(temp_wav_file.name, 'rb') as audio_file:
348
+ audio_data = audio_file.read()
349
+ partial_result = await asyncio.to_thread(transcribe_core_ws,audio_data)
350
+ segments.extend(partial_result['segments'])
351
+
352
+ # Clear the buffer after transcription
353
+ pcm_audio_buffer.clear()
354
+ accumulated_audio_time = 0 # Reset accumulated time
355
 
356
  # Send the transcription result back to the client with both new and all processed segments
357
  response = {
358
+ "segments": segments
 
 
359
  }
360
+ logging.info(f"Sending {len(partial_result['segments'])} segments to the client.")
361
  await websocket.send_json(response)
362
 
363
+ # Optionally delete the temporary WAV file after processing
364
+ if os.path.exists(temp_wav_file.name):
365
+ os.remove(temp_wav_file.name)
366
+ logging.info(f"Temporary WAV file {temp_wav_file.name} removed.")
367
+
368
+ except WebSocketDisconnect:
369
+ logging.info("WebSocket connection closed by the client.")
370
+ break
371
 
372
  except Exception as e:
373
  logging.error(f"Unexpected error during WebSocket transcription: {e}")
 
514
  #
515
 
516
 
 
 
 
 
 
 
517
  # @app.websocket("/wtranscribe")
518
  # async def websocket_transcribe(websocket: WebSocket):
519
  # logging.info("New WebSocket connection request received.")
pyproject.toml CHANGED
@@ -37,6 +37,13 @@ librosa = "^0.10.2.post1"
37
  uvicorn = "^0.30.6"
38
  torchaudio = "^2.4.1"
39
  silero-vad = "^5.1"
 
 
 
 
 
 
 
40
 
41
 
42
 
 
37
  uvicorn = "^0.30.6"
38
  torchaudio = "^2.4.1"
39
  silero-vad = "^5.1"
40
+ #openai = "^1.42.0"
41
+ #numpy = "^1.22.0"
42
+ #torch = "2.1.0"
43
+ #sounddevice = "^0.5.0"
44
+ #pydub = "^0.25.1"
45
+ #ffmpeg = "^1.4"
46
+
47
 
48
 
49