AshDavid12 committed
Commit f1bf1b3 · 1 Parent(s): cf31b20

change to wav

Files changed (2)
  1. client.py +36 -9
  2. infer.py +5 -3
client.py CHANGED
@@ -1,21 +1,48 @@
 import asyncio
 import websockets
 import wave
+import requests

 # Parameters for reading and sending the audio
 SAMPLE_RATE = 16000
-CHUNK_SIZE = 1024  # Size of the audio chunk sent at a time
-AUDIO_FILE = "https://raw.githubusercontent.com/AshDavid12/hugging_face_ivrit_streaming/main/test_copy.mp3"  # Path to the mp3 file
+CHUNK_SIZE = 8192  # Size of the audio chunk sent at a time
+AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod_serverless_whisper/main/me-hebrew.wav"  # Path to the wav file


 async def send_audio(websocket):
-    with wave.open(AUDIO_FILE, "rb") as wf:
-        data = wf.readframes(CHUNK_SIZE)
-        while data:
-            await websocket.send(data)  # Send audio chunk to the server
-            await asyncio.sleep(CHUNK_SIZE / SAMPLE_RATE)  # Simulate real-time by waiting for the duration of the chunk
-            data = wf.readframes(CHUNK_SIZE)
+    buffer_size = 1024 * 1024  # Buffer 1MB of audio data before sending for transcription
+    audio_buffer = bytearray()  # Collect audio chunks directly in memory

+    # Stream the audio file in real-time
+    with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
+        if response.status_code == 200:
+            print("Starting to stream audio file...")
+
+            for chunk in response.iter_content(chunk_size=8192):  # Stream in chunks of 8192 bytes
+                if chunk:
+                    # Append each chunk to the in-memory buffer
+                    audio_buffer.extend(chunk)
+                    print(f"Received audio chunk of size {len(chunk)} bytes.")
+
+                    # Once we have buffered enough audio data, send it for transcription
+                    if len(audio_buffer) >= buffer_size:
+                        await websocket.send(audio_buffer)  # Send buffered data directly
+                        print(f"Sent {len(audio_buffer)} bytes of audio data to the server for transcription.")
+                        audio_buffer.clear()  # Clear buffer after sending
+                        await asyncio.sleep(0.01)  # Simulate real-time streaming
+
+            print("Finished sending audio.")
+        else:
+            print(f"Failed to download audio file. Status code: {response.status_code}")
+
+async def receive_transcription(websocket):
+    while True:
+        try:
+            transcription = await websocket.recv()
+            print(f"Received transcription: {transcription}")
+        except Exception as e:
+            print(f"Error receiving transcription: {e}")
+            break

 async def receive_transcription(websocket):
     while True:
@@ -33,7 +60,7 @@ async def run_client():
     ssl_context.check_hostname = False
     ssl_context.verify_mode = ssl.CERT_NONE

-    async with websockets.connect(uri, ssl=ssl_context) as websocket:
+    async with websockets.connect(uri, ssl=ssl_context, timeout=30) as websocket:
         await asyncio.gather(
             send_audio(websocket),
             receive_transcription(websocket)
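The second hunk only shows the tail of run_client(); the uri it connects to is defined earlier in client.py and is not part of this diff. As a rough sketch of how the new coroutines fit together end to end (SERVER_URI is a placeholder rather than the repository's real endpoint, and send_audio / receive_transcription are the coroutines from the diff above):

import asyncio
import ssl
import websockets

SERVER_URI = "wss://localhost:8000/ws"  # placeholder; the real endpoint is defined elsewhere in client.py

async def run_client():
    # Accept a self-signed certificate, mirroring the ssl_context setup shown in the diff
    ssl_context = ssl.create_default_context()
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    async with websockets.connect(SERVER_URI, ssl=ssl_context) as websocket:
        await asyncio.gather(
            send_audio(websocket),            # streams the remote WAV in 1MB buffers
            receive_transcription(websocket)  # prints transcriptions as they arrive
        )

if __name__ == "__main__":
    asyncio.run(run_client())

Because send_audio now forwards the raw bytes of the downloaded file, the first buffered chunk includes the WAV header, which matches the .wav suffix the server-side temporary file switches to in the same commit.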
infer.py CHANGED
@@ -178,7 +178,7 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
         new_last_transcribed_time = max(new_last_transcribed_time, s.end)
         logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")

-    logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
+    #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
     return ret, new_last_transcribed_time


@@ -197,7 +197,7 @@ async def websocket_transcribe(websocket: WebSocket):
         logging.info("Initialized processed_segments and audio_data buffer.")

         # A temporary file to store the growing audio data
-        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
             logging.info(f"Temporary audio file created at {temp_audio_file.name}")

             # Continuously receive and process audio chunks
@@ -218,7 +218,7 @@
                 logging.debug(f"Written audio chunk to temporary file: {temp_audio_file.name}")

                 audio_data.extend(audio_chunk)  # In-memory data buffer (if needed)
-                logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")
+                #logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")

                 # Perform transcription and track new segments
                 logging.info(
@@ -228,6 +228,8 @@
                 logging.info(
                     f"Transcription completed. Sending {len(partial_result['new_segments'])} new segments to the client.")
                 # Send the new transcription result back to the client
+                logging.info(
+                    f"partial result{partial_result}")
                 await websocket.send_json(partial_result)

     except WebSocketDisconnect:
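The infer.py hunks are fragments of a larger websocket_transcribe endpoint. For orientation, here is a rough sketch of the receive-and-transcribe loop those fragments belong to, assuming a FastAPI app; the route path and any names not visible in the diff are illustrative, and transcribe_core_ws is the function from the first hunk:

# Illustrative sketch, not the exact contents of infer.py.
import logging
import tempfile
from fastapi import FastAPI, WebSocket, WebSocketDisconnect

app = FastAPI()

@app.websocket("/ws")  # the actual route path is not shown in the diff
async def websocket_transcribe(websocket: WebSocket):
    await websocket.accept()
    audio_data = bytearray()       # in-memory buffer, as in the diff
    last_transcribed_time = 0.0
    try:
        # The growing audio is persisted to a temporary .wav file (per this commit)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            while True:
                audio_chunk = await websocket.receive_bytes()  # raw bytes sent by client.py
                temp_audio_file.write(audio_chunk)
                temp_audio_file.flush()
                audio_data.extend(audio_chunk)

                # transcribe_core_ws (first hunk) transcribes the file and tracks last_transcribed_time
                partial_result, last_transcribed_time = transcribe_core_ws(
                    temp_audio_file.name, last_transcribed_time
                )
                await websocket.send_json(partial_result)
    except WebSocketDisconnect:
        logging.info("WebSocket connection closed by the client.")

The commented-out log line in the first hunk suggests transcribe_core_ws returns only the newly transcribed segments along with the updated last transcribed time, so each send_json call delivers incremental results rather than the full transcript.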