AshDavid12 committed on
Commit
a9516a4
·
1 Parent(s): e9d738a
Files changed (4) hide show
  1. client.py +23 -31
  2. infer.py +9 -11
  3. poetry.lock +21 -1
  4. pyproject.toml +1 -0
client.py CHANGED
@@ -1,5 +1,6 @@
1
  import asyncio
2
  import io
 
3
 
4
  import numpy as np
5
  import websockets
@@ -8,6 +9,8 @@ import ssl
8
  import wave
9
  import logging
10
  import sys
 
 
11
 
12
  # Parameters for reading and sending the audio
13
  #AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
@@ -35,41 +38,26 @@ async def send_receive():
35
  await asyncio.gather(send_task, receive_task)
36
  except Exception as e:
37
  logger.error(f"WebSocket connection error: {e}")
 
38
 
39
- async def send_audio(websocket):
40
- wav_file = AUDIO_FILE_URL # Replace with the path to your WAV file
41
- logger.info(f"Opening WAV file: {wav_file}")
42
 
43
- try:
44
- # Download the WAV file
45
- response = requests.get(wav_file)
46
- response.raise_for_status()
47
- wav_bytes = io.BytesIO(response.content)
48
-
49
-
50
- # Send audio data in chunks directly from the WAV file
51
- chunk_size = 1024 # Sending data in chunks of 1024 bytes, which can be adjusted
52
-
53
- total_chunks = 0
54
- total_bytes_sent = 0
55
-
56
- # While loop to send audio data chunk by chunk
57
- while True:
58
- chunk = wav_bytes.read(chunk_size)
59
- if not chunk:
60
- break
61
- await websocket.send(chunk)
62
- total_chunks += 1
63
- total_bytes_sent += len(chunk)
64
- #logger.debug(f"Sent chunk {total_chunks}: {len(chunk)} bytes")
65
- #await asyncio.sleep(0.1) # Simulate real-time streaming
66
- #logger.info(f"Finished sending audio data: {total_chunks} chunks sent, total bytes sent: {total_bytes_sent}")
67
 
68
- except Exception as e:
69
- logger.error(f"Send audio error: {e}")
 
 
 
 
 
 
 
 
 
70
 
71
- finally:
72
- logger.info("WAV file closed")
73
 
74
  async def receive_transcriptions(websocket):
75
  try:
@@ -80,6 +68,10 @@ async def receive_transcriptions(websocket):
80
  except Exception as e:
81
  logger.error(f"Receive transcription error: {e}")
82
 
 
 
 
 
83
  if __name__ == "__main__":
84
  asyncio.run(send_receive())
85
 
 
1
  import asyncio
2
  import io
3
+ import json
4
 
5
  import numpy as np
6
  import websockets
 
9
  import wave
10
  import logging
11
  import sys
12
+ import sounddevice as sd
13
+
14
 
15
  # Parameters for reading and sending the audio
16
  #AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
 
38
  await asyncio.gather(send_task, receive_task)
39
  except Exception as e:
40
  logger.error(f"WebSocket connection error: {e}")
41
+ max_size_bytes = 50_000_000 # 50 MB
42
 
43
+ SAMPLE_RATE = 16000
44
+ CHUNK_SIZE =1024
 
45
 
46
async def send_audio_chunks(websocket):
    """Capture audio from the input device and stream it over ``websocket``.

    Opens a single-channel ``sd.InputStream`` at ``SAMPLE_RATE`` with blocks
    of ``CHUNK_SIZE`` frames. Every captured block is serialised as a JSON
    list of samples and sent as one WebSocket message. The coroutine never
    returns on its own; it keeps the stream open until it is cancelled or an
    exception propagates.

    Args:
        websocket: An open connection object exposing an awaitable
            ``send(str)`` method.
    """
    # The stream callback is invoked on the audio library's own thread, where
    # no event loop is running. Capture the loop here, on the event-loop
    # thread, so the callback can hand coroutines back to it safely.
    loop = asyncio.get_running_loop()

    def audio_callback(indata, frames, time, status):
        """Forward one captured block to the event loop for sending."""
        if status:
            # Report stream problems (e.g. input overflow) instead of
            # silently dropping the flag.
            logger.warning(f"Audio input status: {status}")
        # Use only the first channel and convert it to a JSON-serialisable
        # list of samples.
        audio_chunk = indata[:, 0].tolist()
        asyncio.run_coroutine_threadsafe(
            websocket.send(json.dumps(audio_chunk)), loop
        )

    # Start the audio stream; the context manager stops and closes it on exit.
    with sd.InputStream(
        callback=audio_callback,
        channels=1,
        samplerate=SAMPLE_RATE,
        blocksize=CHUNK_SIZE,
    ):
        await asyncio.Future()  # Never completes: keeps the stream running.
60
 
 
 
61
 
62
  async def receive_transcriptions(websocket):
63
  try:
 
68
  except Exception as e:
69
  logger.error(f"Receive transcription error: {e}")
70
 
71
+
72
+
73
+
74
+
75
  if __name__ == "__main__":
76
  asyncio.run(send_receive())
77
 
infer.py CHANGED
@@ -162,23 +162,21 @@ async def process_audio_stream(websocket: WebSocket):
162
 
163
  audio_chunk = process_received_audio(data)
164
  #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
 
 
 
 
 
 
 
 
165
 
166
- audio_buffer = np.concatenate((audio_buffer, audio_chunk))
167
  #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
168
  except Exception as e:
169
  logger.error(f"Error receiving data: {e}")
170
  break
171
 
172
- # Check if enough audio has been buffered
173
- if len(audio_buffer) >= min_chunk_size * sampling_rate:
174
- if transcription_task is None or transcription_task.done():
175
- # Start a new transcription task
176
- #logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
177
- transcription_task = asyncio.create_task(
178
- transcribe_and_send(websocket, audio_buffer.copy())
179
- )
180
- audio_buffer = np.array([], dtype=np.float32)
181
- #logger.debug("Audio buffer reset after starting transcription task")
182
 
183
  async def transcribe_and_send(websocket: WebSocket, audio_data):
184
  """Run transcription in a separate thread and send the result to the client."""
 
162
 
163
  audio_chunk = process_received_audio(data)
164
  #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
165
+ # Check if enough audio has been buffered
166
+ if len(audio_buffer) >= min_chunk_size * sampling_rate:
167
+ if transcription_task is None or transcription_task.done():
168
+ # Start a new transcription task
169
+ # logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
170
+ transcription_task = asyncio.create_task(
171
+ transcribe_and_send(websocket, audio_chunk)
172
+ )
173
 
 
174
  #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
175
  except Exception as e:
176
  logger.error(f"Error receiving data: {e}")
177
  break
178
 
179
+
 
 
 
 
 
 
 
 
 
180
 
181
  async def transcribe_and_send(websocket: WebSocket, audio_data):
182
  """Run transcription in a separate thread and send the result to the client."""
poetry.lock CHANGED
@@ -2935,6 +2935,26 @@ files = [
2935
  {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
2936
  ]
2937
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2938
  [[package]]
2939
  name = "soundfile"
2940
  version = "0.12.1"
@@ -3842,4 +3862,4 @@ type = ["pytest-mypy"]
3842
  [metadata]
3843
  lock-version = "2.0"
3844
  python-versions = "3.9.1"
3845
- content-hash = "7e3bbbe5cc618ae8b5762bdf1991ca224636038b44b6b425c66ea3f5ec0f15af"
 
2935
  {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
2936
  ]
2937
 
2938
+ [[package]]
2939
+ name = "sounddevice"
2940
+ version = "0.5.0"
2941
+ description = "Play and Record Sound with Python"
2942
+ optional = false
2943
+ python-versions = ">=3.7"
2944
+ files = [
2945
+ {file = "sounddevice-0.5.0-py3-none-any.whl", hash = "sha256:8a734043ab1f751cb20f6f25d8f07408a1aadf2eeca923061849d38bb59f9e3d"},
2946
+ {file = "sounddevice-0.5.0-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:73eb7cb1e8ab1e1ba09c228239e9d0b160006de380921687e44610ad9a19ac32"},
2947
+ {file = "sounddevice-0.5.0-py3-none-win32.whl", hash = "sha256:919de43040e8737258370ddf929a9cd1a3d6c493ca173bab70a3c7cb15c71e97"},
2948
+ {file = "sounddevice-0.5.0-py3-none-win_amd64.whl", hash = "sha256:f28b7ef16f293d7b048a614dd087dfe39c3e313d94a50539bb52022b7ef27ece"},
2949
+ {file = "sounddevice-0.5.0.tar.gz", hash = "sha256:0de95277654b3d403d9c15ded3c6cedf307e9b27cc9ce7bd995a2891d0c955af"},
2950
+ ]
2951
+
2952
+ [package.dependencies]
2953
+ CFFI = ">=1.0"
2954
+
2955
+ [package.extras]
2956
+ numpy = ["NumPy"]
2957
+
2958
  [[package]]
2959
  name = "soundfile"
2960
  version = "0.12.1"
 
3862
  [metadata]
3863
  lock-version = "2.0"
3864
  python-versions = "3.9.1"
3865
+ content-hash = "8b654ee2a2cc97497e78fbe0de6258f3fb006e3f9bbe7234f800843f66adcb7b"
pyproject.toml CHANGED
@@ -23,6 +23,7 @@ soundfile = "^0.12.1"
23
  openai = "^1.42.0"
24
  numpy = "^1.22.0"
25
  torch = "2.1.0"
 
26
 
27
 
28
 
 
23
  openai = "^1.42.0"
24
  numpy = "^1.22.0"
25
  torch = "2.1.0"
26
+ sounddevice = "^0.5.0"
27
 
28
 
29