Spaces:
Sleeping
Sleeping
AshDavid12
committed on
Commit
·
a9516a4
1
Parent(s):
e9d738a
no buffer
Browse files- client.py +23 -31
- infer.py +9 -11
- poetry.lock +21 -1
- pyproject.toml +1 -0
client.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import asyncio
|
2 |
import io
|
|
|
3 |
|
4 |
import numpy as np
|
5 |
import websockets
|
@@ -8,6 +9,8 @@ import ssl
|
|
8 |
import wave
|
9 |
import logging
|
10 |
import sys
|
|
|
|
|
11 |
|
12 |
# Parameters for reading and sending the audio
|
13 |
#AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
|
@@ -35,41 +38,26 @@ async def send_receive():
|
|
35 |
await asyncio.gather(send_task, receive_task)
|
36 |
except Exception as e:
|
37 |
logger.error(f"WebSocket connection error: {e}")
|
|
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
logger.info(f"Opening WAV file: {wav_file}")
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
response = requests.get(wav_file)
|
46 |
-
response.raise_for_status()
|
47 |
-
wav_bytes = io.BytesIO(response.content)
|
48 |
-
|
49 |
-
|
50 |
-
# Send audio data in chunks directly from the WAV file
|
51 |
-
chunk_size = 1024 # Sending data in chunks of 1024 bytes, which can be adjusted
|
52 |
-
|
53 |
-
total_chunks = 0
|
54 |
-
total_bytes_sent = 0
|
55 |
-
|
56 |
-
# While loop to send audio data chunk by chunk
|
57 |
-
while True:
|
58 |
-
chunk = wav_bytes.read(chunk_size)
|
59 |
-
if not chunk:
|
60 |
-
break
|
61 |
-
await websocket.send(chunk)
|
62 |
-
total_chunks += 1
|
63 |
-
total_bytes_sent += len(chunk)
|
64 |
-
#logger.debug(f"Sent chunk {total_chunks}: {len(chunk)} bytes")
|
65 |
-
#await asyncio.sleep(0.1) # Simulate real-time streaming
|
66 |
-
#logger.info(f"Finished sending audio data: {total_chunks} chunks sent, total bytes sent: {total_bytes_sent}")
|
67 |
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
-
finally:
|
72 |
-
logger.info("WAV file closed")
|
73 |
|
74 |
async def receive_transcriptions(websocket):
|
75 |
try:
|
@@ -80,6 +68,10 @@ async def receive_transcriptions(websocket):
|
|
80 |
except Exception as e:
|
81 |
logger.error(f"Receive transcription error: {e}")
|
82 |
|
|
|
|
|
|
|
|
|
83 |
if __name__ == "__main__":
|
84 |
asyncio.run(send_receive())
|
85 |
|
|
|
1 |
import asyncio
|
2 |
import io
|
3 |
+
import json
|
4 |
|
5 |
import numpy as np
|
6 |
import websockets
|
|
|
9 |
import wave
|
10 |
import logging
|
11 |
import sys
|
12 |
+
import sounddevice as sd
|
13 |
+
|
14 |
|
15 |
# Parameters for reading and sending the audio
|
16 |
#AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
|
|
|
38 |
await asyncio.gather(send_task, receive_task)
|
39 |
except Exception as e:
|
40 |
logger.error(f"WebSocket connection error: {e}")
|
41 |
+
max_size_bytes = 50_000_000 # 50 MB (50,000,000 bytes)
|
42 |
|
43 |
+
SAMPLE_RATE = 16000
|
44 |
+
CHUNK_SIZE =1024
|
|
|
45 |
|
46 |
+
async def send_audio_chunks(websocket):
    """Capture audio from the default input device and stream it over ``websocket``.

    Opens a mono ``sounddevice.InputStream`` at ``SAMPLE_RATE`` with blocks of
    ``CHUNK_SIZE`` frames. Each captured block is sent as one JSON-encoded
    list of samples. The coroutine never returns on its own; it keeps the
    stream open until it is cancelled.

    Args:
        websocket: An open connection object exposing an awaitable ``send``.
    """
    # The sounddevice callback runs on a non-asyncio audio thread. Calling
    # asyncio.get_event_loop() there does not give the loop that owns the
    # websocket, so capture the running loop here and close over it.
    loop = asyncio.get_running_loop()

    def _log_send_failure(future):
        """Report a failed ``websocket.send`` instead of dropping it silently."""
        if future.cancelled():
            return
        error = future.exception()
        if error is not None:
            logger.error(f"Failed to send audio chunk: {error}")

    def audio_callback(indata, frames, time, status):
        """Forward one captured block to the server; called by sounddevice."""
        if status:
            # Non-empty status flags indicate input overflow/underflow.
            logger.warning(f"Audio input status: {status}")
        # Convert the first (only) channel to a JSON-serializable list.
        audio_chunk = indata[:, 0].tolist()
        pending = asyncio.run_coroutine_threadsafe(
            websocket.send(json.dumps(audio_chunk)), loop
        )
        pending.add_done_callback(_log_send_failure)

    # Start the audio stream; leaving the ``with`` block stops and closes it.
    with sd.InputStream(
        callback=audio_callback,
        channels=1,
        samplerate=SAMPLE_RATE,
        blocksize=CHUNK_SIZE,
    ):
        await asyncio.Future()  # Never completes: keeps the stream open.
|
60 |
|
|
|
|
|
61 |
|
62 |
async def receive_transcriptions(websocket):
|
63 |
try:
|
|
|
68 |
except Exception as e:
|
69 |
logger.error(f"Receive transcription error: {e}")
|
70 |
|
71 |
+
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
if __name__ == "__main__":
|
76 |
asyncio.run(send_receive())
|
77 |
|
infer.py
CHANGED
@@ -162,23 +162,21 @@ async def process_audio_stream(websocket: WebSocket):
|
|
162 |
|
163 |
audio_chunk = process_received_audio(data)
|
164 |
#logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
-
audio_buffer = np.concatenate((audio_buffer, audio_chunk))
|
167 |
#logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
|
168 |
except Exception as e:
|
169 |
logger.error(f"Error receiving data: {e}")
|
170 |
break
|
171 |
|
172 |
-
|
173 |
-
if len(audio_buffer) >= min_chunk_size * sampling_rate:
|
174 |
-
if transcription_task is None or transcription_task.done():
|
175 |
-
# Start a new transcription task
|
176 |
-
#logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
|
177 |
-
transcription_task = asyncio.create_task(
|
178 |
-
transcribe_and_send(websocket, audio_buffer.copy())
|
179 |
-
)
|
180 |
-
audio_buffer = np.array([], dtype=np.float32)
|
181 |
-
#logger.debug("Audio buffer reset after starting transcription task")
|
182 |
|
183 |
async def transcribe_and_send(websocket: WebSocket, audio_data):
|
184 |
"""Run transcription in a separate thread and send the result to the client."""
|
|
|
162 |
|
163 |
audio_chunk = process_received_audio(data)
|
164 |
#logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
|
165 |
+
# Check if enough audio has been buffered
|
166 |
+
if len(audio_buffer) >= min_chunk_size * sampling_rate:
|
167 |
+
if transcription_task is None or transcription_task.done():
|
168 |
+
# Start a new transcription task
|
169 |
+
# logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
|
170 |
+
transcription_task = asyncio.create_task(
|
171 |
+
transcribe_and_send(websocket, audio_chunk)
|
172 |
+
)
|
173 |
|
|
|
174 |
#logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
|
175 |
except Exception as e:
|
176 |
logger.error(f"Error receiving data: {e}")
|
177 |
break
|
178 |
|
179 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
async def transcribe_and_send(websocket: WebSocket, audio_data):
|
182 |
"""Run transcription in a separate thread and send the result to the client."""
|
poetry.lock
CHANGED
@@ -2935,6 +2935,26 @@ files = [
|
|
2935 |
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
2936 |
]
|
2937 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2938 |
[[package]]
|
2939 |
name = "soundfile"
|
2940 |
version = "0.12.1"
|
@@ -3842,4 +3862,4 @@ type = ["pytest-mypy"]
|
|
3842 |
[metadata]
|
3843 |
lock-version = "2.0"
|
3844 |
python-versions = "3.9.1"
|
3845 |
-
content-hash = "
|
|
|
2935 |
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
2936 |
]
|
2937 |
|
2938 |
+
[[package]]
|
2939 |
+
name = "sounddevice"
|
2940 |
+
version = "0.5.0"
|
2941 |
+
description = "Play and Record Sound with Python"
|
2942 |
+
optional = false
|
2943 |
+
python-versions = ">=3.7"
|
2944 |
+
files = [
|
2945 |
+
{file = "sounddevice-0.5.0-py3-none-any.whl", hash = "sha256:8a734043ab1f751cb20f6f25d8f07408a1aadf2eeca923061849d38bb59f9e3d"},
|
2946 |
+
{file = "sounddevice-0.5.0-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:73eb7cb1e8ab1e1ba09c228239e9d0b160006de380921687e44610ad9a19ac32"},
|
2947 |
+
{file = "sounddevice-0.5.0-py3-none-win32.whl", hash = "sha256:919de43040e8737258370ddf929a9cd1a3d6c493ca173bab70a3c7cb15c71e97"},
|
2948 |
+
{file = "sounddevice-0.5.0-py3-none-win_amd64.whl", hash = "sha256:f28b7ef16f293d7b048a614dd087dfe39c3e313d94a50539bb52022b7ef27ece"},
|
2949 |
+
{file = "sounddevice-0.5.0.tar.gz", hash = "sha256:0de95277654b3d403d9c15ded3c6cedf307e9b27cc9ce7bd995a2891d0c955af"},
|
2950 |
+
]
|
2951 |
+
|
2952 |
+
[package.dependencies]
|
2953 |
+
CFFI = ">=1.0"
|
2954 |
+
|
2955 |
+
[package.extras]
|
2956 |
+
numpy = ["NumPy"]
|
2957 |
+
|
2958 |
[[package]]
|
2959 |
name = "soundfile"
|
2960 |
version = "0.12.1"
|
|
|
3862 |
[metadata]
|
3863 |
lock-version = "2.0"
|
3864 |
python-versions = "3.9.1"
|
3865 |
+
content-hash = "8b654ee2a2cc97497e78fbe0de6258f3fb006e3f9bbe7234f800843f66adcb7b"
|
pyproject.toml
CHANGED
@@ -23,6 +23,7 @@ soundfile = "^0.12.1"
|
|
23 |
openai = "^1.42.0"
|
24 |
numpy = "^1.22.0"
|
25 |
torch = "2.1.0"
|
|
|
26 |
|
27 |
|
28 |
|
|
|
23 |
openai = "^1.42.0"
|
24 |
numpy = "^1.22.0"
|
25 |
torch = "2.1.0"
|
26 |
+
sounddevice = "^0.5.0"
|
27 |
|
28 |
|
29 |
|