Spaces:
Sleeping
Sleeping
AshDavid12
committed on
Commit
·
f1bf1b3
1
Parent(s):
cf31b20
change to wav
Browse files
client.py
CHANGED
@@ -1,21 +1,48 @@
|
|
1 |
import asyncio
|
2 |
import websockets
|
3 |
import wave
|
|
|
4 |
|
5 |
# Parameters for reading and sending the audio
|
6 |
SAMPLE_RATE = 16000
|
7 |
-
CHUNK_SIZE =
|
8 |
-
|
9 |
|
10 |
|
11 |
async def send_audio(websocket):
|
12 |
-
|
13 |
-
|
14 |
-
while data:
|
15 |
-
await websocket.send(data) # Send audio chunk to the server
|
16 |
-
await asyncio.sleep(CHUNK_SIZE / SAMPLE_RATE) # Simulate real-time by waiting for the duration of the chunk
|
17 |
-
data = wf.readframes(CHUNK_SIZE)
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
async def receive_transcription(websocket):
|
21 |
while True:
|
@@ -33,7 +60,7 @@ async def run_client():
|
|
33 |
ssl_context.check_hostname = False
|
34 |
ssl_context.verify_mode = ssl.CERT_NONE
|
35 |
|
36 |
-
async with websockets.connect(uri, ssl=ssl_context) as websocket:
|
37 |
await asyncio.gather(
|
38 |
send_audio(websocket),
|
39 |
receive_transcription(websocket)
|
|
|
1 |
import asyncio
|
2 |
import websockets
|
3 |
import wave
|
4 |
+
import requests
|
5 |
|
6 |
# Parameters for reading and sending the audio
|
7 |
SAMPLE_RATE = 16000
|
8 |
+
CHUNK_SIZE = 8192 # Size of the audio chunk sent at a time
|
9 |
+
AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod_serverless_whisper/main/me-hebrew.wav" # URL of the WAV file to stream
|
10 |
|
11 |
|
12 |
async def send_audio(websocket):
|
13 |
+
buffer_size = 1024 * 1024 # Buffer 1MB of audio data before sending for transcription
|
14 |
+
audio_buffer = bytearray() # Collect audio chunks directly in memory
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
# Stream the audio file in real-time
|
17 |
+
with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
|
18 |
+
if response.status_code == 200:
|
19 |
+
print("Starting to stream audio file...")
|
20 |
+
|
21 |
+
for chunk in response.iter_content(chunk_size=8192): # Stream in chunks of 8192 bytes
|
22 |
+
if chunk:
|
23 |
+
# Append each chunk to the in-memory buffer
|
24 |
+
audio_buffer.extend(chunk)
|
25 |
+
print(f"Received audio chunk of size {len(chunk)} bytes.")
|
26 |
+
|
27 |
+
# Once we have buffered enough audio data, send it for transcription
|
28 |
+
if len(audio_buffer) >= buffer_size:
|
29 |
+
await websocket.send(audio_buffer) # Send buffered data directly
|
30 |
+
print(f"Sent {len(audio_buffer)} bytes of audio data to the server for transcription.")
|
31 |
+
audio_buffer.clear() # Clear buffer after sending
|
32 |
+
await asyncio.sleep(0.01) # Simulate real-time streaming
|
33 |
+
|
34 |
+
print("Finished sending audio.")
|
35 |
+
else:
|
36 |
+
print(f"Failed to download audio file. Status code: {response.status_code}")
|
37 |
+
|
38 |
+
async def receive_transcription(websocket):
|
39 |
+
while True:
|
40 |
+
try:
|
41 |
+
transcription = await websocket.recv()
|
42 |
+
print(f"Received transcription: {transcription}")
|
43 |
+
except Exception as e:
|
44 |
+
print(f"Error receiving transcription: {e}")
|
45 |
+
break
|
46 |
|
47 |
async def receive_transcription(websocket):
|
48 |
while True:
|
|
|
60 |
ssl_context.check_hostname = False
|
61 |
ssl_context.verify_mode = ssl.CERT_NONE
|
62 |
|
63 |
+
async with websockets.connect(uri, ssl=ssl_context, timeout=30) as websocket:
|
64 |
await asyncio.gather(
|
65 |
send_audio(websocket),
|
66 |
receive_transcription(websocket)
|
infer.py
CHANGED
@@ -178,7 +178,7 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
|
|
178 |
new_last_transcribed_time = max(new_last_transcribed_time, s.end)
|
179 |
logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
|
180 |
|
181 |
-
logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
|
182 |
return ret, new_last_transcribed_time
|
183 |
|
184 |
|
@@ -197,7 +197,7 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
197 |
logging.info("Initialized processed_segments and audio_data buffer.")
|
198 |
|
199 |
# A temporary file to store the growing audio data
|
200 |
-
with tempfile.NamedTemporaryFile(suffix=".
|
201 |
logging.info(f"Temporary audio file created at {temp_audio_file.name}")
|
202 |
|
203 |
# Continuously receive and process audio chunks
|
@@ -218,7 +218,7 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
218 |
logging.debug(f"Written audio chunk to temporary file: {temp_audio_file.name}")
|
219 |
|
220 |
audio_data.extend(audio_chunk) # In-memory data buffer (if needed)
|
221 |
-
logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")
|
222 |
|
223 |
# Perform transcription and track new segments
|
224 |
logging.info(
|
@@ -228,6 +228,8 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
228 |
logging.info(
|
229 |
f"Transcription completed. Sending {len(partial_result['new_segments'])} new segments to the client.")
|
230 |
# Send the new transcription result back to the client
|
|
|
|
|
231 |
await websocket.send_json(partial_result)
|
232 |
|
233 |
except WebSocketDisconnect:
|
|
|
178 |
new_last_transcribed_time = max(new_last_transcribed_time, s.end)
|
179 |
logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
|
180 |
|
181 |
+
#logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
|
182 |
return ret, new_last_transcribed_time
|
183 |
|
184 |
|
|
|
197 |
logging.info("Initialized processed_segments and audio_data buffer.")
|
198 |
|
199 |
# A temporary file to store the growing audio data
|
200 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
|
201 |
logging.info(f"Temporary audio file created at {temp_audio_file.name}")
|
202 |
|
203 |
# Continuously receive and process audio chunks
|
|
|
218 |
logging.debug(f"Written audio chunk to temporary file: {temp_audio_file.name}")
|
219 |
|
220 |
audio_data.extend(audio_chunk) # In-memory data buffer (if needed)
|
221 |
+
#logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")
|
222 |
|
223 |
# Perform transcription and track new segments
|
224 |
logging.info(
|
|
|
228 |
logging.info(
|
229 |
f"Transcription completed. Sending {len(partial_result['new_segments'])} new segments to the client.")
|
230 |
# Send the new transcription result back to the client
|
231 |
+
logging.info(
|
232 |
+
f"partial result{partial_result}")
|
233 |
await websocket.send_json(partial_result)
|
234 |
|
235 |
except WebSocketDisconnect:
|