Spaces:

Gigaverse
/

ivrit-ai-streaming

Sleeping

App Files Files Community

AshDavid12 committed on Sep 12, 2024

Commit

a94388a

1 Parent(s): f1bf1b3

empty seg - buffer size

Browse files

Files changed (2) hide show

client.py +6 -30
infer.py +17 -26

client.py CHANGED Viewed

@@ -1,18 +1,12 @@
 import asyncio
 import websockets
-import wave
 import requests
 # Parameters for reading and sending the audio
-SAMPLE_RATE = 16000
-CHUNK_SIZE = 8192  # Size of the audio chunk sent at a time
-AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod_serverless_whisper/main/me-hebrew.wav"  # Path to the mp3 file
 async def send_audio(websocket):
-    buffer_size = 1024 * 1024  # Buffer 1MB of audio data before sending for transcription
-    audio_buffer = bytearray()  # Collect audio chunks directly in memory
     # Stream the audio file in real-time
     with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
         if response.status_code == 200:
@@ -20,42 +14,24 @@ async def send_audio(websocket):
             for chunk in response.iter_content(chunk_size=8192):  # Stream in chunks of 8192 bytes
                 if chunk:
-                    # Append each chunk to the in-memory buffer
-                    audio_buffer.extend(chunk)
-                    print(f"Received audio chunk of size {len(chunk)} bytes.")
-                    # Once we have buffered enough audio data, send it for transcription
-                    if len(audio_buffer) >= buffer_size:
-                        await websocket.send(audio_buffer)  # Send buffered data directly
-                        print(f"Sent {len(audio_buffer)} bytes of audio data to the server for transcription.")
-                        audio_buffer.clear()  # Clear buffer after sending
-                        await asyncio.sleep(0.01)  # Simulate real-time streaming
             print("Finished sending audio.")
         else:
             print(f"Failed to download audio file. Status code: {response.status_code}")
-async def receive_transcription(websocket):
-    while True:
-        try:
-            transcription = await websocket.recv()
-            print(f"Received transcription: {transcription}")
-        except Exception as e:
-            print(f"Error receiving transcription: {e}")
-            break
 async def receive_transcription(websocket):
     while True:
         try:
             transcription = await websocket.recv()  # Receive transcription from the server
             print(f"Transcription: {transcription}")
         except Exception as e:
-            print(f"Error: {e}")
             break
-import ssl
 async def run_client():
-    uri = ("wss://gigaverse-ivrit-ai-streaming.hf.space/ws/transcribe")  # Replace with your Hugging Face Space WebSocket URL
     ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
     ssl_context.check_hostname = False
     ssl_context.verify_mode = ssl.CERT_NONE

 import asyncio
 import websockets
 import requests
+import ssl
 # Parameters for reading and sending the audio
+AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod_serverless_whisper/main/me-hebrew.wav"  # Use WAV file
 async def send_audio(websocket):
     # Stream the audio file in real-time
     with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
         if response.status_code == 200:
             for chunk in response.iter_content(chunk_size=8192):  # Stream in chunks of 8192 bytes
                 if chunk:
+                    await websocket.send(chunk)  # Send each chunk over WebSocket
+                    print(f"Sent audio chunk of size {len(chunk)} bytes")
             print("Finished sending audio.")
         else:
             print(f"Failed to download audio file. Status code: {response.status_code}")
 async def receive_transcription(websocket):
     while True:
         try:
             transcription = await websocket.recv()  # Receive transcription from the server
             print(f"Transcription: {transcription}")
         except Exception as e:
+            print(f"Error receiving transcription: {e}")
             break
 async def run_client():
+    uri = ("wss://gigaverse-ivrit-ai-streaming.hf.space/ws/transcribe")  # WebSocket URL
     ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
     ssl_context.check_hostname = False
     ssl_context.verify_mode = ssl.CERT_NONE

infer.py CHANGED Viewed

@@ -193,59 +193,50 @@ async def websocket_transcribe(websocket: WebSocket):
     try:
         processed_segments = []  # Keeps track of the segments already transcribed
-        audio_data = bytearray()  # Buffer for audio chunks
-        logging.info("Initialized processed_segments and audio_data buffer.")
         # A temporary file to store the growing audio data
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
             logging.info(f"Temporary audio file created at {temp_audio_file.name}")
-            # Continuously receive and process audio chunks
             while True:
                 try:
-                    logging.info("Waiting to receive the next chunk of audio data from WebSocket.")
                     # Receive the next chunk of audio data
                     audio_chunk = await websocket.receive_bytes()
-                    logging.info(f"Received an audio chunk of size {len(audio_chunk)} bytes.")
                     if not audio_chunk:
                         logging.warning("Received empty audio chunk, skipping processing.")
                         continue
                     temp_audio_file.write(audio_chunk)
                     temp_audio_file.flush()
-                    logging.debug(f"Written audio chunk to temporary file: {temp_audio_file.name}")
-                    audio_data.extend(audio_chunk)  # In-memory data buffer (if needed)
-                    #logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")
-                    # Perform transcription and track new segments
-                    logging.info(
-                        f"Transcribing audio from {temp_audio_file.name}. Processed segments: {len(processed_segments)}")
-                    partial_result, processed_segments = transcribe_core_ws(temp_audio_file.name, processed_segments)
-                    logging.info(
-                        f"Transcription completed. Sending {len(partial_result['new_segments'])} new segments to the client.")
-                    # Send the new transcription result back to the client
-                    logging.info(
-                        f"partial result{partial_result}")
-                    await websocket.send_json(partial_result)
                 except WebSocketDisconnect:
-                    logging.info("WebSocket connection closed by the client. Ending transcription session.")
-                    break
-                except Exception as e:
-                    logging.error(f"Error processing audio chunk: {e}")
-                    await websocket.send_json({"error": str(e)})
                     break
     except Exception as e:
         logging.error(f"Unexpected error during WebSocket transcription: {e}")
         await websocket.send_json({"error": str(e)})
     finally:
         logging.info("Cleaning up and closing WebSocket connection.")

     try:
         processed_segments = []  # Keeps track of the segments already transcribed
+        accumulated_audio_size = 0  # Track how much audio data has been buffered
         # A temporary file to store the growing audio data
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
             logging.info(f"Temporary audio file created at {temp_audio_file.name}")
             while True:
                 try:
                     # Receive the next chunk of audio data
                     audio_chunk = await websocket.receive_bytes()
                     if not audio_chunk:
                         logging.warning("Received empty audio chunk, skipping processing.")
                         continue
+                    # Write audio chunk to file and accumulate size
                     temp_audio_file.write(audio_chunk)
                     temp_audio_file.flush()
+                    accumulated_audio_size += len(audio_chunk)
+                    logging.info(
+                        f"Received and buffered {len(audio_chunk)} bytes, total buffered: {accumulated_audio_size} bytes")
+                    # Buffer at least 512KB before transcription
+                    if accumulated_audio_size >= (512 * 1024):  # Adjust this size as needed
+                        logging.info("Buffered enough data, starting transcription.")
+                        partial_result, processed_segments = transcribe_core_ws(temp_audio_file.name,
+                                                                                processed_segments)
+                        accumulated_audio_size = 0  # Reset the accumulated audio size
+                        # Send the transcription result back to the client
+                        logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
+                        logging.info(f"partial result {partial_result}")
+                        await websocket.send_json(partial_result)
                 except WebSocketDisconnect:
+                    logging.info("WebSocket connection closed by the client.")
                     break
     except Exception as e:
         logging.error(f"Unexpected error during WebSocket transcription: {e}")
         await websocket.send_json({"error": str(e)})
     finally:
         logging.info("Cleaning up and closing WebSocket connection.")