AshDavid12 committed
Commit f1bf1b3 · 1 Parent(s): cf31b20

change to wav

Files changed (2)
  1. client.py +36 -9
  2. infer.py +5 -3
client.py CHANGED
@@ -1,21 +1,48 @@
 import asyncio
 import websockets
 import wave
+import requests

 # Parameters for reading and sending the audio
 SAMPLE_RATE = 16000
-CHUNK_SIZE = 1024  # Size of the audio chunk sent at a time
-AUDIO_FILE = "https://raw.githubusercontent.com/AshDavid12/hugging_face_ivrit_streaming/main/test_copy.mp3"  # Path to the mp3 file
+CHUNK_SIZE = 8192  # Size of the audio chunk sent at a time
+AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod_serverless_whisper/main/me-hebrew.wav"  # Path to the wav file


 async def send_audio(websocket):
-    with wave.open(AUDIO_FILE, "rb") as wf:
-        data = wf.readframes(CHUNK_SIZE)
-        while data:
-            await websocket.send(data)  # Send audio chunk to the server
-            await asyncio.sleep(CHUNK_SIZE / SAMPLE_RATE)  # Simulate real-time by waiting for the duration of the chunk
-            data = wf.readframes(CHUNK_SIZE)
+    buffer_size = 1024 * 1024  # Buffer 1MB of audio data before sending for transcription
+    audio_buffer = bytearray()  # Collect audio chunks directly in memory

+    # Stream the audio file in real-time
+    with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
+        if response.status_code == 200:
+            print("Starting to stream audio file...")
+
+            for chunk in response.iter_content(chunk_size=8192):  # Stream in chunks of 8192 bytes
+                if chunk:
+                    # Append each chunk to the in-memory buffer
+                    audio_buffer.extend(chunk)
+                    print(f"Received audio chunk of size {len(chunk)} bytes.")
+
+                    # Once we have buffered enough audio data, send it for transcription
+                    if len(audio_buffer) >= buffer_size:
+                        await websocket.send(audio_buffer)  # Send buffered data directly
+                        print(f"Sent {len(audio_buffer)} bytes of audio data to the server for transcription.")
+                        audio_buffer.clear()  # Clear buffer after sending
+                        await asyncio.sleep(0.01)  # Simulate real-time streaming
+
+            print("Finished sending audio.")
+        else:
+            print(f"Failed to download audio file. Status code: {response.status_code}")
+
+async def receive_transcription(websocket):
+    while True:
+        try:
+            transcription = await websocket.recv()
+            print(f"Received transcription: {transcription}")
+        except Exception as e:
+            print(f"Error receiving transcription: {e}")
+            break

 async def receive_transcription(websocket):
     while True:
@@ -33,7 +60,7 @@ async def run_client():
     ssl_context.check_hostname = False
     ssl_context.verify_mode = ssl.CERT_NONE

-    async with websockets.connect(uri, ssl=ssl_context) as websocket:
+    async with websockets.connect(uri, ssl=ssl_context, timeout=30) as websocket:
         await asyncio.gather(
             send_audio(websocket),
             receive_transcription(websocket)
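The second hunk only shows the tail of run_client(); the uri it connects to is defined earlier in client.py and is not part of this diff. As a rough sketch of how the new coroutines fit together end to end (SERVER_URI is a placeholder rather than the repository's real endpoint, and send_audio / receive_transcription are the coroutines from the diff above):

import asyncio
import ssl
import websockets

SERVER_URI = "wss://localhost:8000/ws"  # placeholder; the real endpoint is defined elsewhere in client.py

async def run_client():
    # Accept a self-signed certificate, mirroring the ssl_context setup shown in the diff
    ssl_context = ssl.create_default_context()
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    async with websockets.connect(SERVER_URI, ssl=ssl_context) as websocket:
        await asyncio.gather(
            send_audio(websocket),            # streams the remote WAV in 1MB buffers
            receive_transcription(websocket)  # prints transcriptions as they arrive
        )

if __name__ == "__main__":
    asyncio.run(run_client())

Because send_audio now forwards the raw bytes of the downloaded file, the first buffered chunk includes the WAV header, which matches the .wav suffix the server-side temporary file switches to in the same commit.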
infer.py CHANGED
@@ -178,7 +178,7 @@ def transcribe_core_ws(audio_file, last_transcribed_time):
         new_last_transcribed_time = max(new_last_transcribed_time, s.end)
         logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")

-    logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
+    #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
     return ret, new_last_transcribed_time


@@ -197,7 +197,7 @@ async def websocket_transcribe(websocket: WebSocket):
         logging.info("Initialized processed_segments and audio_data buffer.")

         # A temporary file to store the growing audio data
-        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
             logging.info(f"Temporary audio file created at {temp_audio_file.name}")

             # Continuously receive and process audio chunks
@@ -218,7 +218,7 @@
                 logging.debug(f"Written audio chunk to temporary file: {temp_audio_file.name}")

                 audio_data.extend(audio_chunk)  # In-memory data buffer (if needed)
-                logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")
+                #logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")

                 # Perform transcription and track new segments
                 logging.info(
@@ -228,6 +228,8 @@
                 logging.info(
                     f"Transcription completed. Sending {len(partial_result['new_segments'])} new segments to the client.")
                 # Send the new transcription result back to the client
+                logging.info(
+                    f"partial result{partial_result}")
                 await websocket.send_json(partial_result)

     except WebSocketDisconnect:
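The infer.py hunks are fragments of a larger websocket_transcribe endpoint. For orientation, here is a rough sketch of the receive-and-transcribe loop those fragments belong to, assuming a FastAPI app; the route path and any names not visible in the diff are illustrative, and transcribe_core_ws is the function from the first hunk:

# Illustrative sketch, not the exact contents of infer.py.
import logging
import tempfile
from fastapi import FastAPI, WebSocket, WebSocketDisconnect

app = FastAPI()

@app.websocket("/ws")  # the actual route path is not shown in the diff
async def websocket_transcribe(websocket: WebSocket):
    await websocket.accept()
    audio_data = bytearray()       # in-memory buffer, as in the diff
    last_transcribed_time = 0.0
    try:
        # The growing audio is persisted to a temporary .wav file (per this commit)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            while True:
                audio_chunk = await websocket.receive_bytes()  # raw bytes sent by client.py
                temp_audio_file.write(audio_chunk)
                temp_audio_file.flush()
                audio_data.extend(audio_chunk)

                # transcribe_core_ws (first hunk) transcribes the file and tracks last_transcribed_time
                partial_result, last_transcribed_time = transcribe_core_ws(
                    temp_audio_file.name, last_transcribed_time
                )
                await websocket.send_json(partial_result)
    except WebSocketDisconnect:
        logging.info("WebSocket connection closed by the client.")

The commented-out log line in the first hunk suggests transcribe_core_ws returns only the newly transcribed segments along with the updated last transcribed time, so each send_json call delivers incremental results rather than the full transcript.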