"""Streaming transcription client.

Converts a local WAV file to mono / 16 kHz / 16-bit PCM, streams it in chunks
over a WebSocket, and prints every message the server sends back.
"""

import asyncio
import json
import logging
import ssl
import sys
import wave  # Not referenced by the active code; kept for the earlier wave-based sender.

import requests  # Not referenced by the active code; kept for the earlier download path.
import websockets
from pydub import AudioSegment

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s: %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
    force=True,
)
logger = logging.getLogger(__name__)

# Remote copy of the test recording. The active code reads a local file
# instead; an earlier revision downloaded this URL with `requests`.
AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"  # Use WAV file

# Local WAV file that is streamed by default.
AUDIO_FILE_PATH = 'test_copy.wav'

# WebSocket endpoint of the transcription service.
WS_URI = "wss://gigaverse-ivrit-ai-streaming.hf.space/wtranscribe"

# PCM format advertised to the server in the metadata message.
TARGET_SAMPLE_RATE = 16000  # Hz
TARGET_CHANNELS = 1         # mono
TARGET_SAMPLE_WIDTH = 2     # bytes per sample (16-bit)

CHUNK_SIZE = 1024 * 16      # bytes of PCM per WebSocket message (16KB)
CHUNK_DELAY = 0.01          # seconds to pause between chunks
HEARTBEAT_INTERVAL = 30     # seconds between keepalive pings


def convert_to_mono_16k(audio_file_path):
    """Load a WAV file and return it as a mono, 16 kHz ``AudioSegment``.

    Args:
        audio_file_path: Path of the WAV file to load.

    Returns:
        The converted ``pydub.AudioSegment``. The sample width of the source
        is left untouched.

    Raises:
        Exception: Whatever pydub raises while loading or converting; the
            error is logged and then re-raised unchanged.
    """
    logger.info(
        "Starting audio conversion to mono and resampling to 16kHz for file: %s",
        audio_file_path,
    )
    try:
        # Load the audio file into an AudioSegment object.
        audio_segment = AudioSegment.from_file(audio_file_path, format="wav")
        # Convert the audio to mono and resample it to 16kHz.
        audio_segment = audio_segment.set_channels(TARGET_CHANNELS).set_frame_rate(TARGET_SAMPLE_RATE)
    except Exception as e:
        logger.error("Error during audio conversion: %s", e)
        raise  # Bare raise keeps the original traceback intact.

    logger.info("Audio conversion to mono and 16kHz completed successfully.")
    return audio_segment


async def send_audio(websocket, audio_path=AUDIO_FILE_PATH):
    """Stream a WAV file to the server as a JSON header plus raw PCM chunks.

    A JSON metadata message (sample rate, channels, sample width) is sent
    first, followed by the PCM payload in ``CHUNK_SIZE``-byte binary messages.

    Args:
        websocket: Open WebSocket connection exposing an awaitable ``send``.
        audio_path: WAV file to stream. Defaults to ``AUDIO_FILE_PATH``.

    Returns:
        None. If the audio cannot be converted the error is logged and the
        coroutine returns without sending anything. Errors raised while
        streaming the PCM chunks are logged and swallowed; an error while
        sending the metadata message propagates to the caller.
    """
    print("hi")

    logger.info("Converting the audio to mono and 16kHz.")
    try:
        converted_audio = convert_to_mono_16k(audio_path)
        # The metadata below promises 16-bit samples, so force that width
        # instead of assuming it; 16-bit input is left unchanged.
        converted_audio = converted_audio.set_sample_width(TARGET_SAMPLE_WIDTH)
    except Exception as e:
        logger.error("Failed to convert audio: %s", e)
        return

    # Send metadata to the server before any audio.
    metadata = {
        'sample_rate': TARGET_SAMPLE_RATE,  # Resampled rate
        'channels': TARGET_CHANNELS,        # Converted to mono
        'sampwidth': TARGET_SAMPLE_WIDTH,   # Enforced above (16-bit)
    }
    await websocket.send(json.dumps(metadata))
    logger.info("Sent metadata: %s", metadata)

    try:
        raw_data = converted_audio.raw_data
        logger.info(
            "Starting to send raw PCM audio data. Total data size: %d bytes.",
            len(raw_data),
        )
        for offset in range(0, len(raw_data), CHUNK_SIZE):
            pcm_chunk = raw_data[offset:offset + CHUNK_SIZE]
            await websocket.send(pcm_chunk)  # Send raw PCM data chunk
            logger.info("Sent PCM chunk of size %d bytes.", len(pcm_chunk))
            # Brief pause between chunks so the other tasks get to run.
            await asyncio.sleep(CHUNK_DELAY)
        logger.info("Completed sending all audio data.")
    except Exception as e:
        logger.error("Error while sending audio data: %s", e)

    # NOTE: an earlier revision downloaded AUDIO_FILE_URL with `requests` and
    # streamed frames read through the `wave` module (taking the metadata from
    # the WAV header). That commented-out code was removed here; recover it
    # from version control if needed.


async def receive_transcription(websocket):
    """Print every message received from the server until an error occurs.

    Each message is printed as received and then parsed as JSON. Any
    exception (connection closed, invalid JSON, ...) is printed and ends
    the loop.

    Args:
        websocket: Open WebSocket connection exposing an awaitable ``recv``.
    """
    while True:
        try:
            transcription = await websocket.recv()  # Receive transcription from the server
            print(f"Transcription: {transcription}")
            # The parsed payload is not used yet; parsing still validates the
            # message and stops the loop on malformed JSON, as before.
            # (Removed commented-out code read a 'download_url' key here and
            # fetched the referenced file with `requests`.)
            transcription = json.loads(transcription)
        except Exception as e:
            print(f"Error receiving transcription: {e}")
            break


async def send_heartbeat(websocket):
    """Ping the server every ``HEARTBEAT_INTERVAL`` seconds until it closes.

    Args:
        websocket: Open WebSocket connection exposing an awaitable ``ping``.
    """
    while True:
        try:
            await websocket.ping()
            print("Sent keepalive ping")
        except websockets.ConnectionClosed:
            print("Connection closed, stopping heartbeat")
            break
        await asyncio.sleep(HEARTBEAT_INTERVAL)  # Send ping every 30 seconds (adjust as needed)


async def run_client():
    """Connect to the service and run sender, receiver and heartbeat together."""
    uri = WS_URI  # WebSocket URL

    # NOTE(review): certificate and hostname verification are disabled, which
    # leaves the TLS connection open to man-in-the-middle attacks. Behaviour
    # is kept as-is; prefer ssl.create_default_context() once the endpoint's
    # certificate chain validates in the target environment.
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # `timeout` was only a deprecated alias of `close_timeout`; newer
    # websockets releases reject it, so the current name is used.
    async with websockets.connect(uri, ssl=ssl_context, close_timeout=60) as websocket:
        await asyncio.gather(
            send_audio(websocket),
            receive_transcription(websocket),
            send_heartbeat(websocket),
        )


if __name__ == "__main__":
    asyncio.run(run_client())