File size: 5,841 Bytes
ebaaf9b
abaf442
1c789c0
d7b2452
abaf442
ebaaf9b
f1bf1b3
a94388a
8004cea
 
9b933e3
8004cea
 
ebaaf9b
 
f4a3257
 
1c789c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4a3257
8004cea
d7b2452
1c789c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d82fb72
1c789c0
 
 
 
 
f4a3257
d7b2452
1ab0cdf
 
 
 
 
 
f4a3257
d7b2452
1c789c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4a3257
d7b2452
f4a3257
 
 
 
 
abaf442
7456852
 
 
 
 
 
 
 
 
 
 
f4a3257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b85baaf
 
 
5a62402
f4a3257
 
 
 
 
a9516a4
 
f4a3257
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import asyncio
import json
import logging
import wave

import websockets
import requests
import ssl
import sys

# Route log records to stdout at INFO level; force=True replaces any handlers
# that were already attached to the root logger.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.INFO,
    force=True,
)
logger = logging.getLogger(__name__)

# Remote location of the sample WAV recording
AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"  # Use WAV file

from pydub import AudioSegment


# Convert and resample audio to mono 16 kHz before it is streamed as raw PCM
def convert_to_mono_16k(audio_file_path):
    """Load a WAV file and return it as a mono, 16 kHz ``AudioSegment``.

    Args:
        audio_file_path: Path of the WAV file, handed to
            ``AudioSegment.from_file`` with ``format="wav"``.

    Returns:
        The segment after ``set_channels(1)`` and ``set_frame_rate(16000)``.

    Raises:
        Exception: Whatever loading or converting raised; the error is
            logged first and then re-raised.
    """
    logging.info(f"Starting audio conversion to mono and resampling to 16kHz for file: {audio_file_path}")

    try:
        # Decode the file, then down-mix and resample in two explicit steps.
        loaded = AudioSegment.from_file(audio_file_path, format="wav")
        mono = loaded.set_channels(1)
        resampled = mono.set_frame_rate(16000)

        logging.info("Audio conversion to mono and 16kHz completed successfully.")
    except Exception as err:
        logging.error(f"Error during audio conversion: {err}")
        raise err

    return resampled


async def send_audio(websocket, audio_file_path='test_copy.wav', buffer_size=1024 * 16):
    """Stream a local audio file to the server as mono 16 kHz, 16-bit PCM.

    The file is converted with ``convert_to_mono_16k`` and forced to 16-bit
    samples. A JSON text message describing the stream (``sample_rate``,
    ``channels``, ``sampwidth``) is sent first, followed by the raw PCM bytes
    in binary messages of at most ``buffer_size`` bytes each.

    Args:
        websocket: Open connection object exposing an awaitable ``send``.
        audio_file_path: Path of the WAV file to stream. Defaults to the
            previously hard-coded ``'test_copy.wav'``.
        buffer_size: Maximum number of bytes per binary message (16 KB by
            default). Keep it a multiple of the sample width so a sample is
            not split across two messages.

    Returns:
        None. Conversion failures and errors raised while sending the PCM
        chunks are logged rather than raised.

    Raises:
        ValueError: If ``buffer_size`` is not positive.
    """
    if buffer_size <= 0:
        raise ValueError("buffer_size must be a positive number of bytes")

    logging.info("Converting the audio to mono and 16kHz.")

    try:
        # Force 16-bit samples so the 'sampwidth' announced below always
        # matches the bytes actually sent (the source file may use another
        # sample width). This is a no-op for audio that is already 16-bit.
        converted_audio = convert_to_mono_16k(audio_file_path).set_sample_width(2)
    except Exception as e:
        logging.error(f"Failed to convert audio: {e}")
        return

    # Describe the stream using the converted segment's real properties
    # instead of assumed constants.
    metadata = {
        'sample_rate': converted_audio.frame_rate,
        'channels': converted_audio.channels,
        'sampwidth': converted_audio.sample_width,
    }
    await websocket.send(json.dumps(metadata))
    logging.info(f"Sent metadata: {metadata}")

    try:
        raw_data = converted_audio.raw_data
        logging.info(f"Starting to send raw PCM audio data. Total data size: {len(raw_data)} bytes.")

        for offset in range(0, len(raw_data), buffer_size):
            pcm_chunk = raw_data[offset:offset + buffer_size]
            await websocket.send(pcm_chunk)  # Send raw PCM data chunk
            logging.info(f"Sent PCM chunk of size {len(pcm_chunk)} bytes.")
            await asyncio.sleep(0.01)  # Simulate real-time sending

        logging.info("Completed sending all audio data.")
    except Exception as e:
        logging.error(f"Error while sending audio data: {e}")


async def receive_transcription(websocket):
    """Print every message received from the server until receiving fails.

    Each message is printed as received and then checked to be valid JSON.
    A message that is not valid JSON is reported and skipped, so one
    malformed frame no longer ends the loop. The loop stops only when
    ``websocket.recv()`` itself raises (for example because the connection
    was closed).

    Args:
        websocket: Open connection object exposing an awaitable ``recv``.

    Returns:
        None.
    """
    while True:
        # Only a failure to receive terminates the loop.
        try:
            message = await websocket.recv()  # Receive transcription from the server
        except Exception as e:
            print(f"Error receiving transcription: {e}")
            break

        print(f"Transcription: {message}")

        # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        try:
            json.loads(message)
        except ValueError as e:
            print(f"Ignoring non-JSON message: {e}")

async def send_heartbeat(websocket):
    """Ping the server every 30 seconds until the connection is closed.

    Args:
        websocket: Open connection object exposing an awaitable ``ping``.

    Returns:
        None, once ``websockets.ConnectionClosed`` is raised by a ping.
    """
    interval_seconds = 30  # Pause between pings (adjust as needed)
    connection_open = True
    while connection_open:
        try:
            await websocket.ping()
        except websockets.ConnectionClosed:
            print("Connection closed, stopping heartbeat")
            connection_open = False
        else:
            print("Sent keepalive ping")
            await asyncio.sleep(interval_seconds)


async def run_client():
    """Connect to the transcription WebSocket and run the client tasks.

    Opens a TLS WebSocket connection and concurrently runs ``send_audio``,
    ``receive_transcription`` and ``send_heartbeat`` on it, returning once
    all three have finished.
    """
    uri = "wss://gigaverse-ivrit-ai-streaming.hf.space/wtranscribe"  # WebSocket URL

    # NOTE(security): hostname checking and certificate verification are both
    # disabled, so the server's identity is not authenticated. Behaviour is
    # kept as-is; prefer ssl.create_default_context() unless this is deliberate.
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # `timeout` was only a deprecated alias of `close_timeout` in the legacy
    # client; the current asyncio client forwards unknown keyword arguments to
    # loop.create_connection(), which rejects it. Use the real name.
    async with websockets.connect(uri, ssl=ssl_context, close_timeout=60) as websocket:
        await asyncio.gather(
            send_audio(websocket),
            receive_transcription(websocket),
            send_heartbeat(websocket),
        )

# Start the client only when executed as a script, so importing this module
# does not open a network connection as a side effect.
if __name__ == "__main__":
    asyncio.run(run_client())