AshDavid12 committed
Commit f4a3257 · 1 Parent(s): 1ad41b2

reverting back to partial trans

Files changed (2):
  1. client.py +54 -149
  2. infer.py +230 -109
client.py CHANGED
@@ -1,161 +1,66 @@
 import asyncio
-import io
-import json
-
-import numpy as np
 import websockets
 import requests
 import ssl
-import wave
-import logging
-import sys
-import sounddevice as sd
-

 # Parameters for reading and sending the audio
-#AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
-AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/hugging_face_ivrit_streaming/main/long_hebrew.wav"
-
-
-
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
-                    handlers=[logging.StreamHandler(sys.stdout)], force=True)
-logger = logging.getLogger(__name__)
-
-async def send_receive():
-    uri = "wss://gigaverse-ivrit-ai-streaming.hf.space/ws" # Update with your server's address if needed
+AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
+
+async def send_audio(websocket):
+    buffer_size = 1024*512 # Buffer audio chunks up to 512KB before sending
+    audio_buffer = bytearray()
+
+    with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
+        if response.status_code == 200:
+            print("Starting to stream audio file...")
+
+            for chunk in response.iter_content(chunk_size=1024): # Stream in chunks
+                if chunk:
+                    audio_buffer.extend(chunk)
+                    print(f"Received audio chunk of size {len(chunk)} bytes.")
+
+                    # Send buffered audio data once it's large enough
+                    if len(audio_buffer) >= buffer_size:
+                        await websocket.send(audio_buffer)
+                        print(f"Sent {len(audio_buffer)} bytes of audio data.")
+                        audio_buffer.clear()
+                        await asyncio.sleep(0.01)
+
+            print("Finished sending audio.")
+        else:
+            print(f"Failed to download audio file. Status code: {response.status_code}")
+
+async def receive_transcription(websocket):
+    while True:
+        try:
+            transcription = await websocket.recv() # Receive transcription from the server
+            print(f"Transcription: {transcription}")
+        except Exception as e:
+            print(f"Error receiving transcription: {e}")
+            break
+
+async def send_heartbeat(websocket):
+    while True:
+        try:
+            await websocket.ping()
+            print("Sent keepalive ping")
+        except websockets.ConnectionClosed:
+            print("Connection closed, stopping heartbeat")
+            break
+        await asyncio.sleep(30) # Send ping every 30 seconds (adjust as needed)
+
+
+async def run_client():
+    uri = ("wss://gigaverse-ivrit-ai-streaming.hf.space/wtranscribe") # WebSocket URL
     ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
     ssl_context.check_hostname = False
     ssl_context.verify_mode = ssl.CERT_NONE
-    logger.info(f"Connecting to server at {uri}")
-    try:
-        async with websockets.connect(uri,ssl=ssl_context) as websocket:
-            logger.info("WebSocket connection established")
-            # Start tasks for sending and receiving
-            send_task = asyncio.create_task(send_audio(websocket))
-            receive_task = asyncio.create_task(receive_transcriptions(websocket))
-            await asyncio.gather(send_task, receive_task)
-    except Exception as e:
-        logger.error(f"WebSocket connection error: {e}")
-max_size_bytes = 50_000_000 # 10 MB
-
-SAMPLE_RATE = 16000
-CHUNK_SIZE =1024
-
-async def send_audio_chunks(websocket):
-    """Capture audio and send chunks to the server via WebSocket."""

-    def audio_callback(indata, frames, time, status):
-        """Callback function called when new audio is available."""
-        # Convert the audio input to a JSON-serializable format (e.g., list of samples)
-        audio_chunk = indata[:, 0].tolist() # Use only the first channel
-        asyncio.run_coroutine_threadsafe(
-            websocket.send(json.dumps(audio_chunk)), asyncio.get_event_loop()
+    async with websockets.connect(uri, ssl=ssl_context, timeout=60) as websocket:
+        await asyncio.gather(
+            send_audio(websocket),
+            receive_transcription(websocket),
+            send_heartbeat(websocket)
         )

-    # Start the audio stream
-    with sd.InputStream(callback=audio_callback, channels=1, samplerate=SAMPLE_RATE, blocksize=CHUNK_SIZE):
-        await asyncio.Future() # Keep the stream open and running
-
-
-async def receive_transcriptions(websocket):
-    try:
-        logger.info("Starting to receive transcriptions")
-        async for message in websocket: # This is the same as websocket.recv()
-            logger.info(f"Received transcription: {message}")
-            print(f"Transcription: {message}")
-    except Exception as e:
-        logger.error(f"Receive transcription error: {e}")
-
-
-
-
-
-if __name__ == "__main__":
-    asyncio.run(send_receive())
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# async def send_audio(websocket):
-#     buffer_size = 512 * 1024 #HAVE TO HAVE 512!!
-#     audio_buffer = bytearray()
-#
-#     with requests.get(AUDIO_FILE_URL, stream=True, allow_redirects=False) as response:
-#         if response.status_code == 200:
-#             print("Starting to stream audio file...")
-#
-#             for chunk in response.iter_content(chunk_size=1024): # Stream in chunks
-#                 if chunk:
-#                     audio_buffer.extend(chunk)
-#                     #print(f"Received audio chunk of size {len(chunk)} bytes.")
-#
-#                     # Send buffered audio data once it's large enough
-#                     if len(audio_buffer) >= buffer_size:
-#                         await websocket.send(audio_buffer)
-#                         #print(f"Sent {len(audio_buffer)} bytes of audio data.")
-#                         audio_buffer.clear()
-#                         await asyncio.sleep(0.01)
-#
-#             print("Finished sending audio.")
-#         else:
-#             print(f"Failed to download audio file. Status code: {response.status_code}")
-#
-#
-# async def receive_transcription(websocket):
-#     while True:
-#         try:
-#
-#             transcription = await websocket.recv()
-#             # Receive transcription from the server
-#             print(f"Transcription: {transcription}")
-#         except Exception as e:
-#             print(f"Error receiving transcription: {e}")
-#             #await asyncio.sleep(30)
-#             break
-#
-#
-# async def send_heartbeat(websocket):
-#     while True:
-#         try:
-#             await websocket.ping()
-#             print("Sent keepalive ping")
-#         except websockets.ConnectionClosed:
-#             print("Connection closed, stopping heartbeat")
-#             break
-#         await asyncio.sleep(30) # Send ping every 30 seconds (adjust as needed)
-#
-#
-# async def run_client():
-#     uri = ("wss://gigaverse-ivrit-ai-streaming.hf.space/wtranscribe") # WebSocket URL
-#     ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
-#     ssl_context.check_hostname = False
-#     ssl_context.verify_mode = ssl.CERT_NONE
-#     while True:
-#         try:
-#             async with websockets.connect(uri, ssl=ssl_context, ping_timeout=1000, ping_interval=50) as websocket:
-#                 await asyncio.gather(
-#                     send_audio(websocket),
-#                     receive_transcription(websocket),
-#                     send_heartbeat(websocket)
-#                 )
-#         except websockets.ConnectionClosedError as e:
-#             print(f"WebSocket closed with error: {e}")
-#         # except Exception as e:
-#         #     print(f"Unexpected error: {e}")
-#         #
-#         #     print("Reconnecting in 5 seconds...")
-#         #     await asyncio.sleep(5) # Wait 5 seconds before reconnecting
-#
-# asyncio.run(run_client())
+asyncio.run(run_client())
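
The `/wtranscribe` endpoint in infer.py (below) answers each buffered send with JSON of the form `{"new_segments": [...], "processed_segments": [...]}`, or `{"error": ...}` on failure, so `receive_transcription` above prints raw JSON. A minimal sketch of decoding that payload into plain text, assuming that message shape; the helper name `extract_new_text` is illustrative, not part of this commit:

```python
import json

def extract_new_text(message: str) -> str:
    # Decode one server message and join the text of the newly transcribed
    # segments. Assumes the payload shape produced by /wtranscribe in infer.py:
    # {"new_segments": [...], "processed_segments": [...]} or {"error": ...}.
    payload = json.loads(message)
    if "error" in payload:
        raise RuntimeError(payload["error"])
    return " ".join(seg["text"].strip() for seg in payload.get("new_segments", []))

# Example with one new segment:
msg = '{"new_segments": [{"text": " hello", "start": 0.0, "end": 1.2}], "processed_segments": []}'
print(extract_new_text(msg))  # -> hello
```

On pacing: at 16 kHz mono 16-bit PCM (the format infer.py assumes, i.e. 32,000 bytes per second), the 512 KB send buffer holds roughly 16 seconds of audio, so responses should arrive in batches of about that size.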
infer.py CHANGED
@@ -123,129 +123,250 @@ async def read_root():
 import tempfile


-@app.websocket("/ws")
-async def websocket_endpoint(websocket: WebSocket):
-    """WebSocket endpoint to handle client connections."""
-    await websocket.accept()
-    client_ip = websocket.client.host
-    logger.info(f"Client connected: {client_ip}")
-    sys.stdout.flush()
-    try:
-        await process_audio_stream(websocket)
-    except WebSocketDisconnect:
-        logger.info(f"Client disconnected: {client_ip}")
-    except Exception as e:
-        logger.error(f"Unexpected error: {e}")
-        await websocket.close()
-
-async def process_audio_stream(websocket: WebSocket):
-    """Continuously receive audio chunks and initiate transcription tasks."""
-    sampling_rate = 16000
-    min_chunk_size = 5 # in seconds
-
-    transcription_task = None
-    chunk_counter = 0
-    total_bytes_received = 0
-
-    while True:
-        try:
-            # Receive audio data from client
-            data = await websocket.receive_bytes()
-            if not data:
-                logger.info("No data received, closing connection")
-                break
-            chunk_counter += 1
-            chunk_size = len(data)
-            total_bytes_received += chunk_size
-            #logger.debug(f"Received chunk {chunk_counter}: {chunk_size} bytes")
-
-            audio_chunk = process_received_audio(data)
-            #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
-            # Check if enough audio has been buffered
-            if transcription_task is None or transcription_task.done():
-                # Start a new transcription task
-                # logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
-                transcription_task = asyncio.create_task(
-                    transcribe_and_send(websocket, audio_chunk)
-                )
-
-            #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
-        except Exception as e:
-            logger.error(f"Error receiving data: {e}")
-            break
-
-
-
-async def transcribe_and_send(websocket: WebSocket, audio_data):
-    """Run transcription in a separate thread and send the result to the client."""
-    logger.debug(f"Transcription task started for {len(audio_data)} samples")
-    transcription_result = await asyncio.to_thread(sync_transcribe_audio, audio_data)
-    if transcription_result:
-        try:
-            # Send the result as JSON
-            await websocket.send_json(transcription_result)
-            logger.info(f"Transcription JSON sent to client {transcription_result}")
-        except Exception as e:
-            logger.error(f"Error sending transcription: {e}")
-    else:
-        logger.warning("No transcription result to send")
-
-def sync_transcribe_audio(audio_data):
-    """Synchronously transcribe audio data using the ASR model and format the result."""
-    try:

-        logger.info('Starting transcription...')
-        segments, info = model.transcribe(
-            audio_data, language="he",compression_ratio_threshold=2.5, word_timestamps=True
-        )
-        logger.info('Transcription completed')
+def transcribe_core_ws(audio_file, last_transcribed_time):
+    """
+    Transcribe the audio file and return only the segments that have not been processed yet.

-        # Build the transcription result as per your requirement
-        ret = {'segments': []}
+    :param audio_file: Path to the growing audio file.
+    :param last_transcribed_time: The last time (in seconds) that was transcribed.
+    :return: Newly transcribed segments and the updated last transcribed time.
+    """
+    logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")

-        for s in segments:
-            logger.debug(f"Processing segment {s.id} with start time: {s.start} and end time: {s.end}")
+    ret = {'new_segments': []}
+    new_last_transcribed_time = last_transcribed_time

-            # Process words in the segment
-            words = [{
-                'start': float(w.start),
-                'end': float(w.end),
-                'word': w.word,
-                'probability': float(w.probability)
-            } for w in s.words]
+    try:
+        # Transcribe the entire audio file
+        logging.debug(f"Initiating model transcription for file: {audio_file}")
+        segs, _ = model.transcribe(audio_file, language='he', word_timestamps=True)
+        logging.info('Transcription completed successfully.')
+    except Exception as e:
+        logging.error(f"Error during transcription: {e}")
+        raise e
+
+    # Track the new segments and update the last transcribed time
+    for s in segs:
+        logging.info(f"Processing segment with start time: {s.start} and end time: {s.end}")
+
+        # Only process segments that start after the last transcribed time
+        if s.start >= last_transcribed_time:
+            logging.info(f"New segment found starting at {s.start} seconds.")
+            words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]

             seg = {
-                'id': int(s.id),
-                'seek': int(s.seek),
-                'start': float(s.start),
-                'end': float(s.end),
-                'text': s.text,
-                'avg_logprob': float(s.avg_logprob),
-                'compression_ratio': float(s.compression_ratio),
-                'no_speech_prob': float(s.no_speech_prob),
-                'words': words
+                'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
+                'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
+                'no_speech_prob': s.no_speech_prob, 'words': words
             }
-            logger.debug(f'Adding new transcription segment: {seg}')
-            ret['segments'].append(seg)
+            logging.info(f'Adding new transcription segment: {seg}')
+            ret['new_segments'].append(seg)
+
+            # Update the last transcribed time to the end of the current segment
+            new_last_transcribed_time = max(new_last_transcribed_time, s.end)
+            logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
+
+    #logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
+    return ret, new_last_transcribed_time
+
+
+import tempfile
+
+
+@app.websocket("/wtranscribe")
+async def websocket_transcribe(websocket: WebSocket):
+    logging.info("New WebSocket connection request received.")
+    await websocket.accept()
+    logging.info("WebSocket connection established successfully.")
+
+    try:
+        processed_segments = [] # Keeps track of the segments already transcribed
+        accumulated_audio_size = 0 # Track how much audio data has been buffered
+        accumulated_audio_time = 0 # Track the total audio duration accumulated
+        last_transcribed_time = 0.0
+        #min_transcription_time = 5.0 # Minimum duration of audio in seconds before transcription starts
+
+        # A temporary file to store the growing audio data
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
+            logging.info(f"Temporary audio file created at {temp_audio_file.name}")
+
+            while True:
+                try:
+                    # Receive the next chunk of audio data
+                    audio_chunk = await websocket.receive_bytes()
+                    if not audio_chunk:
+                        logging.warning("Received empty audio chunk, skipping processing.")
+                        continue
+
+                    # Write audio chunk to file and accumulate size and time
+                    temp_audio_file.write(audio_chunk)
+                    temp_audio_file.flush()
+                    accumulated_audio_size += len(audio_chunk)
+
+                    # Estimate the duration of the chunk based on its size (e.g., 16kHz audio)
+                    chunk_duration = len(audio_chunk) / (16000 * 2) # Assuming 16kHz mono WAV (2 bytes per sample)
+                    accumulated_audio_time += chunk_duration
+                    logging.info(f"Received and buffered {len(audio_chunk)} bytes, total buffered: {accumulated_audio_size} bytes, total time: {accumulated_audio_time:.2f} seconds")
+
+                    # Transcribe when enough time (audio) is accumulated (e.g., at least 5 seconds of audio)
+                    #if accumulated_audio_time >= min_transcription_time:
+                    #logging.info("Buffered enough audio time, starting transcription.")
+
+
+                    # Call the transcription function with the last processed time
+                    partial_result, last_transcribed_time = transcribe_core_ws(temp_audio_file.name, last_transcribed_time)
+                    accumulated_audio_time = 0 # Reset the accumulated audio time
+                    processed_segments.extend(partial_result['new_segments'])
+
+                    # Reset the accumulated audio size after transcription
+                    accumulated_audio_size = 0
+
+                    # Send the transcription result back to the client with both new and all processed segments
+                    response = {
+                        "new_segments": partial_result['new_segments'],
+                        "processed_segments": processed_segments
+                    }
+                    logging.info(f"Sending {len(partial_result['new_segments'])} new segments to the client.")
+                    await websocket.send_json(response)
+
+                except WebSocketDisconnect:
+                    logging.info("WebSocket connection closed by the client.")
+                    break

-        logger.debug(f"Total segments in transcription result: {len(ret['segments'])}")
-        return ret
     except Exception as e:
-        logger.error(f"Transcription error: {e}")
-        return {}
+        logging.error(f"Unexpected error during WebSocket transcription: {e}")
+        await websocket.send_json({"error": str(e)})
+
+    finally:
+        logging.info("Cleaning up and closing WebSocket connection.")

-def process_received_audio(data):
-    """Convert received bytes into normalized float32 NumPy array."""
-    #logger.debug(f"Processing received audio data of size {len(data)} bytes")
-    audio_int16 = np.frombuffer(data, dtype=np.int16)
-    #logger.debug(f"Converted to int16 NumPy array with {len(audio_int16)} samples")

-    audio_float32 = audio_int16.astype(np.float32) / 32768.0 # Normalize to [-1, 1]
-    #logger.debug(f"Normalized audio data to float32 with {len(audio_float32)} samples")

-    return audio_float32


+# @app.websocket("/ws")
+# async def websocket_endpoint(websocket: WebSocket):
+#     """WebSocket endpoint to handle client connections."""
+#     await websocket.accept()
+#     client_ip = websocket.client.host
+#     logger.info(f"Client connected: {client_ip}")
+#     sys.stdout.flush()
+#     try:
+#         await process_audio_stream(websocket)
+#     except WebSocketDisconnect:
+#         logger.info(f"Client disconnected: {client_ip}")
+#     except Exception as e:
+#         logger.error(f"Unexpected error: {e}")
+#         await websocket.close()
+#
+# async def process_audio_stream(websocket: WebSocket):
+#     """Continuously receive audio chunks and initiate transcription tasks."""
+#     sampling_rate = 16000
+#     min_chunk_size = 5 # in seconds
+#
+#     transcription_task = None
+#     chunk_counter = 0
+#     total_bytes_received = 0
+#
+#     while True:
+#         try:
+#             # Receive audio data from client
+#             data = await websocket.receive_bytes()
+#             if not data:
+#                 logger.info("No data received, closing connection")
+#                 break
+#             chunk_counter += 1
+#             chunk_size = len(data)
+#             total_bytes_received += chunk_size
+#             #logger.debug(f"Received chunk {chunk_counter}: {chunk_size} bytes")
+#
+#             audio_chunk = process_received_audio(data)
+#             #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
+#             # Check if enough audio has been buffered
+#             # if transcription_task is None or transcription_task.done():
+#             #     # Start a new transcription task
+#             #     # logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
+#             transcription_task = asyncio.create_task(
+#                 transcribe_and_send(websocket, audio_chunk)
+#             )
+#
+#             #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
+#         except Exception as e:
+#             logger.error(f"Error receiving data: {e}")
+#             break
+#
+#
+# async def transcribe_and_send(websocket: WebSocket, audio_data):
+#     """Run transcription in a separate thread and send the result to the client."""
+#     logger.debug(f"Transcription task started for {len(audio_data)} samples")
+#     transcription_result = await asyncio.to_thread(sync_transcribe_audio, audio_data)
+#     if transcription_result:
+#         try:
+#             # Send the result as JSON
+#             await websocket.send_json(transcription_result)
+#             logger.info(f"Transcription JSON sent to client {transcription_result}")
+#         except Exception as e:
+#             logger.error(f"Error sending transcription: {e}")
+#     else:
+#         logger.warning("No transcription result to send")
+#
+# def sync_transcribe_audio(audio_data):
+#     """Synchronously transcribe audio data using the ASR model and format the result."""
+#     try:
+#
+#         logger.info('Starting transcription...')
+#         segments, info = model.transcribe(
+#             audio_data, language="he",compression_ratio_threshold=2.5, word_timestamps=True
+#         )
+#         logger.info('Transcription completed')
+#
+#         # Build the transcription result as per your requirement
+#         ret = {'segments': []}
+#
+#         for s in segments:
+#             logger.debug(f"Processing segment {s.id} with start time: {s.start} and end time: {s.end}")
+#
+#             # Process words in the segment
+#             words = [{
+#                 'start': float(w.start),
+#                 'end': float(w.end),
+#                 'word': w.word,
+#                 'probability': float(w.probability)
+#             } for w in s.words]
+#
+#             seg = {
+#                 'id': int(s.id),
+#                 'seek': int(s.seek),
+#                 'start': float(s.start),
+#                 'end': float(s.end),
+#                 'text': s.text,
+#                 'avg_logprob': float(s.avg_logprob),
+#                 'compression_ratio': float(s.compression_ratio),
+#                 'no_speech_prob': float(s.no_speech_prob),
+#                 'words': words
+#             }
+#             logger.debug(f'Adding new transcription segment: {seg}')
+#             ret['segments'].append(seg)
+#
+#         logger.debug(f"Total segments in transcription result: {len(ret['segments'])}")
+#         return ret
+#     except Exception as e:
+#         logger.error(f"Transcription error: {e}")
+#         return {}
+#
+# def process_received_audio(data):
+#     """Convert received bytes into normalized float32 NumPy array."""
+#     #logger.debug(f"Processing received audio data of size {len(data)} bytes")
+#     audio_int16 = np.frombuffer(data, dtype=np.int16)
+#     #logger.debug(f"Converted to int16 NumPy array with {len(audio_int16)} samples")
+#
+#     audio_float32 = audio_int16.astype(np.float32) / 32768.0 # Normalize to [-1, 1]
+#     #logger.debug(f"Normalized audio data to float32 with {len(audio_float32)} samples")
+#
+#     return audio_float32
+#
+#
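
The partial-transcription scheme this commit reverts to re-runs the model over the entire growing temp file on every received chunk, then keeps only segments whose start time is at or past `last_transcribed_time`. A minimal sketch of that dedup rule in isolation; `Seg` and `filter_new` are illustrative stand-ins, not part of this commit:

```python
from dataclasses import dataclass

@dataclass
class Seg:
    # Stand-in for a transcription segment; the real objects also carry
    # id, seek, avg_logprob, compression_ratio, no_speech_prob and words.
    start: float
    end: float
    text: str

def filter_new(segments, last_transcribed_time):
    # Mirrors the rule in transcribe_core_ws: keep only segments starting at
    # or after the cursor, and advance the cursor to the furthest segment end.
    new, cursor = [], last_transcribed_time
    for s in segments:
        if s.start >= last_transcribed_time:
            new.append(s)
            cursor = max(cursor, s.end)
    return new, cursor

# A second pass over the grown file returns an already-sent segment plus a
# new one; only the new one survives, and the cursor advances to its end.
segments = [Seg(0.0, 4.8, "old text"), Seg(5.1, 9.7, "new text")]
new, cursor = filter_new(segments, 5.0)
print([s.text for s in new], cursor)  # ['new text'] 9.7
```

One caveat worth noting: because each pass re-transcribes from time zero, segment boundaries can shift between passes, so the strict `s.start >= last_transcribed_time` cutoff may occasionally drop or duplicate words near the boundary, and the per-pass cost grows with the length of the accumulated file.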