AshDavid12 committed
Commit ebaaf9b
1 Parent(s): 963a8a8

trying to create websocket

Files changed (3)
  1. client.py +35 -0
  2. infer.py +120 -2
  3. requirements.txt +1 -0
client.py ADDED
@@ -0,0 +1,35 @@
+ import asyncio
+ import websockets
+ import wave
+
+ # Parameters for reading and sending the audio
+ SAMPLE_RATE = 16000
+ CHUNK_SIZE = 1024  # Size of the audio chunk sent at a time
+ AUDIO_FILE = "https://raw.githubusercontent.com/AshDavid12/hugging_face_ivrit_streaming/main/test_copy.mp3"  # Path to the mp3 file
+
+ async def send_audio(websocket):
+     with wave.open(AUDIO_FILE, "rb") as wf:
+         data = wf.readframes(CHUNK_SIZE)
+         while data:
+             await websocket.send(data)  # Send audio chunk to the server
+             await asyncio.sleep(CHUNK_SIZE / SAMPLE_RATE)  # Simulate real-time by waiting for the duration of the chunk
+             data = wf.readframes(CHUNK_SIZE)
+
+ async def receive_transcription(websocket):
+     while True:
+         try:
+             transcription = await websocket.recv()  # Receive transcription from the server
+             print(f"Transcription: {transcription}")
+         except Exception as e:
+             print(f"Error: {e}")
+             break
+
+ async def run_client():
+     uri = "wss://gigaverse-ivrit-ai-streaming.hf.space/ws/transcribe"  # Replace with your Hugging Face Space WebSocket URL
+     async with websockets.connect(uri) as websocket:
+         await asyncio.gather(
+             send_audio(websocket),
+             receive_transcription(websocket)
+         )
+
+ asyncio.run(run_client())
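
A note on the client as committed: wave.open() only reads local, uncompressed WAV files, so pointing it at an HTTPS MP3 URL raises an error as soon as send_audio runs. A minimal sketch of a workable alternative, assuming the server is happy to receive raw MP3 bytes (the infer.py endpoint below writes whatever it receives into an .mp3 temp file, which suggests it is): download the file once with requests, then stream fixed-size byte chunks. The CHUNK_SIZE, timeouts, and stream_audio name here are illustration values, not part of the commit.

import asyncio

import requests
import websockets

AUDIO_URL = "https://raw.githubusercontent.com/AshDavid12/hugging_face_ivrit_streaming/main/test_copy.mp3"
URI = "wss://gigaverse-ivrit-ai-streaming.hf.space/ws/transcribe"
CHUNK_SIZE = 4096  # bytes per WebSocket message; arbitrary sketch value


async def stream_audio(uri: str) -> None:
    # Fetch the MP3 once up front; wave.open() cannot decode MP3 or open URLs.
    audio_bytes = requests.get(AUDIO_URL, timeout=30).content
    async with websockets.connect(uri) as ws:
        for i in range(0, len(audio_bytes), CHUNK_SIZE):
            await ws.send(audio_bytes[i:i + CHUNK_SIZE])  # one raw chunk
            # Poll briefly for any transcription JSON the server has pushed.
            try:
                print(await asyncio.wait_for(ws.recv(), timeout=0.1))
            except asyncio.TimeoutError:
                pass
        # Give the server a moment to flush any final segments.
        try:
            print(await asyncio.wait_for(ws.recv(), timeout=5))
        except asyncio.TimeoutError:
            pass


asyncio.run(stream_audio(URI))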
infer.py CHANGED
@@ -2,14 +2,17 @@ import base64
  import faster_whisper
  import tempfile
  import torch
+ import time
  import requests
  import logging
- from fastapi import FastAPI, HTTPException
+ from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
+ import websockets
  from pydantic import BaseModel
  from typing import Optional
+ import asyncio

  # Configure logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  logging.info(f'Device selected: {device}')
@@ -130,3 +133,118 @@ def transcribe_core(audio_file):
  ret['segments'].append(seg)

  return ret
+
+
+ def transcribe_core_ws(audio_file, last_transcribed_time):
+     """
+     Transcribe the audio file and return only the segments that have not been processed yet.
+
+     :param audio_file: Path to the growing audio file.
+     :param last_transcribed_time: The last time (in seconds) that was transcribed.
+     :return: Newly transcribed segments and the updated last transcribed time.
+     """
+     logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
+
+     ret = {'new_segments': []}
+     new_last_transcribed_time = last_transcribed_time
+
+     try:
+         # Transcribe the entire audio file
+         logging.debug(f"Initiating model transcription for file: {audio_file}")
+         segs, _ = model.transcribe(audio_file, language='he', word_timestamps=True)
+         logging.info('Transcription completed successfully.')
+     except Exception as e:
+         logging.error(f"Error during transcription: {e}")
+         raise e
+
+     # Track the new segments and update the last transcribed time
+     for s in segs:
+         logging.debug(f"Processing segment with start time: {s.start} and end time: {s.end}")
+
+         # Only process segments that start after the last transcribed time
+         if s.start >= last_transcribed_time:
+             logging.debug(f"New segment found starting at {s.start} seconds.")
+             words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
+
+             seg = {
+                 'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
+                 'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
+                 'no_speech_prob': s.no_speech_prob, 'words': words
+             }
+             logging.info(f'Adding new transcription segment: {seg}')
+             ret['new_segments'].append(seg)
+
+             # Update the last transcribed time to the end of the current segment
+             new_last_transcribed_time = max(new_last_transcribed_time, s.end)
+             logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
+
+     logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
+     return ret, new_last_transcribed_time
+
+
+ import tempfile
+
+
+ @app.websocket("/ws/transcribe")
+ async def websocket_transcribe(websocket: WebSocket):
+     logging.info("New WebSocket connection request received.")
+     await websocket.accept()
+     logging.info("WebSocket connection established successfully.")
+
+     try:
+         processed_segments = []  # Keeps track of the segments already transcribed
+         audio_data = bytearray()  # Buffer for audio chunks
+         logging.info("Initialized processed_segments and audio_data buffer.")
+
+         # A temporary file to store the growing audio data
+         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
+             logging.info(f"Temporary audio file created at {temp_audio_file.name}")
+
+             # Continuously receive and process audio chunks
+             while True:
+                 try:
+                     logging.debug("Waiting to receive the next chunk of audio data from WebSocket.")
+
+                     # Receive the next chunk of audio data
+                     audio_chunk = await websocket.receive_bytes()
+                     logging.info(f"Received an audio chunk of size {len(audio_chunk)} bytes.")
+
+                     if not audio_chunk:
+                         logging.warning("Received empty audio chunk, skipping processing.")
+                         continue
+
+                     temp_audio_file.write(audio_chunk)
+                     temp_audio_file.flush()
+                     logging.debug(f"Written audio chunk to temporary file: {temp_audio_file.name}")
+
+                     audio_data.extend(audio_chunk)  # In-memory data buffer (if needed)
+                     logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")
+
+                     # Perform transcription and track new segments
+                     logging.info(
+                         f"Transcribing audio from {temp_audio_file.name}. Processed segments: {len(processed_segments)}")
+                     partial_result, processed_segments = transcribe_core_ws(temp_audio_file.name, processed_segments)
+
+                     logging.info(
+                         f"Transcription completed. Sending {len(partial_result['new_segments'])} new segments to the client.")
+                     # Send the new transcription result back to the client
+                     await websocket.send_json(partial_result)
+
+                 except WebSocketDisconnect:
+                     logging.info("WebSocket connection closed by the client. Ending transcription session.")
+                     break
+                 except Exception as e:
+                     logging.error(f"Error processing audio chunk: {e}")
+                     await websocket.send_json({"error": str(e)})
+                     break
+
+     except Exception as e:
+         logging.error(f"Unexpected error during WebSocket transcription: {e}")
+         await websocket.send_json({"error": str(e)})
+     finally:
+         logging.info("Cleaning up and closing WebSocket connection.")
+         await websocket.close()
+
+
+
+
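
One behavioral note on this endpoint as committed: transcribe_core_ws expects last_transcribed_time, a number of seconds, but the loop seeds it with the processed_segments list and then overwrites that list with the float the function returns, so the first s.start >= last_transcribed_time comparison raises a TypeError. A condensed sketch of the receive loop with a float cursor instead, reusing transcribe_core_ws from this diff and omitting the logging for brevity (the receive_loop name and the variable rename are mine, not part of the commit):

import tempfile

from fastapi import WebSocket, WebSocketDisconnect


async def receive_loop(websocket: WebSocket) -> None:
    last_transcribed_time = 0.0  # float cursor in seconds, not a list
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
        while True:
            try:
                audio_chunk = await websocket.receive_bytes()
            except WebSocketDisconnect:
                break
            temp_audio_file.write(audio_chunk)
            temp_audio_file.flush()
            # transcribe_core_ws returns (result_dict, new_cursor_seconds);
            # feeding the cursor back in keeps already-sent segments out.
            partial_result, last_transcribed_time = transcribe_core_ws(
                temp_audio_file.name, last_transcribed_time
            )
            await websocket.send_json(partial_result)

Separately, since model.transcribe is synchronous, each call blocks the event loop for the length of the file, and re-transcribing the whole temp file on every chunk makes each iteration slower than the last; both are worth keeping in mind for a streaming design.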
requirements.txt CHANGED
@@ -7,4 +7,5 @@ faster-whisper
  torch
  uvicorn
  fastapi
+ websockets
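
With websockets added here, the dependency list covers both sides of the stream. For local testing, the server would typically be launched with something like uvicorn infer:app --host 0.0.0.0 --port 7860 (assuming infer.py's FastAPI instance is named app, as the @app.websocket decorator implies; 7860 is the conventional Hugging Face Spaces port), after which the client above can connect.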