AshDavid12 committed · Commit ebaaf9b · 1 Parent(s): 963a8a8
trying to create websocket
- client.py +35 -0
- infer.py +120 -2
- requirements.txt +1 -0
client.py ADDED
@@ -0,0 +1,35 @@
+import asyncio
+import websockets
+import wave
+
+# Parameters for reading and sending the audio
+SAMPLE_RATE = 16000
+CHUNK_SIZE = 1024  # Size of the audio chunk sent at a time
+AUDIO_FILE = "https://raw.githubusercontent.com/AshDavid12/hugging_face_ivrit_streaming/main/test_copy.mp3"  # Path to the mp3 file
+
+async def send_audio(websocket):
+    with wave.open(AUDIO_FILE, "rb") as wf:
+        data = wf.readframes(CHUNK_SIZE)
+        while data:
+            await websocket.send(data)  # Send audio chunk to the server
+            await asyncio.sleep(CHUNK_SIZE / SAMPLE_RATE)  # Simulate real-time by waiting for the duration of the chunk
+            data = wf.readframes(CHUNK_SIZE)
+
+async def receive_transcription(websocket):
+    while True:
+        try:
+            transcription = await websocket.recv()  # Receive transcription from the server
+            print(f"Transcription: {transcription}")
+        except Exception as e:
+            print(f"Error: {e}")
+            break
+
+async def run_client():
+    uri = "wss://gigaverse-ivrit-ai-streaming.hf.space/ws/transcribe"  # Replace with your Hugging Face Space WebSocket URL
+    async with websockets.connect(uri) as websocket:
+        await asyncio.gather(
+            send_audio(websocket),
+            receive_transcription(websocket)
+        )
+
+asyncio.run(run_client())
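Note on client.py as committed: the standard-library wave module only reads local WAV files; it can neither fetch an HTTP URL nor parse MP3 data, so wave.open(AUDIO_FILE, "rb") fails before any audio is sent. A minimal working sketch under the assumption that the server accepts raw MP3 bytes over the same endpoint; the 4096-byte chunk size and 0.1 s pacing are illustrative choices, not from the commit:

import asyncio
import io

import requests
import websockets

AUDIO_URL = "https://raw.githubusercontent.com/AshDavid12/hugging_face_ivrit_streaming/main/test_copy.mp3"
URI = "wss://gigaverse-ivrit-ai-streaming.hf.space/ws/transcribe"
CHUNK_SIZE = 4096  # bytes per WebSocket message (illustrative)

async def send_audio(websocket):
    # Fetch the MP3 once, then replay it chunk by chunk as raw bytes
    stream = io.BytesIO(requests.get(AUDIO_URL).content)
    while chunk := stream.read(CHUNK_SIZE):
        await websocket.send(chunk)
        await asyncio.sleep(0.1)  # crude pacing; real-time pacing would need the decoded duration

async def receive_transcription(websocket):
    try:
        async for message in websocket:  # iterate until the server closes the connection
            print(f"Transcription: {message}")
    except websockets.ConnectionClosed:
        pass

async def run_client():
    async with websockets.connect(URI) as websocket:
        await asyncio.gather(send_audio(websocket), receive_transcription(websocket))

if __name__ == "__main__":
    asyncio.run(run_client())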
infer.py CHANGED
@@ -2,14 +2,17 @@ import base64
 import faster_whisper
 import tempfile
 import torch
+import time
 import requests
 import logging
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
+import websockets
 from pydantic import BaseModel
 from typing import Optional
+import asyncio
 
 # Configure logging
-logging.basicConfig(level=logging.
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 logging.info(f'Device selected: {device}')
@@ -130,3 +133,118 @@ def transcribe_core(audio_file):
         ret['segments'].append(seg)
 
     return ret
+
+
+def transcribe_core_ws(audio_file, last_transcribed_time):
+    """
+    Transcribe the audio file and return only the segments that have not been processed yet.
+
+    :param audio_file: Path to the growing audio file.
+    :param last_transcribed_time: The last time (in seconds) that was transcribed.
+    :return: Newly transcribed segments and the updated last transcribed time.
+    """
+    logging.info(f"Starting transcription for file: {audio_file} from {last_transcribed_time} seconds.")
+
+    ret = {'new_segments': []}
+    new_last_transcribed_time = last_transcribed_time
+
+    try:
+        # Transcribe the entire audio file
+        logging.debug(f"Initiating model transcription for file: {audio_file}")
+        segs, _ = model.transcribe(audio_file, language='he', word_timestamps=True)
+        logging.info('Transcription completed successfully.')
+    except Exception as e:
+        logging.error(f"Error during transcription: {e}")
+        raise e
+
+    # Track the new segments and update the last transcribed time
+    for s in segs:
+        logging.debug(f"Processing segment with start time: {s.start} and end time: {s.end}")
+
+        # Only process segments that start after the last transcribed time
+        if s.start >= last_transcribed_time:
+            logging.debug(f"New segment found starting at {s.start} seconds.")
+            words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
+
+            seg = {
+                'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text,
+                'avg_logprob': s.avg_logprob, 'compression_ratio': s.compression_ratio,
+                'no_speech_prob': s.no_speech_prob, 'words': words
+            }
+            logging.info(f'Adding new transcription segment: {seg}')
+            ret['new_segments'].append(seg)
+
+            # Update the last transcribed time to the end of the current segment
+            new_last_transcribed_time = max(new_last_transcribed_time, s.end)
+            logging.debug(f"Updated last transcribed time to: {new_last_transcribed_time} seconds")
+
+    logging.info(f"Returning {len(ret['new_segments'])} new segments and updated last transcribed time.")
+    return ret, new_last_transcribed_time
+
+
+import tempfile
+
+
+@app.websocket("/ws/transcribe")
+async def websocket_transcribe(websocket: WebSocket):
+    logging.info("New WebSocket connection request received.")
+    await websocket.accept()
+    logging.info("WebSocket connection established successfully.")
+
+    try:
+        processed_segments = []  # Keeps track of the segments already transcribed
+        audio_data = bytearray()  # Buffer for audio chunks
+        logging.info("Initialized processed_segments and audio_data buffer.")
+
+        # A temporary file to store the growing audio data
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
+            logging.info(f"Temporary audio file created at {temp_audio_file.name}")
+
+            # Continuously receive and process audio chunks
+            while True:
+                try:
+                    logging.debug("Waiting to receive the next chunk of audio data from WebSocket.")
+
+                    # Receive the next chunk of audio data
+                    audio_chunk = await websocket.receive_bytes()
+                    logging.info(f"Received an audio chunk of size {len(audio_chunk)} bytes.")
+
+                    if not audio_chunk:
+                        logging.warning("Received empty audio chunk, skipping processing.")
+                        continue
+
+                    temp_audio_file.write(audio_chunk)
+                    temp_audio_file.flush()
+                    logging.debug(f"Written audio chunk to temporary file: {temp_audio_file.name}")
+
+                    audio_data.extend(audio_chunk)  # In-memory data buffer (if needed)
+                    logging.debug(f"Audio data buffer extended to size {len(audio_data)} bytes.")
+
+                    # Perform transcription and track new segments
+                    logging.info(
+                        f"Transcribing audio from {temp_audio_file.name}. Processed segments: {len(processed_segments)}")
+                    partial_result, processed_segments = transcribe_core_ws(temp_audio_file.name, processed_segments)
+
+                    logging.info(
+                        f"Transcription completed. Sending {len(partial_result['new_segments'])} new segments to the client.")
+                    # Send the new transcription result back to the client
+                    await websocket.send_json(partial_result)
+
+                except WebSocketDisconnect:
+                    logging.info("WebSocket connection closed by the client. Ending transcription session.")
+                    break
+                except Exception as e:
+                    logging.error(f"Error processing audio chunk: {e}")
+                    await websocket.send_json({"error": str(e)})
+                    break
+
+    except Exception as e:
+        logging.error(f"Unexpected error during WebSocket transcription: {e}")
+        await websocket.send_json({"error": str(e)})
+    finally:
+        logging.info("Cleaning up and closing WebSocket connection.")
+        await websocket.close()
+
+
+
+
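Note on infer.py as committed: transcribe_core_ws takes and returns a float timestamp in seconds, but the endpoint initializes processed_segments = [] and passes that list in, so the comparison s.start >= last_transcribed_time raises a TypeError on the first chunk, and the returned float is then bound to a list-named variable. The mid-file import tempfile is also redundant, since tempfile is already imported at the top. A sketch of the call site with the state tracked as a float; it reuses app and transcribe_core_ws from this diff, and the context-managed temporary file (default delete=True, which also fixes the delete=False leak) is an assumed cleanup, not part of the commit:

import tempfile

from fastapi import WebSocket, WebSocketDisconnect

# Assumes `app` (the FastAPI instance) and `transcribe_core_ws` are defined earlier in infer.py

@app.websocket("/ws/transcribe")
async def websocket_transcribe(websocket: WebSocket):
    await websocket.accept()
    last_transcribed_time = 0.0  # float seconds, matching transcribe_core_ws's contract
    try:
        # delete=True (the default) removes the file when the connection ends
        with tempfile.NamedTemporaryFile(suffix=".mp3") as temp_audio_file:
            while True:
                audio_chunk = await websocket.receive_bytes()
                temp_audio_file.write(audio_chunk)
                temp_audio_file.flush()
                # Pass and reassign the timestamp, not the segment list
                partial_result, last_transcribed_time = transcribe_core_ws(
                    temp_audio_file.name, last_transcribed_time
                )
                await websocket.send_json(partial_result)
    except WebSocketDisconnect:
        pass  # client closed the stream; nothing left to send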
requirements.txt CHANGED
@@ -7,4 +7,5 @@ faster-whisper
 torch
 uvicorn
 fastapi
+websockets
 
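With websockets added, both halves of the commit can be exercised locally. A plausible invocation, assuming the FastAPI instance in infer.py is named app (as the @app.websocket decorator suggests) and the Hugging Face Spaces default port 7860:

uvicorn infer:app --host 0.0.0.0 --port 7860    # start the server
python client.py                                # stream audio from a second shell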