Spaces:
Building
Building
Upload audio_routes.py
Browse files- routes/audio_routes.py +18 -25
routes/audio_routes.py
CHANGED
@@ -15,6 +15,7 @@ from utils.logger import log_info, log_error, log_warning, log_debug
|
|
15 |
from tts.tts_factory import TTSFactory
|
16 |
from tts.tts_preprocessor import TTSPreprocessor
|
17 |
from config.config_provider import ConfigProvider
|
|
|
18 |
|
19 |
router = APIRouter(tags=["audio"])
|
20 |
|
@@ -43,10 +44,10 @@ async def generate_tts(request: TTSRequest, req: Request):
|
|
43 |
# Check if we should use event-driven mode
|
44 |
if request.session_id and hasattr(req.app.state, 'event_bus'):
|
45 |
# Event-driven mode for realtime sessions
|
46 |
-
from event_bus import Event, EventType
|
47 |
-
|
48 |
log_info(f"🎤 TTS request via event bus for session: {request.session_id}")
|
49 |
-
|
50 |
# Publish TTS event
|
51 |
await req.app.state.event_bus.publish(Event(
|
52 |
type=EventType.TTS_STARTED,
|
@@ -58,14 +59,14 @@ async def generate_tts(request: TTSRequest, req: Request):
|
|
58 |
"is_api_call": True # Flag to indicate this is from REST API
|
59 |
}
|
60 |
))
|
61 |
-
|
62 |
# Return a response indicating audio will be streamed via WebSocket
|
63 |
return {
|
64 |
"status": "processing",
|
65 |
"message": "TTS audio will be streamed via WebSocket connection",
|
66 |
"session_id": request.session_id
|
67 |
}
|
68 |
-
|
69 |
else:
|
70 |
# Direct TTS generation (legacy mode)
|
71 |
tts_provider = TTSFactory.create_provider()
|
@@ -179,10 +180,8 @@ async def transcribe_audio(request: STTRequest, req: Request):
|
|
179 |
# Check if we should use event-driven mode
|
180 |
if request.session_id and hasattr(req.app.state, 'event_bus'):
|
181 |
# Event-driven mode for realtime sessions
|
182 |
-
from event_bus import Event, EventType
|
183 |
-
|
184 |
log_info(f"🎧 STT request via event bus for session: {request.session_id}")
|
185 |
-
|
186 |
# Publish audio chunk event
|
187 |
await req.app.state.event_bus.publish(Event(
|
188 |
type=EventType.AUDIO_CHUNK_RECEIVED,
|
@@ -194,14 +193,14 @@ async def transcribe_audio(request: STTRequest, req: Request):
|
|
194 |
"is_api_call": True
|
195 |
}
|
196 |
))
|
197 |
-
|
198 |
# Return a response indicating transcription will be available via WebSocket
|
199 |
return {
|
200 |
"status": "processing",
|
201 |
"message": "Transcription will be available via WebSocket connection",
|
202 |
"session_id": request.session_id
|
203 |
}
|
204 |
-
|
205 |
else:
|
206 |
# Direct STT transcription (legacy mode)
|
207 |
from stt.stt_factory import STTFactory
|
@@ -327,11 +326,11 @@ async def audio_websocket(websocket: WebSocket, session_id: str, request: Reques
|
|
327 |
This is a dedicated audio stream separate from the main conversation WebSocket
|
328 |
"""
|
329 |
from fastapi import WebSocketDisconnect
|
330 |
-
|
331 |
try:
|
332 |
await websocket.accept()
|
333 |
log_info(f"🎵 Audio WebSocket connected for session: {session_id}")
|
334 |
-
|
335 |
if not hasattr(request.app.state, 'event_bus'):
|
336 |
await websocket.send_json({
|
337 |
"type": "error",
|
@@ -339,16 +338,14 @@ async def audio_websocket(websocket: WebSocket, session_id: str, request: Reques
|
|
339 |
})
|
340 |
await websocket.close()
|
341 |
return
|
342 |
-
|
343 |
while True:
|
344 |
try:
|
345 |
# Receive audio data
|
346 |
data = await websocket.receive_json()
|
347 |
-
|
348 |
if data.get("type") == "audio_chunk":
|
349 |
# Forward to event bus
|
350 |
-
from event_bus import Event, EventType
|
351 |
-
|
352 |
await request.app.state.event_bus.publish(Event(
|
353 |
type=EventType.AUDIO_CHUNK_RECEIVED,
|
354 |
session_id=session_id,
|
@@ -358,13 +355,11 @@ async def audio_websocket(websocket: WebSocket, session_id: str, request: Reques
|
|
358 |
"chunk_index": data.get("chunk_index", 0)
|
359 |
}
|
360 |
))
|
361 |
-
|
362 |
elif data.get("type") == "control":
|
363 |
action = data.get("action")
|
364 |
-
|
365 |
if action == "start_recording":
|
366 |
-
from event_bus import Event, EventType
|
367 |
-
|
368 |
await request.app.state.event_bus.publish(Event(
|
369 |
type=EventType.STT_STARTED,
|
370 |
session_id=session_id,
|
@@ -373,16 +368,14 @@ async def audio_websocket(websocket: WebSocket, session_id: str, request: Reques
|
|
373 |
"format": data.get("format", "webm")
|
374 |
}
|
375 |
))
|
376 |
-
|
377 |
elif action == "stop_recording":
|
378 |
-
from event_bus import Event, EventType
|
379 |
-
|
380 |
await request.app.state.event_bus.publish(Event(
|
381 |
type=EventType.STT_STOPPED,
|
382 |
session_id=session_id,
|
383 |
data={"reason": "user_request"}
|
384 |
))
|
385 |
-
|
386 |
except WebSocketDisconnect:
|
387 |
break
|
388 |
except Exception as e:
|
@@ -391,7 +384,7 @@ async def audio_websocket(websocket: WebSocket, session_id: str, request: Reques
|
|
391 |
"type": "error",
|
392 |
"message": str(e)
|
393 |
})
|
394 |
-
|
395 |
except Exception as e:
|
396 |
log_error(f"Audio WebSocket error", error=str(e))
|
397 |
finally:
|
|
|
15 |
from tts.tts_factory import TTSFactory
|
16 |
from tts.tts_preprocessor import TTSPreprocessor
|
17 |
from config.config_provider import ConfigProvider
|
18 |
+
from chat_session.event_bus import Event, EventType
|
19 |
|
20 |
router = APIRouter(tags=["audio"])
|
21 |
|
|
|
44 |
# Check if we should use event-driven mode
|
45 |
if request.session_id and hasattr(req.app.state, 'event_bus'):
|
46 |
# Event-driven mode for realtime sessions
|
47 |
+
from chat_session.event_bus import Event, EventType
|
48 |
+
|
49 |
log_info(f"🎤 TTS request via event bus for session: {request.session_id}")
|
50 |
+
|
51 |
# Publish TTS event
|
52 |
await req.app.state.event_bus.publish(Event(
|
53 |
type=EventType.TTS_STARTED,
|
|
|
59 |
"is_api_call": True # Flag to indicate this is from REST API
|
60 |
}
|
61 |
))
|
62 |
+
|
63 |
# Return a response indicating audio will be streamed via WebSocket
|
64 |
return {
|
65 |
"status": "processing",
|
66 |
"message": "TTS audio will be streamed via WebSocket connection",
|
67 |
"session_id": request.session_id
|
68 |
}
|
69 |
+
|
70 |
else:
|
71 |
# Direct TTS generation (legacy mode)
|
72 |
tts_provider = TTSFactory.create_provider()
|
|
|
180 |
# Check if we should use event-driven mode
|
181 |
if request.session_id and hasattr(req.app.state, 'event_bus'):
|
182 |
# Event-driven mode for realtime sessions
|
|
|
|
|
183 |
log_info(f"🎧 STT request via event bus for session: {request.session_id}")
|
184 |
+
|
185 |
# Publish audio chunk event
|
186 |
await req.app.state.event_bus.publish(Event(
|
187 |
type=EventType.AUDIO_CHUNK_RECEIVED,
|
|
|
193 |
"is_api_call": True
|
194 |
}
|
195 |
))
|
196 |
+
|
197 |
# Return a response indicating transcription will be available via WebSocket
|
198 |
return {
|
199 |
"status": "processing",
|
200 |
"message": "Transcription will be available via WebSocket connection",
|
201 |
"session_id": request.session_id
|
202 |
}
|
203 |
+
|
204 |
else:
|
205 |
# Direct STT transcription (legacy mode)
|
206 |
from stt.stt_factory import STTFactory
|
|
|
326 |
This is a dedicated audio stream separate from the main conversation WebSocket
|
327 |
"""
|
328 |
from fastapi import WebSocketDisconnect
|
329 |
+
|
330 |
try:
|
331 |
await websocket.accept()
|
332 |
log_info(f"🎵 Audio WebSocket connected for session: {session_id}")
|
333 |
+
|
334 |
if not hasattr(request.app.state, 'event_bus'):
|
335 |
await websocket.send_json({
|
336 |
"type": "error",
|
|
|
338 |
})
|
339 |
await websocket.close()
|
340 |
return
|
341 |
+
|
342 |
while True:
|
343 |
try:
|
344 |
# Receive audio data
|
345 |
data = await websocket.receive_json()
|
346 |
+
|
347 |
if data.get("type") == "audio_chunk":
|
348 |
# Forward to event bus
|
|
|
|
|
349 |
await request.app.state.event_bus.publish(Event(
|
350 |
type=EventType.AUDIO_CHUNK_RECEIVED,
|
351 |
session_id=session_id,
|
|
|
355 |
"chunk_index": data.get("chunk_index", 0)
|
356 |
}
|
357 |
))
|
358 |
+
|
359 |
elif data.get("type") == "control":
|
360 |
action = data.get("action")
|
361 |
+
|
362 |
if action == "start_recording":
|
|
|
|
|
363 |
await request.app.state.event_bus.publish(Event(
|
364 |
type=EventType.STT_STARTED,
|
365 |
session_id=session_id,
|
|
|
368 |
"format": data.get("format", "webm")
|
369 |
}
|
370 |
))
|
371 |
+
|
372 |
elif action == "stop_recording":
|
|
|
|
|
373 |
await request.app.state.event_bus.publish(Event(
|
374 |
type=EventType.STT_STOPPED,
|
375 |
session_id=session_id,
|
376 |
data={"reason": "user_request"}
|
377 |
))
|
378 |
+
|
379 |
except WebSocketDisconnect:
|
380 |
break
|
381 |
except Exception as e:
|
|
|
384 |
"type": "error",
|
385 |
"message": str(e)
|
386 |
})
|
387 |
+
|
388 |
except Exception as e:
|
389 |
log_error(f"Audio WebSocket error", error=str(e))
|
390 |
finally:
|