Spaces:
Building
Building
Update websocket_handler.py
Browse files- websocket_handler.py +72 -32
websocket_handler.py
CHANGED
@@ -455,38 +455,38 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
|
|
455 |
}
|
456 |
})
|
457 |
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
|
491 |
|
492 |
# ========================= PROCESSING FUNCTIONS =========================
|
@@ -620,6 +620,46 @@ async def generate_and_stream_tts(
|
|
620 |
chunk = audio_data[i:i + chunk_size]
|
621 |
chunk_index = i // chunk_size
|
622 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
623 |
await websocket.send_json({
|
624 |
"type": "tts_audio",
|
625 |
"data": base64.b64encode(chunk).decode('utf-8'),
|
|
|
455 |
}
|
456 |
})
|
457 |
|
458 |
+
# Send welcome message and TTS if available
|
459 |
+
chat_history = session.session.chat_history
|
460 |
+
if chat_history and len(chat_history) > 0:
|
461 |
+
last_message = chat_history[-1]
|
462 |
+
if last_message["role"] == "assistant":
|
463 |
+
welcome_text = last_message["content"]
|
464 |
+
|
465 |
+
# Send text message
|
466 |
+
await websocket.send_json({
|
467 |
+
"type": "assistant_response",
|
468 |
+
"text": welcome_text
|
469 |
+
})
|
470 |
+
|
471 |
+
# Generate TTS if enabled
|
472 |
+
tts_provider = TTSFactory.create_provider()
|
473 |
+
if tts_provider:
|
474 |
+
await session.change_state(ConversationState.PROCESSING_TTS)
|
475 |
+
await websocket.send_json({
|
476 |
+
"type": "state_change",
|
477 |
+
"from": "idle",
|
478 |
+
"to": "processing_tts"
|
479 |
+
})
|
480 |
+
|
481 |
+
# Generate and stream TTS
|
482 |
+
tts_task = session.barge_in_handler.start_tts_task(
|
483 |
+
generate_and_stream_tts(websocket, session, tts_provider, welcome_text)
|
484 |
+
)
|
485 |
+
|
486 |
+
try:
|
487 |
+
await tts_task
|
488 |
+
except asyncio.CancelledError:
|
489 |
+
log_info("Welcome TTS cancelled", session_id=session.session.session_id)
|
490 |
|
491 |
|
492 |
# ========================= PROCESSING FUNCTIONS =========================
|
|
|
620 |
chunk = audio_data[i:i + chunk_size]
|
621 |
chunk_index = i // chunk_size
|
622 |
|
623 |
+
await websocket.send_json({
|
624 |
+
"type": "tts_audio",
|
625 |
+
"data": base64.b64encode(chunk).decode('utf-8'),
|
626 |
+
"chunk_index": chunk_index,
|
627 |
+
"total_chunks": total_chunks,
|
628 |
+
"is_last": chunk_index == total_chunks - 1,
|
629 |
+
"mime_type": "audio/mpeg" # MP3 format for ElevenLabs
|
630 |
+
})
|
631 |
+
|
632 |
+
# Small delay to prevent overwhelming the client
|
633 |
+
await asyncio.sleep(0.01)
|
634 |
+
|
635 |
+
# Send state back to idle after completion
|
636 |
+
await session.change_state(ConversationState.IDLE)
|
637 |
+
await websocket.send_json({
|
638 |
+
"type": "state_change",
|
639 |
+
"from": "playing_audio",
|
640 |
+
"to": "idle"
|
641 |
+
})
|
642 |
+
|
643 |
+
log_info(
|
644 |
+
f"TTS streaming completed",
|
645 |
+
session_id=session.session.session_id,
|
646 |
+
text_length=len(text),
|
647 |
+
audio_size=len(audio_data)
|
648 |
+
)
|
649 |
+
|
650 |
+
except asyncio.CancelledError:
|
651 |
+
log_info("TTS streaming cancelled", session_id=session.session.session_id)
|
652 |
+
raise
|
653 |
+
except Exception as e:
|
654 |
+
log_error(
|
655 |
+
f"TTS generation error",
|
656 |
+
error=str(e),
|
657 |
+
session_id=session.session.session_id
|
658 |
+
)
|
659 |
+
await websocket.send_json({
|
660 |
+
"type": "error",
|
661 |
+
"message": f"TTS error: {str(e)}"
|
662 |
+
})
|
663 |
await websocket.send_json({
|
664 |
"type": "tts_audio",
|
665 |
"data": base64.b64encode(chunk).decode('utf-8'),
|