ciyidogan committed on
Commit
479a219
·
verified ·
1 Parent(s): 5998f62

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +72 -32
websocket_handler.py CHANGED
@@ -455,38 +455,38 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
455
  }
456
  })
457
 
458
- elif action == "end_session":
459
- # Clean up and close
460
- await session.cleanup()
461
- await websocket.close()
462
-
463
- elif action == "interrupt":
464
- # Handle explicit interrupt
465
- await session.handle_barge_in()
466
- await websocket.send_json({
467
- "type": "control",
468
- "action": "interrupt_acknowledged"
469
- })
470
-
471
- elif action == "reset":
472
- # Reset conversation state
473
- await session.reset_for_new_utterance()
474
- await session.change_state(ConversationState.IDLE)
475
- await websocket.send_json({
476
- "type": "state_change",
477
- "from": session.state.value,
478
- "to": "idle"
479
- })
480
-
481
- elif action == "audio_ended":
482
- # Audio playback ended on client
483
- if session.state == ConversationState.PLAYING_AUDIO:
484
- await session.change_state(ConversationState.IDLE)
485
- await websocket.send_json({
486
- "type": "state_change",
487
- "from": "playing_audio",
488
- "to": "idle"
489
- })
490
 
491
 
492
  # ========================= PROCESSING FUNCTIONS =========================
@@ -620,6 +620,46 @@ async def generate_and_stream_tts(
620
  chunk = audio_data[i:i + chunk_size]
621
  chunk_index = i // chunk_size
622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
  await websocket.send_json({
624
  "type": "tts_audio",
625
  "data": base64.b64encode(chunk).decode('utf-8'),
 
455
  }
456
  })
457
 
458
+ # Send welcome message and TTS if available
459
+ chat_history = session.session.chat_history
460
+ if chat_history and len(chat_history) > 0:
461
+ last_message = chat_history[-1]
462
+ if last_message["role"] == "assistant":
463
+ welcome_text = last_message["content"]
464
+
465
+ # Send text message
466
+ await websocket.send_json({
467
+ "type": "assistant_response",
468
+ "text": welcome_text
469
+ })
470
+
471
+ # Generate TTS if enabled
472
+ tts_provider = TTSFactory.create_provider()
473
+ if tts_provider:
474
+ await session.change_state(ConversationState.PROCESSING_TTS)
475
+ await websocket.send_json({
476
+ "type": "state_change",
477
+ "from": "idle",
478
+ "to": "processing_tts"
479
+ })
480
+
481
+ # Generate and stream TTS
482
+ tts_task = session.barge_in_handler.start_tts_task(
483
+ generate_and_stream_tts(websocket, session, tts_provider, welcome_text)
484
+ )
485
+
486
+ try:
487
+ await tts_task
488
+ except asyncio.CancelledError:
489
+ log_info("Welcome TTS cancelled", session_id=session.session.session_id)
490
 
491
 
492
  # ========================= PROCESSING FUNCTIONS =========================
 
620
  chunk = audio_data[i:i + chunk_size]
621
  chunk_index = i // chunk_size
622
 
623
+ await websocket.send_json({
624
+ "type": "tts_audio",
625
+ "data": base64.b64encode(chunk).decode('utf-8'),
626
+ "chunk_index": chunk_index,
627
+ "total_chunks": total_chunks,
628
+ "is_last": chunk_index == total_chunks - 1,
629
+ "mime_type": "audio/mpeg" # MP3 format for ElevenLabs
630
+ })
631
+
632
+ # Small delay to prevent overwhelming the client
633
+ await asyncio.sleep(0.01)
634
+
635
+ # Send state back to idle after completion
636
+ await session.change_state(ConversationState.IDLE)
637
+ await websocket.send_json({
638
+ "type": "state_change",
639
+ "from": "playing_audio",
640
+ "to": "idle"
641
+ })
642
+
643
+ log_info(
644
+ f"TTS streaming completed",
645
+ session_id=session.session.session_id,
646
+ text_length=len(text),
647
+ audio_size=len(audio_data)
648
+ )
649
+
650
+ except asyncio.CancelledError:
651
+ log_info("TTS streaming cancelled", session_id=session.session.session_id)
652
+ raise
653
+ except Exception as e:
654
+ log_error(
655
+ f"TTS generation error",
656
+ error=str(e),
657
+ session_id=session.session.session_id
658
+ )
659
+ await websocket.send_json({
660
+ "type": "error",
661
+ "message": f"TTS error: {str(e)}"
662
+ })
663
  await websocket.send_json({
664
  "type": "tts_audio",
665
  "data": base64.b64encode(chunk).decode('utf-8'),