ciyidogan committed on
Commit
8297b29
·
verified ·
1 Parent(s): ba22449

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +66 -12
websocket_handler.py CHANGED
@@ -226,6 +226,30 @@ class RealtimeSession:
226
  self.stt_manager = None
227
  self.is_streaming = False
228
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  async def change_state(self, new_state: ConversationState):
231
  """Change conversation state"""
@@ -564,16 +588,19 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
564
  })
565
 
566
  except Exception as e:
567
- log_error(
568
- f"❌ Audio chunk handling error",
569
- error=str(e),
570
- traceback=traceback.format_exc(),
571
- session_id=session.session.session_id
572
- )
573
- await websocket.send_json({
574
- "type": "error",
575
- "message": f"Audio processing error: {str(e)}"
576
- })
 
 
 
577
 
578
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
579
  """Handle control messages"""
@@ -620,18 +647,30 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
620
  elif action == "audio_ended":
621
  # Audio playback ended on client
622
  if session.state == ConversationState.PLAYING_AUDIO:
623
- await session.change_state(ConversationState.IDLE)
624
  await websocket.send_json({
625
  "type": "state_change",
626
  "from": "playing_audio",
627
- "to": "idle"
628
  })
 
 
629
 
630
 
631
  # ========================= PROCESSING FUNCTIONS =========================
632
  async def process_user_input(websocket: WebSocket, session: RealtimeSession):
633
  """Process complete user input"""
634
  try:
 
 
 
 
 
 
 
 
 
 
635
  # WebSocket aktif mi kontrol et
636
  if not session.is_websocket_active:
637
  return
@@ -745,6 +784,16 @@ async def generate_and_stream_tts(
745
  ):
746
  """Generate and stream TTS audio with cancellation support"""
747
  try:
 
 
 
 
 
 
 
 
 
 
748
  log_info(f"🎤 Starting TTS generation for text: '{text[:50]}...'", session_id=session.session.session_id)
749
 
750
  # TTS preprocessor kullan
@@ -828,6 +877,11 @@ async def generate_and_stream_tts(
828
  audio_size=len(audio_data),
829
  chunks_sent=total_chunks
830
  )
 
 
 
 
 
831
 
832
  except asyncio.CancelledError:
833
  log_info("🛑 TTS streaming cancelled", session_id=session.session.session_id)
 
226
  self.stt_manager = None
227
  self.is_streaming = False
228
  return False
229
+
230
+ async def restart_stt_if_needed(self):
231
+ """Restart STT if it's not active - sadece gerektiğinde"""
232
+ try:
233
+ if not self.is_streaming and self.is_websocket_active and self.state == ConversationState.LISTENING:
234
+ log_info(f"🔄 Restarting STT stream after timeout...", session_id=self.session.session_id)
235
+
236
+ # Mevcut STT manager'ı kullan
237
+ if self.stt_manager:
238
+ # Yeniden başlat
239
+ stt_initialized = await self.initialize_stt()
240
+ if stt_initialized:
241
+ log_info(f"✅ STT stream restarted successfully", session_id=self.session.session_id)
242
+ # Reset chunk counter
243
+ if hasattr(self, 'chunk_counter'):
244
+ self.chunk_counter = 0
245
+ return True
246
+ else:
247
+ log_error(f"❌ Failed to restart STT stream", session_id=self.session.session_id)
248
+ return False
249
+ return True
250
+ except Exception as e:
251
+ log_error(f"❌ Error restarting STT", error=str(e), session_id=self.session.session_id)
252
+ return False
253
 
254
  async def change_state(self, new_state: ConversationState):
255
  """Change conversation state"""
 
588
  })
589
 
590
  except Exception as e:
591
+ error_msg = str(e)
592
+ # Google STT timeout hatası kontrolü
593
+ if "Audio Timeout Error" in error_msg or "stream duration" in error_msg:
594
+ log_warning(f"⚠️ STT timeout detected, marking stream as inactive", session_id=session.session.session_id)
595
+ session.is_streaming = False
596
+ # Timeout durumunda frontend'e hata gönderme, sessizce handle et
597
+ else:
598
+ log_error(f" STT streaming error", error=error_msg, traceback=traceback.format_exc(), session_id=session.session.session_id)
599
+ await websocket.send_json({
600
+ "type": "error",
601
+ "error_type": "stt_error",
602
+ "message": f"STT error: {str(e)}"
603
+ })
604
 
605
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
606
  """Handle control messages"""
 
647
  elif action == "audio_ended":
648
  # Audio playback ended on client
649
  if session.state == ConversationState.PLAYING_AUDIO:
650
+ await session.change_state(ConversationState.LISTENING)
651
  await websocket.send_json({
652
  "type": "state_change",
653
  "from": "playing_audio",
654
+ "to": "listening"
655
  })
656
+ # STT'yi yeniden başlat
657
+ await session.restart_stt_if_needed()
658
 
659
 
660
  # ========================= PROCESSING FUNCTIONS =========================
661
  async def process_user_input(websocket: WebSocket, session: RealtimeSession):
662
  """Process complete user input"""
663
  try:
664
+ # LLM işlemesi sırasında STT'yi durdur
665
+ if session.stt_manager and session.is_streaming:
666
+ log_info(f"⏸️ Pausing STT during LLM processing", session_id=session.session.session_id)
667
+ try:
668
+ await session.stt_manager.stop_streaming()
669
+ session.is_streaming = False
670
+ except Exception as e:
671
+ log_warning(f"⚠️ Error stopping STT: {e}", session_id=session.session.session_id)
672
+ session.is_streaming = False
673
+
674
  # WebSocket aktif mi kontrol et
675
  if not session.is_websocket_active:
676
  return
 
784
  ):
785
  """Generate and stream TTS audio with cancellation support"""
786
  try:
787
+ # TTS başlamadan önce STT'yi durdur - timeout'u önle
788
+ if session.stt_manager and session.is_streaming:
789
+ log_info(f"⏸️ Pausing STT stream during TTS", session_id=session.session.session_id)
790
+ try:
791
+ await session.stt_manager.stop_streaming()
792
+ session.is_streaming = False
793
+ except Exception as e:
794
+ log_warning(f"⚠️ Error stopping STT before TTS: {e}", session_id=session.session.session_id)
795
+ session.is_streaming = False
796
+
797
  log_info(f"🎤 Starting TTS generation for text: '{text[:50]}...'", session_id=session.session.session_id)
798
 
799
  # TTS preprocessor kullan
 
877
  audio_size=len(audio_data),
878
  chunks_sent=total_chunks
879
  )
880
+
881
+ # TTS bitiminde STT'yi yeniden başlat
882
+ if session.state == ConversationState.LISTENING:
883
+ log_info(f"🔄 Restarting STT after TTS completion", session_id=session.session.session_id)
884
+ await session.restart_stt_if_needed()
885
 
886
  except asyncio.CancelledError:
887
  log_info("🛑 TTS streaming cancelled", session_id=session.session.session_id)