ciyidogan committed on
Commit
fdd3bc7
·
verified ·
1 Parent(s): 565adbd

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +115 -115
websocket_handler.py CHANGED
@@ -369,7 +369,7 @@ class RealtimeSession:
369
  log_info(f"✅ Reset for new utterance complete", session_id=self.session.session_id)
370
 
371
 
372
- # ========================= Backend-Frontend Messaging =========================
373
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
374
  """Handle control messages"""
375
  action = message.get("action")
@@ -456,6 +456,120 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
456
  await session.stop_stt_streaming()
457
  await session.restart_stt_if_needed()
458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  # ========================= MAIN HANDLER =========================
460
  async def websocket_endpoint(websocket: WebSocket, session_id: str):
461
  """Main WebSocket endpoint for real-time conversation"""
@@ -674,120 +788,6 @@ async def websocket_endpoint(websocket: WebSocket, session_id: str):
674
  except Exception as e:
675
  log_debug(f"WebSocket already closed or error during close: {e}", session_id=session_id)
676
 
677
- # ========================= MESSAGE HANDLERS =========================
678
- async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
679
- """Handle incoming audio chunk with sequential processing"""
680
- try:
681
- # WebSocket kapandıysa işlem yapma
682
- if not session.is_websocket_active:
683
- return
684
-
685
- audio_data = message.get("data")
686
- if not audio_data:
687
- log_warning(f"⚠️ Empty audio chunk received", session_id=session.session.session_id)
688
- return
689
-
690
- # TTS/LLM işlenirken audio chunk'ları tamamen yoksay
691
- if session.state in [ConversationState.PLAYING_AUDIO, ConversationState.PROCESSING_TTS,
692
- ConversationState.PROCESSING_LLM, ConversationState.PROCESSING_STT]:
693
- log_debug(f"🔇 Ignoring audio chunk during state: {session.state.value}", session_id=session.session.session_id)
694
- return
695
-
696
- # LISTENING state'inde değilse audio işleme
697
- if session.state != ConversationState.LISTENING:
698
- log_warning(f"⚠️ Audio received in unexpected state: {session.state.value}", session_id=session.session.session_id)
699
- return
700
-
701
- # STT yoksa veya streaming değilse hata döndür
702
- if not session.stt_manager or not session.is_streaming:
703
- log_warning(f"⚠️ STT not ready, attempting to restart", session_id=session.session.session_id)
704
- await websocket.send_json({
705
- "type": "error",
706
- "error_type": "stt_not_ready",
707
- "message": "STT is not ready. Waiting for initialization..."
708
- })
709
- return
710
-
711
- # Add to buffer
712
- await session.audio_buffer.add_chunk(audio_data)
713
-
714
- # Decode for processing
715
- decoded_audio = base64.b64decode(audio_data)
716
-
717
- # Check silence
718
- silence_duration = session.silence_detector.update(decoded_audio)
719
-
720
- # Stream to STT
721
- try:
722
- # Chunk counter artır
723
- session.chunk_counter += 1
724
-
725
- if session.chunk_counter == 1:
726
- log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
727
- # İlk chunk'ta format kontrolü yap
728
- if len(decoded_audio) >= 4:
729
- if decoded_audio[:4] == b'\x1a\x45\xdf\xa3':
730
- log_info(f"✅ Valid WEBM header detected", session_id=session.session.session_id)
731
- else:
732
- log_warning(f"⚠️ Unknown audio format, first 4 bytes: {decoded_audio[:4].hex()}", session_id=session.session.session_id)
733
- elif session.chunk_counter % 100 == 0:
734
- log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
735
-
736
- # STT'ye gönder ve sonuçları bekle
737
- async for result in session.stt_manager.stream_audio(decoded_audio):
738
- # SADECE FINAL RESULT'LARI İŞLE
739
- if result.is_final:
740
- log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
741
-
742
- # Send ONLY final transcription to frontend
743
- await websocket.send_json({
744
- "type": "transcription",
745
- "text": result.text,
746
- "is_final": True,
747
- "confidence": result.confidence
748
- })
749
-
750
- session.current_transcription = result.text
751
-
752
- # Final transcription geldiğinde STT'yi durdur ve işle
753
- if session.current_transcription:
754
- # Önce STT'yi durdur
755
- await session.stop_stt_streaming()
756
-
757
- # State'i değiştir
758
- await session.change_state(ConversationState.PROCESSING_STT)
759
- await websocket.send_json({
760
- "type": "state_change",
761
- "from": "listening",
762
- "to": "processing_stt"
763
- })
764
-
765
- # Process user input
766
- await process_user_input(websocket, session)
767
- return
768
-
769
- except Exception as e:
770
- error_msg = str(e)
771
- # Google STT timeout hatası kontrolü
772
- if "Audio Timeout Error" in error_msg or "stream duration" in error_msg or "Exceeded maximum allowed stream duration" in error_msg:
773
- log_warning(f"⚠️ STT timeout detected, ignoring", session_id=session.session.session_id)
774
- # Timeout durumunda STT'yi yeniden başlatmaya gerek yok,
775
- # çünkü kullanıcı konuşmayı bitirdiğinde zaten yeniden başlatılacak
776
- else:
777
- log_error(f"❌ STT streaming error", error=error_msg, traceback=traceback.format_exc(), session_id=session.session.session_id)
778
- await websocket.send_json({
779
- "type": "error",
780
- "error_type": "stt_error",
781
- "message": f"STT error: {str(e)}"
782
- })
783
-
784
- except Exception as e:
785
- log_error(f"❌ Error in handle_audio_chunk", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
786
- await websocket.send_json({
787
- "type": "error",
788
- "error_type": "audio_error",
789
- "message": f"Audio processing error: {str(e)}"
790
- })
791
 
792
  # ========================= PROCESSING FUNCTIONS =========================
793
  async def process_user_input(websocket: WebSocket, session: RealtimeSession):
 
369
  log_info(f"✅ Reset for new utterance complete", session_id=self.session.session_id)
370
 
371
 
372
+ # ========================= MESSAGE HANDLERS =========================
373
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
374
  """Handle control messages"""
375
  action = message.get("action")
 
456
  await session.stop_stt_streaming()
457
  await session.restart_stt_if_needed()
458
 
459
async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
    """Handle an incoming audio chunk and forward it to the STT stream.

    The chunk is dropped when the WebSocket is no longer active, when the
    payload is empty, or when the session is in any state other than
    ``LISTENING``. Otherwise it is decoded, buffered, passed to the silence
    detector and streamed to STT. The first final, non-empty transcription
    stops STT streaming, moves the session to ``PROCESSING_STT`` and hands
    control to ``process_user_input``.

    Args:
        websocket: Client connection; receives transcription, state-change
            and error messages.
        session: Realtime session holding the conversation state, audio
            buffer, silence detector and STT manager.
        message: Client message; ``message["data"]`` is the base64-encoded
            audio payload.
    """
    try:
        # Do nothing once the WebSocket has been closed.
        if not session.is_websocket_active:
            return

        session_id = session.session.session_id

        audio_data = message.get("data")
        if not audio_data:
            log_warning("⚠️ Empty audio chunk received", session_id=session_id)
            return

        # Ignore audio chunks entirely while STT/LLM/TTS work or playback is in progress.
        busy_states = (
            ConversationState.PLAYING_AUDIO,
            ConversationState.PROCESSING_TTS,
            ConversationState.PROCESSING_LLM,
            ConversationState.PROCESSING_STT,
        )
        if session.state in busy_states:
            log_debug(f"🔇 Ignoring audio chunk during state: {session.state.value}", session_id=session_id)
            return

        # Audio is only processed while the session is listening.
        if session.state != ConversationState.LISTENING:
            log_warning(f"⚠️ Audio received in unexpected state: {session.state.value}", session_id=session_id)
            return

        # Without an STT manager or an active stream, report the error to the client.
        if not session.stt_manager or not session.is_streaming:
            log_warning("⚠️ STT not ready, attempting to restart", session_id=session_id)
            await websocket.send_json({
                "type": "error",
                "error_type": "stt_not_ready",
                "message": "STT is not ready. Waiting for initialization..."
            })
            return

        # Decode first so a malformed base64 payload is rejected (via the
        # outer handler) before it is added to the audio buffer.
        decoded_audio = base64.b64decode(audio_data)

        # Add the original (still encoded) payload to the buffer.
        await session.audio_buffer.add_chunk(audio_data)

        # Feed the silence detector; its return value is not used by this handler.
        session.silence_detector.update(decoded_audio)

        # Stream to STT
        try:
            session.chunk_counter += 1

            if session.chunk_counter == 1:
                log_info("🎤 Started streaming audio to STT", session_id=session_id)
                # Check the container format on the first chunk only.
                if len(decoded_audio) >= 4:
                    if decoded_audio[:4] == b'\x1a\x45\xdf\xa3':
                        log_info("✅ Valid WEBM header detected", session_id=session_id)
                    else:
                        log_warning(f"⚠️ Unknown audio format, first 4 bytes: {decoded_audio[:4].hex()}", session_id=session_id)
            elif session.chunk_counter % 100 == 0:
                log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session_id)

            # Send the audio to STT and consume the results it yields.
            async for result in session.stt_manager.stream_audio(decoded_audio):
                # Only final results are processed; interim results are skipped.
                if not result.is_final:
                    continue

                log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session_id)

                # Send ONLY the final transcription to the frontend.
                await websocket.send_json({
                    "type": "transcription",
                    "text": result.text,
                    "is_final": True,
                    "confidence": result.confidence
                })

                session.current_transcription = result.text

                # A non-empty final transcription ends the utterance:
                # stop STT, switch state and process the user input.
                if session.current_transcription:
                    await session.stop_stt_streaming()

                    await session.change_state(ConversationState.PROCESSING_STT)
                    await websocket.send_json({
                        "type": "state_change",
                        "from": "listening",
                        "to": "processing_stt"
                    })

                    await process_user_input(websocket, session)
                    return

        except Exception as e:
            error_msg = str(e)
            # Google STT timeout detection. "stream duration" also matches
            # "Exceeded maximum allowed stream duration".
            timeout_markers = ("Audio Timeout Error", "stream duration")
            if any(marker in error_msg for marker in timeout_markers):
                # STT is not restarted here; the original author's note says
                # it is restarted once the user finishes speaking.
                log_warning("⚠️ STT timeout detected, ignoring", session_id=session_id)
            else:
                log_error("❌ STT streaming error", error=error_msg, traceback=traceback.format_exc(), session_id=session_id)
                await websocket.send_json({
                    "type": "error",
                    "error_type": "stt_error",
                    "message": f"STT error: {error_msg}"
                })

    except Exception as e:
        log_error("❌ Error in handle_audio_chunk", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
        await websocket.send_json({
            "type": "error",
            "error_type": "audio_error",
            "message": f"Audio processing error: {str(e)}"
        })
572
+
573
  # ========================= MAIN HANDLER =========================
574
  async def websocket_endpoint(websocket: WebSocket, session_id: str):
575
  """Main WebSocket endpoint for real-time conversation"""
 
788
  except Exception as e:
789
  log_debug(f"WebSocket already closed or error during close: {e}", session_id=session_id)
790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
 
792
  # ========================= PROCESSING FUNCTIONS =========================
793
  async def process_user_input(websocket: WebSocket, session: RealtimeSession):