Spaces:
Building
Building
Update websocket_handler.py
Browse files
- websocket_handler.py +66 -12
websocket_handler.py
CHANGED
@@ -226,6 +226,30 @@ class RealtimeSession:
|
|
226 |
self.stt_manager = None
|
227 |
self.is_streaming = False
|
228 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
|
230 |
async def change_state(self, new_state: ConversationState):
|
231 |
"""Change conversation state"""
|
@@ -564,16 +588,19 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
564 |
})
|
565 |
|
566 |
except Exception as e:
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
"
|
575 |
-
|
576 |
-
|
|
|
|
|
|
|
577 |
|
578 |
async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
|
579 |
"""Handle control messages"""
|
@@ -620,18 +647,30 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
|
|
620 |
elif action == "audio_ended":
|
621 |
# Audio playback ended on client
|
622 |
if session.state == ConversationState.PLAYING_AUDIO:
|
623 |
-
await session.change_state(ConversationState.
|
624 |
await websocket.send_json({
|
625 |
"type": "state_change",
|
626 |
"from": "playing_audio",
|
627 |
-
"to": "
|
628 |
})
|
|
|
|
|
629 |
|
630 |
|
631 |
# ========================= PROCESSING FUNCTIONS =========================
|
632 |
async def process_user_input(websocket: WebSocket, session: RealtimeSession):
|
633 |
"""Process complete user input"""
|
634 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
635 |
# WebSocket aktif mi kontrol et
|
636 |
if not session.is_websocket_active:
|
637 |
return
|
@@ -745,6 +784,16 @@ async def generate_and_stream_tts(
|
|
745 |
):
|
746 |
"""Generate and stream TTS audio with cancellation support"""
|
747 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
748 |
log_info(f"🎤 Starting TTS generation for text: '{text[:50]}...'", session_id=session.session.session_id)
|
749 |
|
750 |
# TTS preprocessor kullan
|
@@ -828,6 +877,11 @@ async def generate_and_stream_tts(
|
|
828 |
audio_size=len(audio_data),
|
829 |
chunks_sent=total_chunks
|
830 |
)
|
|
|
|
|
|
|
|
|
|
|
831 |
|
832 |
except asyncio.CancelledError:
|
833 |
log_info("🛑 TTS streaming cancelled", session_id=session.session.session_id)
|
|
|
226 |
self.stt_manager = None
|
227 |
self.is_streaming = False
|
228 |
return False
|
229 |
+
|
230 |
+
async def restart_stt_if_needed(self):
    """Restart the STT stream, but only when a restart is actually needed.

    A restart is attempted only when all of the following hold: the session
    is not currently marked as streaming, the websocket is still active, and
    the conversation state is ``ConversationState.LISTENING``. The restart
    itself is delegated to ``initialize_stt()`` and requires an existing
    ``stt_manager``.

    Returns:
        bool: ``False`` when re-initialization was attempted and failed, or
        when any exception was raised along the way. ``True`` otherwise,
        i.e. the restart succeeded, was not needed, or was skipped because
        no STT manager is present.
    """
    try:
        # Same evaluation order as before so the checks short-circuit
        # identically (state is only compared when the first two pass).
        restart_needed = (
            not self.is_streaming
            and self.is_websocket_active
            and self.state == ConversationState.LISTENING
        )
        if not restart_needed:
            return True

        session_id = self.session.session_id

        # Only an existing STT manager is restarted. Previously this case
        # logged "Restarting..." and then silently did nothing; make the
        # skip visible instead of emitting a misleading message.
        if not self.stt_manager:
            log_warning("⚠️ STT restart skipped: no STT manager available", session_id=session_id)
            return True

        log_info("🔄 Restarting STT stream after timeout...", session_id=session_id)

        if not await self.initialize_stt():
            log_error("❌ Failed to restart STT stream", session_id=session_id)
            return False

        log_info("✅ STT stream restarted successfully", session_id=session_id)

        # Reset the chunk counter if this session tracks one.
        if hasattr(self, 'chunk_counter'):
            self.chunk_counter = 0
        return True
    except Exception as e:
        # Best-effort restart: report the failure to the caller instead of
        # letting it propagate into the websocket handler.
        log_error("❌ Error restarting STT", error=str(e), session_id=self.session.session_id)
        return False
|
253 |
|
254 |
async def change_state(self, new_state: ConversationState):
|
255 |
"""Change conversation state"""
|
|
|
588 |
})
|
589 |
|
590 |
except Exception as e:
|
591 |
+
error_msg = str(e)
|
592 |
+
# Google STT timeout hatası kontrolü
|
593 |
+
if "Audio Timeout Error" in error_msg or "stream duration" in error_msg:
|
594 |
+
log_warning(f"⚠️ STT timeout detected, marking stream as inactive", session_id=session.session.session_id)
|
595 |
+
session.is_streaming = False
|
596 |
+
# Timeout durumunda frontend'e hata gönderme, sessizce handle et
|
597 |
+
else:
|
598 |
+
log_error(f"❌ STT streaming error", error=error_msg, traceback=traceback.format_exc(), session_id=session.session.session_id)
|
599 |
+
await websocket.send_json({
|
600 |
+
"type": "error",
|
601 |
+
"error_type": "stt_error",
|
602 |
+
"message": f"STT error: {str(e)}"
|
603 |
+
})
|
604 |
|
605 |
async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
|
606 |
"""Handle control messages"""
|
|
|
647 |
elif action == "audio_ended":
|
648 |
# Audio playback ended on client
|
649 |
if session.state == ConversationState.PLAYING_AUDIO:
|
650 |
+
await session.change_state(ConversationState.LISTENING)
|
651 |
await websocket.send_json({
|
652 |
"type": "state_change",
|
653 |
"from": "playing_audio",
|
654 |
+
"to": "listening"
|
655 |
})
|
656 |
+
# STT'yi yeniden başlat
|
657 |
+
await session.restart_stt_if_needed()
|
658 |
|
659 |
|
660 |
# ========================= PROCESSING FUNCTIONS =========================
|
661 |
async def process_user_input(websocket: WebSocket, session: RealtimeSession):
|
662 |
"""Process complete user input"""
|
663 |
try:
|
664 |
+
# LLM işlemesi sırasında STT'yi durdur
|
665 |
+
if session.stt_manager and session.is_streaming:
|
666 |
+
log_info(f"⏸️ Pausing STT during LLM processing", session_id=session.session.session_id)
|
667 |
+
try:
|
668 |
+
await session.stt_manager.stop_streaming()
|
669 |
+
session.is_streaming = False
|
670 |
+
except Exception as e:
|
671 |
+
log_warning(f"⚠️ Error stopping STT: {e}", session_id=session.session.session_id)
|
672 |
+
session.is_streaming = False
|
673 |
+
|
674 |
# WebSocket aktif mi kontrol et
|
675 |
if not session.is_websocket_active:
|
676 |
return
|
|
|
784 |
):
|
785 |
"""Generate and stream TTS audio with cancellation support"""
|
786 |
try:
|
787 |
+
# TTS başlamadan önce STT'yi durdur - timeout'u önle
|
788 |
+
if session.stt_manager and session.is_streaming:
|
789 |
+
log_info(f"⏸️ Pausing STT stream during TTS", session_id=session.session.session_id)
|
790 |
+
try:
|
791 |
+
await session.stt_manager.stop_streaming()
|
792 |
+
session.is_streaming = False
|
793 |
+
except Exception as e:
|
794 |
+
log_warning(f"⚠️ Error stopping STT before TTS: {e}", session_id=session.session.session_id)
|
795 |
+
session.is_streaming = False
|
796 |
+
|
797 |
log_info(f"🎤 Starting TTS generation for text: '{text[:50]}...'", session_id=session.session.session_id)
|
798 |
|
799 |
# TTS preprocessor kullan
|
|
|
877 |
audio_size=len(audio_data),
|
878 |
chunks_sent=total_chunks
|
879 |
)
|
880 |
+
|
881 |
+
# TTS bitiminde STT'yi yeniden başlat
|
882 |
+
if session.state == ConversationState.LISTENING:
|
883 |
+
log_info(f"🔄 Restarting STT after TTS completion", session_id=session.session.session_id)
|
884 |
+
await session.restart_stt_if_needed()
|
885 |
|
886 |
except asyncio.CancelledError:
|
887 |
log_info("🛑 TTS streaming cancelled", session_id=session.session.session_id)
|