Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

ciyidogan committed 19 days ago

Commit

fdd3bc7

verified ·

1 Parent(s): 565adbd

Update websocket_handler.py

Browse files

Files changed (1) hide show

websocket_handler.py +115 -115

websocket_handler.py CHANGED Viewed

@@ -369,7 +369,7 @@ class RealtimeSession:
         log_info(f"✅ Reset for new utterance complete", session_id=self.session.session_id)
-# ========================= Backend-Frontend Messaging =========================
 async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
     """Handle control messages"""
     action = message.get("action")
@@ -456,6 +456,120 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
         await session.stop_stt_streaming()
         await session.restart_stt_if_needed()
 # ========================= MAIN HANDLER =========================
 async def websocket_endpoint(websocket: WebSocket, session_id: str):
     """Main WebSocket endpoint for real-time conversation"""
@@ -674,120 +788,6 @@ async def websocket_endpoint(websocket: WebSocket, session_id: str):
         except Exception as e:
             log_debug(f"WebSocket already closed or error during close: {e}", session_id=session_id)
-# ========================= MESSAGE HANDLERS =========================
-async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
-    """Handle incoming audio chunk with sequential processing"""
-    try:
-        # Do nothing if the WebSocket has been closed
-        if not session.is_websocket_active:
-            return
-        audio_data = message.get("data")
-        if not audio_data:
-            log_warning(f"⚠️ Empty audio chunk received", session_id=session.session.session_id)
-            return
-        # Ignore audio chunks entirely while audio is playing or TTS/LLM/STT processing is in progress
-        if session.state in [ConversationState.PLAYING_AUDIO, ConversationState.PROCESSING_TTS,
-                            ConversationState.PROCESSING_LLM, ConversationState.PROCESSING_STT]:
-            log_debug(f"🔇 Ignoring audio chunk during state: {session.state.value}", session_id=session.session.session_id)
-            return
-        # Do not process audio unless the session is in the LISTENING state
-        if session.state != ConversationState.LISTENING:
-            log_warning(f"⚠️ Audio received in unexpected state: {session.state.value}", session_id=session.session.session_id)
-            return
-        # Send an error to the client if there is no STT manager or streaming is not active
-        if not session.stt_manager or not session.is_streaming:
-            log_warning(f"⚠️ STT not ready, attempting to restart", session_id=session.session.session_id)
-            await websocket.send_json({
-                "type": "error",
-                "error_type": "stt_not_ready",
-                "message": "STT is not ready. Waiting for initialization..."
-            })
-            return
-        # Add to buffer
-        await session.audio_buffer.add_chunk(audio_data)
-        # Decode for processing
-        decoded_audio = base64.b64decode(audio_data)
-        # Check silence
-        silence_duration = session.silence_detector.update(decoded_audio)  # NOTE(review): silence_duration is not read again in this function
-        # Stream to STT
-        try:
-            # Increment the chunk counter
-            session.chunk_counter += 1
-            if session.chunk_counter == 1:
-                log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
-                # Check the audio format on the first chunk
-                if len(decoded_audio) >= 4:
-                    if decoded_audio[:4] == b'\x1a\x45\xdf\xa3':
-                        log_info(f"✅ Valid WEBM header detected", session_id=session.session.session_id)
-                    else:
-                        log_warning(f"⚠️ Unknown audio format, first 4 bytes: {decoded_audio[:4].hex()}", session_id=session.session.session_id)
-            elif session.chunk_counter % 100 == 0:
-                log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
-            # Send the chunk to STT and wait for results
-            async for result in session.stt_manager.stream_audio(decoded_audio):
-                # PROCESS ONLY FINAL RESULTS
-                if result.is_final:
-                    log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
-                    # Send ONLY final transcription to frontend
-                    await websocket.send_json({
-                        "type": "transcription",
-                        "text": result.text,
-                        "is_final": True,
-                        "confidence": result.confidence
-                    })
-                    session.current_transcription = result.text
-                    # When a final transcription arrives, stop STT and process it
-                    if session.current_transcription:
-                        # Stop STT first
-                        await session.stop_stt_streaming()
-                        # Change the state
-                        await session.change_state(ConversationState.PROCESSING_STT)
-                        await websocket.send_json({
-                            "type": "state_change",
-                            "from": "listening",
-                            "to": "processing_stt"
-                        })
-                        # Process user input
-                        await process_user_input(websocket, session)
-                        return
-        except Exception as e:
-            error_msg = str(e)
-            # Check for Google STT timeout errors
-            if "Audio Timeout Error" in error_msg or "stream duration" in error_msg or "Exceeded maximum allowed stream duration" in error_msg:
-                log_warning(f"⚠️ STT timeout detected, ignoring", session_id=session.session.session_id)
-                # No need to restart STT on a timeout,
-                # because it will be restarted anyway once the user finishes speaking
-            else:
-                log_error(f"❌ STT streaming error", error=error_msg, traceback=traceback.format_exc(), session_id=session.session.session_id)
-                await websocket.send_json({
-                    "type": "error",
-                    "error_type": "stt_error",
-                    "message": f"STT error: {str(e)}"
-                })
-    except Exception as e:
-        log_error(f"❌ Error in handle_audio_chunk", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
-        await websocket.send_json({
-            "type": "error",
-            "error_type": "audio_error",
-            "message": f"Audio processing error: {str(e)}"
-        })
 # ========================= PROCESSING FUNCTIONS =========================
 async def process_user_input(websocket: WebSocket, session: RealtimeSession):

         log_info(f"✅ Reset for new utterance complete", session_id=self.session.session_id)
+# ========================= MESSAGE HANDLERS =========================
 async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
     """Handle control messages"""
     action = message.get("action")
         await session.stop_stt_streaming()
         await session.restart_stt_if_needed()
+async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
+    """Handle incoming audio chunk with sequential processing"""
+    try:
+        # Do nothing if the WebSocket has been closed
+        if not session.is_websocket_active:
+            return
+        audio_data = message.get("data")
+        if not audio_data:
+            log_warning(f"⚠️ Empty audio chunk received", session_id=session.session.session_id)
+            return
+        # Ignore audio chunks entirely while audio is playing or TTS/LLM/STT processing is in progress
+        if session.state in [ConversationState.PLAYING_AUDIO, ConversationState.PROCESSING_TTS,
+                            ConversationState.PROCESSING_LLM, ConversationState.PROCESSING_STT]:
+            log_debug(f"🔇 Ignoring audio chunk during state: {session.state.value}", session_id=session.session.session_id)
+            return
+        # Do not process audio unless the session is in the LISTENING state
+        if session.state != ConversationState.LISTENING:
+            log_warning(f"⚠️ Audio received in unexpected state: {session.state.value}", session_id=session.session.session_id)
+            return
+        # Send an error to the client if there is no STT manager or streaming is not active
+        if not session.stt_manager or not session.is_streaming:
+            log_warning(f"⚠️ STT not ready, attempting to restart", session_id=session.session.session_id)
+            await websocket.send_json({
+                "type": "error",
+                "error_type": "stt_not_ready",
+                "message": "STT is not ready. Waiting for initialization..."
+            })
+            return
+        # Add to buffer
+        await session.audio_buffer.add_chunk(audio_data)
+        # Decode for processing
+        decoded_audio = base64.b64decode(audio_data)
+        # Check silence
+        silence_duration = session.silence_detector.update(decoded_audio)  # NOTE(review): silence_duration is not read again in this function
+        # Stream to STT
+        try:
+            # Increment the chunk counter
+            session.chunk_counter += 1
+            if session.chunk_counter == 1:
+                log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
+                # Check the audio format on the first chunk
+                if len(decoded_audio) >= 4:
+                    if decoded_audio[:4] == b'\x1a\x45\xdf\xa3':
+                        log_info(f"✅ Valid WEBM header detected", session_id=session.session.session_id)
+                    else:
+                        log_warning(f"⚠️ Unknown audio format, first 4 bytes: {decoded_audio[:4].hex()}", session_id=session.session.session_id)
+            elif session.chunk_counter % 100 == 0:
+                log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
+            # Send the chunk to STT and wait for results
+            async for result in session.stt_manager.stream_audio(decoded_audio):
+                # PROCESS ONLY FINAL RESULTS
+                if result.is_final:
+                    log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
+                    # Send ONLY final transcription to frontend
+                    await websocket.send_json({
+                        "type": "transcription",
+                        "text": result.text,
+                        "is_final": True,
+                        "confidence": result.confidence
+                    })
+                    session.current_transcription = result.text
+                    # When a final transcription arrives, stop STT and process it
+                    if session.current_transcription:
+                        # Stop STT first
+                        await session.stop_stt_streaming()
+                        # Change the state
+                        await session.change_state(ConversationState.PROCESSING_STT)
+                        await websocket.send_json({
+                            "type": "state_change",
+                            "from": "listening",
+                            "to": "processing_stt"
+                        })
+                        # Process user input
+                        await process_user_input(websocket, session)
+                        return
+        except Exception as e:
+            error_msg = str(e)
+            # Check for Google STT timeout errors
+            if "Audio Timeout Error" in error_msg or "stream duration" in error_msg or "Exceeded maximum allowed stream duration" in error_msg:
+                log_warning(f"⚠️ STT timeout detected, ignoring", session_id=session.session.session_id)
+                # No need to restart STT on a timeout,
+                # because it will be restarted anyway once the user finishes speaking
+            else:
+                log_error(f"❌ STT streaming error", error=error_msg, traceback=traceback.format_exc(), session_id=session.session.session_id)
+                await websocket.send_json({
+                    "type": "error",
+                    "error_type": "stt_error",
+                    "message": f"STT error: {str(e)}"
+                })
+    except Exception as e:
+        log_error(f"❌ Error in handle_audio_chunk", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
+        await websocket.send_json({
+            "type": "error",
+            "error_type": "audio_error",
+            "message": f"Audio processing error: {str(e)}"
+        })
 # ========================= MAIN HANDLER =========================
 async def websocket_endpoint(websocket: WebSocket, session_id: str):
     """Main WebSocket endpoint for real-time conversation"""
         except Exception as e:
             log_debug(f"WebSocket already closed or error during close: {e}", session_id=session_id)
 # ========================= PROCESSING FUNCTIONS =========================
 async def process_user_input(websocket: WebSocket, session: RealtimeSession):