Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed 16 days ago

Commit

c582b8f

verified ·

1 Parent(s): 4e2b388

Update websocket_handler.py

Browse files

Files changed (1) hide show

websocket_handler.py +75 -63

websocket_handler.py CHANGED Viewed

@@ -473,74 +473,86 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
         # Check silence
         silence_duration = session.silence_detector.update(decoded_audio)
-    # Stream to STT if available
-    if session.stt_manager and session.state == ConversationState.LISTENING:
-        # Ensure streaming is active
-        if not session.is_streaming:
-            log_warning(f"⚠️ STT manager exists but streaming not active", session_id=session.session.session_id)
-            # Try to restart streaming
-            stt_initialized = await session.initialize_stt()
-            if not stt_initialized:
-                await websocket.send_json({
-                    "type": "error",
-                    "error_type": "stt_error",
-                    "message": "STT streaming not available"
-                })
-                return
-        try:
-            # Chunk counter - sadece önemli milestone'larda logla
-            if not hasattr(session, 'chunk_counter'):
-                session.chunk_counter = 0
-            session.chunk_counter += 1
-            if session.chunk_counter == 1:
-                log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
-            elif session.chunk_counter % 100 == 0:
-                log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
-            # STT'ye gönder ve sonuçları bekle
-            async for result in session.stt_manager.stream_audio(decoded_audio):
-                # Sadece anlamlı sonuçları logla
-                if result.text.strip():  # Boş olmayan text varsa
-                    log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
-                # Send transcription updates
-                await websocket.send_json({
-                    "type": "transcription",
-                    "text": result.text,
-                    "is_final": result.is_final,
-                    "confidence": result.confidence
-                })
-                if result.is_final:
-                    session.current_transcription = result.text
-                    log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
-                    # Final transcription geldiğinde hemen işle
-                    if session.current_transcription:
-                        # State'i değiştir ve user input'u işle
-                        await session.change_state(ConversationState.PROCESSING_STT)
-                        await websocket.send_json({
-                            "type": "state_change",
-                            "from": "listening",
-                            "to": "processing_stt"
-                        })
-                        # Process user input
-                        await process_user_input(websocket, session)
-                        # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
-                        await session.reset_for_new_utterance()
-                        return  # Bu audio chunk için işlem tamamlandı
-        except Exception as e:
-            log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
-            await websocket.send_json({
-                "type": "error",
-                "error_type": "stt_error",
-                "message": f"STT error: {str(e)}"
-            })
 async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
     """Handle control messages"""

         # Check silence
         silence_duration = session.silence_detector.update(decoded_audio)
+        # Stream to STT if available
+        if session.stt_manager and session.state == ConversationState.LISTENING:
+            # Ensure streaming is active
+            if not session.is_streaming:
+                log_warning(f"⚠️ STT manager exists but streaming not active", session_id=session.session.session_id)
+                # Try to restart streaming
+                stt_initialized = await session.initialize_stt()
+                if not stt_initialized:
+                    await websocket.send_json({
+                        "type": "error",
+                        "error_type": "stt_error",
+                        "message": "STT streaming not available"
+                    })
+                    return
+            try:
+                # Chunk counter - sadece önemli milestone'larda logla
+                if not hasattr(session, 'chunk_counter'):
+                    session.chunk_counter = 0
+                session.chunk_counter += 1
+                if session.chunk_counter == 1:
+                    log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
+                elif session.chunk_counter % 100 == 0:
+                    log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
+                # STT'ye gönder ve sonuçları bekle
+                async for result in session.stt_manager.stream_audio(decoded_audio):
+                    # Sadece anlamlı sonuçları logla
+                    if result.text.strip():  # Boş olmayan text varsa
+                        log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
+                    # Send transcription updates
+                    await websocket.send_json({
+                        "type": "transcription",
+                        "text": result.text,
+                        "is_final": result.is_final,
+                        "confidence": result.confidence
+                    })
+                    if result.is_final:
+                        session.current_transcription = result.text
+                        log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
+                        # Final transcription geldiğinde hemen işle
+                        if session.current_transcription:
+                            # State'i değiştir ve user input'u işle
+                            await session.change_state(ConversationState.PROCESSING_STT)
+                            await websocket.send_json({
+                                "type": "state_change",
+                                "from": "listening",
+                                "to": "processing_stt"
+                            })
+                            # Process user input
+                            await process_user_input(websocket, session)
+                            # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
+                            await session.reset_for_new_utterance()
+                            return  # Bu audio chunk için işlem tamamlandı
+            except Exception as e:
+                log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
+                await websocket.send_json({
+                    "type": "error",
+                    "error_type": "stt_error",
+                    "message": f"STT error: {str(e)}"
+                })
+    except Exception as e:
+        log_error(
+            f"❌ Audio chunk handling error",
+            error=str(e),
+            traceback=traceback.format_exc(),
+            session_id=session.session.session_id
+        )
+        await websocket.send_json({
+            "type": "error",
+            "message": f"Audio processing error: {str(e)}"
+        })
 async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
     """Handle control messages"""