Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed 16 days ago

Commit

c44ad84

verified ·

1 Parent(s): c95cb32

Update websocket_handler.py

Browse files

Files changed (1) hide show

websocket_handler.py +86 -48

websocket_handler.py CHANGED Viewed

@@ -620,6 +620,10 @@ async def handle_control_message(websocket: WebSocket, session: RealtimeSession,
 async def process_user_input(websocket: WebSocket, session: RealtimeSession):
     """Process complete user input"""
     try:
         user_text = session.current_transcription
         if not user_text:
             log_warning(f"⚠️ Empty transcription, resetting", session_id=session.session.session_id)
@@ -632,20 +636,22 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
         # State zaten PROCESSING_STT olarak set edildi, direkt devam et
         # Send final transcription
-        await websocket.send_json({
-            "type": "transcription",
-            "text": user_text,
-            "is_final": True,
-            "confidence": 0.95
-        })
         # State: LLM Processing
         await session.change_state(ConversationState.PROCESSING_LLM)
-        await websocket.send_json({
-            "type": "state_change",
-            "from": "processing_stt",
-            "to": "processing_llm"
-        })
         # Add to chat history
         session.session.add_message("user", user_text)
@@ -664,20 +670,26 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
         session.session.add_message("assistant", response_text)
         # Send text response
-        await websocket.send_json({
-            "type": "assistant_response",
-            "text": response_text
-        })
         # Generate TTS if enabled
         tts_provider = TTSFactory.create_provider()
-        if tts_provider:
             await session.change_state(ConversationState.PROCESSING_TTS)
-            await websocket.send_json({
-                "type": "state_change",
-                "from": "processing_llm",
-                "to": "processing_tts"
-            })
             # Generate TTS with barge-in support
             tts_task = await session.barge_in_handler.start_tts_task(
@@ -689,13 +701,15 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
             except asyncio.CancelledError:
                 log_info("⚡ TTS cancelled due to barge-in", session_id=session.session.session_id)
         else:
             # No TTS, go back to idle
             await session.change_state(ConversationState.IDLE)
-            await websocket.send_json({
-                "type": "state_change",
-                "from": "processing_llm",
-                "to": "idle"
-            })
     except Exception as e:
         log_error(
@@ -704,10 +718,11 @@ async def process_user_input(websocket: WebSocket, session: RealtimeSession):
             traceback=traceback.format_exc(),
             session_id=session.session.session_id
         )
-        await websocket.send_json({
-            "type": "error",
-            "message": f"Processing error: {str(e)}"
-        })
         await session.reset_for_new_utterance()
         await session.change_state(ConversationState.IDLE)
@@ -721,17 +736,33 @@ async def generate_and_stream_tts(
     try:
         log_info(f"🎤 Starting TTS generation for text: '{text[:50]}...'", session_id=session.session.session_id)
         # Generate audio
-        audio_data = await tts_provider.synthesize(text)
         log_info(f"✅ TTS generated: {len(audio_data)} bytes, type: {type(audio_data)}", session_id=session.session.session_id)
         # Change state to playing
         await session.change_state(ConversationState.PLAYING_AUDIO)
-        await websocket.send_json({
-            "type": "state_change",
-            "from": "processing_tts",
-            "to": "playing_audio"
-        })
         # Convert entire audio to base64 for transmission
         import base64
@@ -754,6 +785,11 @@ async def generate_and_stream_tts(
             if asyncio.current_task().cancelled():
                 log_info(f"⚡ Streaming cancelled at chunk {i//chunk_size}", session_id=session.session.session_id)
                 break
             chunk = audio_base64[i:i + chunk_size]
             chunk_index = i // chunk_size
@@ -761,14 +797,15 @@ async def generate_and_stream_tts(
             log_debug(f"📨 Sending chunk {chunk_index}/{total_chunks}, size: {len(chunk)}, is_last: {is_last}")
-            await websocket.send_json({
-                "type": "tts_audio",
-                "data": chunk,
-                "chunk_index": chunk_index,
-                "total_chunks": total_chunks,
-                "is_last": is_last,
-                "mime_type": "audio/mpeg"
-            })
             # Small delay to prevent overwhelming the client
             await asyncio.sleep(0.01)
@@ -791,7 +828,8 @@ async def generate_and_stream_tts(
             traceback=traceback.format_exc(),
             session_id=session.session.session_id
         )
-        await websocket.send_json({
-            "type": "error",
-            "message": f"TTS error: {str(e)}"
-        })

 async def process_user_input(websocket: WebSocket, session: RealtimeSession):
     """Process complete user input"""
     try:
+        # WebSocket aktif mi kontrol et
+        if not session.is_websocket_active:
+            return
         user_text = session.current_transcription
         if not user_text:
             log_warning(f"⚠️ Empty transcription, resetting", session_id=session.session.session_id)
         # State zaten PROCESSING_STT olarak set edildi, direkt devam et
         # Send final transcription
+        if session.is_websocket_active:
+            await websocket.send_json({
+                "type": "transcription",
+                "text": user_text,
+                "is_final": True,
+                "confidence": 0.95
+            })
         # State: LLM Processing
         await session.change_state(ConversationState.PROCESSING_LLM)
+        if session.is_websocket_active:
+            await websocket.send_json({
+                "type": "state_change",
+                "from": "processing_stt",
+                "to": "processing_llm"
+            })
         # Add to chat history
         session.session.add_message("user", user_text)
         session.session.add_message("assistant", response_text)
         # Send text response
+        if session.is_websocket_active:
+            await websocket.send_json({
+                "type": "assistant_response",
+                "text": response_text
+            })
         # Generate TTS if enabled
         tts_provider = TTSFactory.create_provider()
+        log_info(f"🔍 TTS provider check: {tts_provider is not None}", session_id=session.session.session_id)
+        if tts_provider and session.is_websocket_active:
             await session.change_state(ConversationState.PROCESSING_TTS)
+            if session.is_websocket_active:
+                await websocket.send_json({
+                    "type": "state_change",
+                    "from": "processing_llm",
+                    "to": "processing_tts"
+                })
+            log_info(f"🎵 Starting TTS generation for response", session_id=session.session.session_id)
             # Generate TTS with barge-in support
             tts_task = await session.barge_in_handler.start_tts_task(
             except asyncio.CancelledError:
                 log_info("⚡ TTS cancelled due to barge-in", session_id=session.session.session_id)
         else:
+            log_info(f"⚠️ No TTS provider or WebSocket inactive, skipping TTS", session_id=session.session.session_id)
             # No TTS, go back to idle
             await session.change_state(ConversationState.IDLE)
+            if session.is_websocket_active:
+                await websocket.send_json({
+                    "type": "state_change",
+                    "from": "processing_llm",
+                    "to": "idle"
+                })
     except Exception as e:
         log_error(
             traceback=traceback.format_exc(),
             session_id=session.session.session_id
         )
+        if session.is_websocket_active:
+            await websocket.send_json({
+                "type": "error",
+                "message": f"Processing error: {str(e)}"
+            })
         await session.reset_for_new_utterance()
         await session.change_state(ConversationState.IDLE)
     try:
         log_info(f"🎤 Starting TTS generation for text: '{text[:50]}...'", session_id=session.session.session_id)
+        # TTS preprocessor kullan
+        from tts_preprocessor import TTSPreprocessor
+        preprocessor = TTSPreprocessor(language=session.session.locale)
+        processed_text = preprocessor.preprocess(
+            text,
+            tts_provider.get_preprocessing_flags()
+        )
+        log_debug(f"📝 Preprocessed text: '{processed_text[:50]}...'", session_id=session.session.session_id)
         # Generate audio
+        audio_data = await tts_provider.synthesize(processed_text)
         log_info(f"✅ TTS generated: {len(audio_data)} bytes, type: {type(audio_data)}", session_id=session.session.session_id)
+        # WebSocket aktif mi kontrol et
+        if not session.is_websocket_active:
+            log_warning(f"⚠️ WebSocket inactive, skipping TTS streaming", session_id=session.session.session_id)
+            return
         # Change state to playing
         await session.change_state(ConversationState.PLAYING_AUDIO)
+        if session.is_websocket_active:
+            await websocket.send_json({
+                "type": "state_change",
+                "from": "processing_tts",
+                "to": "playing_audio"
+            })
         # Convert entire audio to base64 for transmission
         import base64
             if asyncio.current_task().cancelled():
                 log_info(f"⚡ Streaming cancelled at chunk {i//chunk_size}", session_id=session.session.session_id)
                 break
+            # WebSocket aktif mi kontrol et
+            if not session.is_websocket_active:
+                log_warning(f"⚠️ WebSocket inactive during streaming, stopping", session_id=session.session.session_id)
+                break
             chunk = audio_base64[i:i + chunk_size]
             chunk_index = i // chunk_size
             log_debug(f"📨 Sending chunk {chunk_index}/{total_chunks}, size: {len(chunk)}, is_last: {is_last}")
+            if session.is_websocket_active:
+                await websocket.send_json({
+                    "type": "tts_audio",
+                    "data": chunk,
+                    "chunk_index": chunk_index,
+                    "total_chunks": total_chunks,
+                    "is_last": is_last,
+                    "mime_type": "audio/mpeg"
+                })
             # Small delay to prevent overwhelming the client
             await asyncio.sleep(0.01)
             traceback=traceback.format_exc(),
             session_id=session.session.session_id
         )
+        if session.is_websocket_active:
+            await websocket.send_json({
+                "type": "error",
+                "message": f"TTS error: {str(e)}"
+            })