Saiyaswanth007 committed on
Commit 1dd5469 · 1 Parent(s): 3a4cb0f
Files changed (1):
  1. ui.py +413 -224
ui.py CHANGED
@@ -8,334 +8,523 @@ from typing import Dict, Any, Optional
  import threading
  from queue import Queue
  import base64

  # Configure logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- class TranscriptionInterface:
-     """Interface for real-time transcription and speaker diarization"""

      def __init__(self):
          self.connected_clients = set()
-         self.message_queue = Queue()
          self.is_running = False
          self.websocket_server = None
-         self.current_transcript = ""
          self.conversation_history = []

-     async def handle_client(self, websocket, path):
-         """Handle WebSocket client connections"""
-         client_id = f"client_{int(time.time())}"
-         self.connected_clients.add(websocket)
-
-         logger.info(f"Client connected: {client_id}. Total clients: {len(self.connected_clients)}")

          try:
-             # Send connection confirmation
              await websocket.send(json.dumps({
-                 "type": "connection",
                  "status": "connected",
                  "timestamp": time.time(),
-                 "client_id": client_id
              }))

              async for message in websocket:
                  try:
                      if isinstance(message, bytes):
                          # Handle binary audio data
-                         await self.process_audio_chunk(message, websocket)
                      else:
-                         # Handle text messages
-                         data = json.loads(message)
-                         await self.handle_message(data, websocket)
-
-                 except json.JSONDecodeError:
-                     logger.warning(f"Invalid JSON received from client: {message}")
                  except Exception as e:
                      logger.error(f"Error processing message: {e}")

          except websockets.exceptions.ConnectionClosed:
-             logger.info(f"Client {client_id} disconnected")
          except Exception as e:
-             logger.error(f"Client handler error: {e}")
          finally:
              self.connected_clients.discard(websocket)
-             logger.info(f"Client removed. Remaining clients: {len(self.connected_clients)}")
-
-     async def process_audio_chunk(self, audio_data: bytes, websocket):
-         """Process incoming audio data"""
          try:
-             # Import inference functions (assuming they exist in your setup)
-             from inference import process_audio_for_transcription
-
-             # Process the audio chunk
-             result = await process_audio_for_transcription(audio_data)
-
-             if result:
-                 # Broadcast result to all clients
-                 await self.broadcast_result({
-                     "type": "processing_result",
-                     "timestamp": time.time(),
-                     "data": result
-                 })
-
-                 # Update conversation history
-                 if "transcription" in result:
-                     self.update_conversation(result)
-
-         except ImportError:
-             logger.warning("Inference module not found - audio processing disabled")
          except Exception as e:
-             logger.error(f"Error processing audio chunk: {e}")
-             await websocket.send(json.dumps({
-                 "type": "error",
-                 "message": f"Audio processing error: {str(e)}",
-                 "timestamp": time.time()
-             }))
-
-     async def handle_message(self, data: Dict[str, Any], websocket):
-         """Handle non-audio messages from clients"""
-         message_type = data.get("type", "unknown")
-
-         if message_type == "config":
-             # Handle configuration updates
-             logger.info(f"Configuration update: {data}")
-
-         elif message_type == "request_history":
-             # Send conversation history to client
              await websocket.send(json.dumps({
-                 "type": "conversation_history",
-                 "data": self.conversation_history,
                  "timestamp": time.time()
              }))
-
-         elif message_type == "clear_history":
-             # Clear conversation history
-             self.conversation_history = []
-             self.current_transcript = ""
-             await self.broadcast_result({
-                 "type": "conversation_update",
-                 "action": "cleared",
-                 "timestamp": time.time()
-             })
-
-         else:
-             logger.warning(f"Unknown message type: {message_type}")
-
-     async def broadcast_result(self, result: Dict[str, Any]):
-         """Broadcast results to all connected clients"""
-         if not self.connected_clients:
-             return
-
-         message = json.dumps(result)
-         disconnected = set()
-
-         for client in self.connected_clients.copy():
-             try:
-                 await client.send(message)
-             except Exception as e:
-                 logger.warning(f"Failed to send to client: {e}")
-                 disconnected.add(client)
-
-         # Clean up disconnected clients
-         for client in disconnected:
-             self.connected_clients.discard(client)
-
-     def update_conversation(self, result: Dict[str, Any]):
-         """Update conversation history with new transcription results"""
-         if "transcription" in result:
-             transcript_data = {
-                 "timestamp": time.time(),
-                 "text": result["transcription"],
-                 "speaker": result.get("speaker", "Unknown"),
-                 "confidence": result.get("confidence", 0.0)
-             }
-
-             self.conversation_history.append(transcript_data)
-
-             # Keep only last 100 entries to prevent memory issues
-             if len(self.conversation_history) > 100:
-                 self.conversation_history = self.conversation_history[-100:]
-
-     async def start_websocket_server(self, host="0.0.0.0", port=7860):
          """Start the WebSocket server"""
          try:
              self.websocket_server = await websockets.serve(
-                 self.handle_client,
                  host,
                  port,
                  path="/ws_inference"
              )
              self.is_running = True
-             logger.info(f"WebSocket server started on {host}:{port}")

-             # Keep server running
              await self.websocket_server.wait_closed()

          except Exception as e:
-             logger.error(f"WebSocket server error: {e}")
              self.is_running = False
-
-     def get_status(self):
-         """Get current status information"""
-         return {
-             "connected_clients": len(self.connected_clients),
-             "is_running": self.is_running,
-             "conversation_entries": len(self.conversation_history),
-             "last_activity": time.time()
-         }

- # Initialize the transcription interface
- transcription_interface = TranscriptionInterface()

  def create_gradio_interface():
-     """Create the Gradio interface"""

      def get_server_status():
-         """Get server status for display"""
-         status = transcription_interface.get_status()
-         return f"""
-         **Server Status:**
-         - WebSocket Server: {'Running' if status['is_running'] else 'Stopped'}
-         - Connected Clients: {status['connected_clients']}
-         - Conversation Entries: {status['conversation_entries']}
-         - Last Activity: {time.ctime(status['last_activity'])}
          """

-     def get_conversation_history():
-         """Get formatted conversation history"""
-         if not transcription_interface.conversation_history:
-             return "No conversation history available."
-
-         formatted_history = []
-         for entry in transcription_interface.conversation_history[-10:]:  # Show last 10 entries
-             timestamp = time.ctime(entry['timestamp'])
-             speaker = entry.get('speaker', 'Unknown')
-             text = entry.get('text', '')
-             confidence = entry.get('confidence', 0.0)
-
-             formatted_history.append(f"**[{timestamp}] {speaker}** (confidence: {confidence:.2f})\n{text}\n")
-
-         return "\n".join(formatted_history)

-     def clear_conversation():
          """Clear conversation history"""
-         transcription_interface.conversation_history = []
-         transcription_interface.current_transcript = ""
-         return "Conversation history cleared."

      # Create Gradio interface
-     with gr.Blocks(title="Real-time Audio Transcription & Speaker Diarization") as demo:
-         gr.Markdown("# Real-time Audio Transcription & Speaker Diarization")
-         gr.Markdown("This Hugging Face Space provides WebSocket endpoints for real-time audio processing.")

-         with gr.Tab("Server Status"):
-             status_display = gr.Markdown(get_server_status())
-             refresh_btn = gr.Button("Refresh Status")
-             refresh_btn.click(get_server_status, outputs=status_display)

-         with gr.Tab("Live Transcription"):
-             gr.Markdown("### Live Conversation")
-             conversation_display = gr.Markdown(get_conversation_history())

              with gr.Row():
-                 refresh_conv_btn = gr.Button("Refresh Conversation")
-                 clear_conv_btn = gr.Button("Clear History", variant="secondary")
-
-             refresh_conv_btn.click(get_conversation_history, outputs=conversation_display)
-             clear_conv_btn.click(clear_conversation, outputs=conversation_display)

-         with gr.Tab("WebSocket Info"):
-             gr.Markdown("""
-             ### WebSocket Endpoint
-             Connect to this Space's WebSocket endpoint for real-time audio processing:
-
-             **WebSocket URL:** `wss://your-space-name.hf.space/ws_inference`

-             ### Message Format
-
-             **Audio Data:** Send raw audio bytes directly to the WebSocket

-             **Text Messages:** JSON format
-             ```json
-             {
-                 "type": "config",
-                 "settings": {
-                     "language": "en",
-                     "enable_diarization": true
-                 }
-             }
-             ```

-             ### Response Format
-             ```json
-             {
-                 "type": "processing_result",
-                 "timestamp": 1234567890.123,
-                 "data": {
-                     "transcription": "Hello world",
-                     "speaker": "Speaker_1",
-                     "confidence": 0.95
-                 }
-             }
-             ```
              """)

-         with gr.Tab("API Documentation"):
              gr.Markdown("""
-             ### Available Endpoints
-
-             - **WebSocket:** `/ws_inference` - Main endpoint for real-time audio processing
-             - **HTTP:** `/health` - Check server health status
-             - **HTTP:** `/stats` - Get detailed statistics
-
-             ### Integration Example
-
-             ```javascript
-             const ws = new WebSocket('wss://your-space-name.hf.space/ws_inference');
-
-             ws.onopen = function() {
-                 console.log('Connected to transcription service');
-             };
-
-             ws.onmessage = function(event) {
-                 const data = JSON.parse(event.data);
-                 if (data.type === 'processing_result') {
-                     console.log('Transcription:', data.data.transcription);
-                     console.log('Speaker:', data.data.speaker);
-                 }
-             };
-
-             // Send audio data
-             ws.send(audioBuffer);
-             ```
              """)

      return demo

  def run_websocket_server():
-     """Run the WebSocket server in a separate thread"""
      loop = asyncio.new_event_loop()
      asyncio.set_event_loop(loop)

      try:
-         loop.run_until_complete(transcription_interface.start_websocket_server())
      except Exception as e:
-         logger.error(f"WebSocket server thread error: {e}")
      finally:
          loop.close()

- # Start WebSocket server in background thread
  websocket_thread = threading.Thread(target=run_websocket_server, daemon=True)
  websocket_thread.start()

  # Create and launch Gradio interface
  if __name__ == "__main__":
      demo = create_gradio_interface()
      demo.launch(
          server_name="0.0.0.0",
          server_port=7860,
-         share=False,
          show_error=True
      )
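Before the updated version of the file below, one caveat about the server-startup call that survives into both revisions: `websockets.serve()` takes no `path` keyword, so `path="/ws_inference"` would raise a `TypeError` at startup. In the `websockets` package (up to v10), the request path instead arrives as the handler's second argument. A minimal sketch of endpoint routing done that way, under that version assumption; the echo body and port are placeholders, not from the commit:

```python
import asyncio
import websockets

async def handle_client(websocket, path):
    # websockets <= v10 passes the request path as the handler's second
    # argument; route here instead of passing path= to serve().
    if path != "/ws_inference":
        await websocket.close(code=4404, reason="unknown endpoint")
        return
    async for message in websocket:
        await websocket.send(message)  # placeholder echo

async def main():
    # Note: no path= keyword on serve(); routing happens in the handler.
    async with websockets.serve(handle_client, "0.0.0.0", 7860):
        await asyncio.Future()  # run forever

if __name__ == "__main__":
    asyncio.run(main())
```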
 
  import threading
  from queue import Queue
  import base64
+ import numpy as np
+ import os

  # Configure logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

+ # Environment-configurable HF Space URL (matching backend.py)
+ HF_SPACE_URL = os.getenv("HF_SPACE_URL", "https://androidguy-speaker-diarization.hf.space")
+ API_WS = f"wss://{HF_SPACE_URL}/ws_inference"
+
+ class TranscriptionWebSocketServer:
+     """WebSocket server that receives audio from backend and returns transcription results"""

      def __init__(self):
          self.connected_clients = set()
          self.is_running = False
          self.websocket_server = None
          self.conversation_history = []
+         self.processing_stats = {
+             "total_audio_chunks": 0,
+             "total_transcriptions": 0,
+             "last_audio_received": None,
+             "server_start_time": time.time(),
+             "backend_url": HF_SPACE_URL
+         }

+     async def handle_client_connection(self, websocket, path):
+         """Handle incoming WebSocket connections from the backend"""
+         client_addr = websocket.remote_address
+         logger.info(f"Backend client connected from {client_addr}")

+         self.connected_clients.add(websocket)

          try:
+             # Send initial connection acknowledgment
              await websocket.send(json.dumps({
+                 "type": "connection_ack",
                  "status": "connected",
                  "timestamp": time.time(),
+                 "message": "HuggingFace transcription service ready"
              }))

+             # Handle incoming messages/audio data
              async for message in websocket:
                  try:
                      if isinstance(message, bytes):
                          # Handle binary audio data
+                         await self.process_audio_data(message, websocket)
                      else:
+                         # Handle text messages (JSON)
+                         await self.handle_text_message(message, websocket)

                  except Exception as e:
                      logger.error(f"Error processing message: {e}")
+                     await self.send_error(websocket, f"Processing error: {str(e)}")

          except websockets.exceptions.ConnectionClosed:
+             logger.info("Backend client disconnected")
          except Exception as e:
+             logger.error(f"Client connection error: {e}")
          finally:
              self.connected_clients.discard(websocket)
+             logger.info(f"Client removed. Active connections: {len(self.connected_clients)}")
+
+     async def process_audio_data(self, audio_data: bytes, websocket):
+         """Process incoming audio data and return transcription results"""
          try:
+             self.processing_stats["total_audio_chunks"] += 1
+             self.processing_stats["last_audio_received"] = time.time()

+             logger.debug(f"Received {len(audio_data)} bytes of audio data")

+             # Try to import and use your inference functions
+             try:
+                 from inference import transcribe_audio, identify_speakers
+
+                 # Process the audio for transcription
+                 transcription_result = await transcribe_audio(audio_data)
+
+                 if transcription_result:
+                     # Process for speaker diarization if available
+                     try:
+                         speaker_info = await identify_speakers(audio_data)
+                         transcription_result.update(speaker_info)
+                     except Exception as e:
+                         logger.warning(f"Speaker diarization failed: {e}")
+                         transcription_result["speaker"] = "Unknown"
+
+                     # Update conversation history
+                     self.update_conversation_history(transcription_result)
+
+                     # Send result back to backend
+                     response = {
+                         "type": "processing_result",
+                         "timestamp": time.time(),
+                         "data": transcription_result
+                     }
+
+                     await websocket.send(json.dumps(response))
+                     self.processing_stats["total_transcriptions"] += 1
+
+                     logger.info(f"Sent transcription result: {transcription_result.get('text', '')[:50]}...")
+
+             except ImportError:
+                 # Fallback if inference module is not available
+                 logger.warning("Inference module not found, using mock transcription")
+
+                 # Try to use shared.py for processing if available
+                 try:
+                     from shared import RealtimeSpeakerDiarization
+
+                     # Initialize if not already initialized
+                     if not hasattr(self, 'diarization_system'):
+                         self.diarization_system = RealtimeSpeakerDiarization()
+                         await asyncio.to_thread(self.diarization_system.initialize_models)
+                         await asyncio.to_thread(self.diarization_system.start_recording)
+
+                     # Process the audio chunk
+                     result = await asyncio.to_thread(self.diarization_system.process_audio_chunk, audio_data)
+
+                     # Format result for response
+                     if result and result["status"] != "error":
+                         mock_result = {
+                             "text": result.get("text", f"[Processing {len(audio_data)} bytes]"),
+                             "speaker": f"Speaker_{result.get('speaker_id', 0) + 1}",
+                             "confidence": result.get("similarity", 0.85),
+                             "timestamp": time.time()
+                         }
+                     else:
+                         # Fallback mock result
+                         mock_result = {
+                             "text": f"[Mock transcription - {len(audio_data)} bytes processed]",
+                             "speaker": "Speaker_1",
+                             "confidence": 0.85,
+                             "timestamp": time.time()
+                         }
+
+                     # Update conversation history
+                     self.update_conversation_history(mock_result)
+
+                     response = {
+                         "type": "processing_result",
+                         "timestamp": time.time(),
+                         "data": mock_result
+                     }
+
+                     await websocket.send(json.dumps(response))
+                     self.processing_stats["total_transcriptions"] += 1

+                 except Exception as e:
+                     logger.warning(f"Failed to use shared module: {e}")
+
+                     # Basic mock transcription as last resort
+                     mock_result = {
+                         "text": f"[Mock transcription - {len(audio_data)} bytes processed]",
+                         "speaker": "Speaker_1",
+                         "confidence": 0.85,
+                         "timestamp": time.time()
+                     }
+
+                     self.update_conversation_history(mock_result)
+
+                     response = {
+                         "type": "processing_result",
+                         "timestamp": time.time(),
+                         "data": mock_result
+                     }

+                     await websocket.send(json.dumps(response))
+
          except Exception as e:
+             logger.error(f"Audio processing error: {e}")
+             await self.send_error(websocket, f"Audio processing failed: {str(e)}")
+
+     async def handle_text_message(self, message: str, websocket):
+         """Handle text-based messages from backend"""
+         try:
+             data = json.loads(message)
+             message_type = data.get("type", "unknown")

+             logger.info(f"Received message type: {message_type}")
+
+             if message_type == "ping":
+                 # Respond to ping with pong
+                 await websocket.send(json.dumps({
+                     "type": "pong",
+                     "timestamp": time.time()
+                 }))
+
+             elif message_type == "config":
+                 # Handle configuration updates
+                 logger.info(f"Configuration update: {data}")
+
+                 # Apply configuration settings if available
+                 settings = data.get("settings", {})
+                 if "max_speakers" in settings:
+                     max_speakers = settings.get("max_speakers")
+                     logger.info(f"Setting max_speakers to {max_speakers}")
+
+                 if "threshold" in settings:
+                     threshold = settings.get("threshold")
+                     logger.info(f"Setting speaker change threshold to {threshold}")
+
+                 # Send acknowledgment
+                 await websocket.send(json.dumps({
+                     "type": "config_ack",
+                     "message": "Configuration received",
+                     "timestamp": time.time()
+                 }))
+
+             elif message_type == "status_request":
+                 # Send status information
+                 await websocket.send(json.dumps({
+                     "type": "status_response",
+                     "data": self.get_processing_stats(),
+                     "timestamp": time.time()
+                 }))
+
+             else:
+                 logger.warning(f"Unknown message type: {message_type}")
+
+         except json.JSONDecodeError:
+             logger.error(f"Invalid JSON received: {message}")
+             await self.send_error(websocket, "Invalid JSON format")
+
+     async def send_error(self, websocket, error_message: str):
+         """Send error message to client"""
+         try:
              await websocket.send(json.dumps({
+                 "type": "error",
+                 "message": error_message,
                  "timestamp": time.time()
              }))
+         except Exception as e:
+             logger.error(f"Failed to send error message: {e}")
+
+     def update_conversation_history(self, transcription_result: Dict[str, Any]):
+         """Update conversation history with new transcription"""
+         history_entry = {
+             "timestamp": time.time(),
+             "text": transcription_result.get("text", ""),
+             "speaker": transcription_result.get("speaker", "Unknown"),
+             "confidence": transcription_result.get("confidence", 0.0)
+         }

+         self.conversation_history.append(history_entry)

+         # Keep only last 50 entries to prevent memory issues
+         if len(self.conversation_history) > 50:
+             self.conversation_history = self.conversation_history[-50:]
+
+     def get_processing_stats(self):
+         """Get processing statistics"""
+         return {
+             "connected_clients": len(self.connected_clients),
+             "total_audio_chunks": self.processing_stats["total_audio_chunks"],
+             "total_transcriptions": self.processing_stats["total_transcriptions"],
+             "last_audio_received": self.processing_stats["last_audio_received"],
+             "server_uptime": time.time() - self.processing_stats["server_start_time"],
+             "conversation_entries": len(self.conversation_history),
+             "backend_url": self.processing_stats.get("backend_url", HF_SPACE_URL)
+         }
+
+     async def start_server(self, host="0.0.0.0", port=7860):
          """Start the WebSocket server"""
          try:
+             # Start WebSocket server on /ws_inference endpoint
              self.websocket_server = await websockets.serve(
+                 self.handle_client_connection,
                  host,
                  port,
+                 subprotocols=[],
                  path="/ws_inference"
              )
+
              self.is_running = True
+             logger.info(f"WebSocket server started on ws://{host}:{port}/ws_inference")

+             # Keep the server running
              await self.websocket_server.wait_closed()

          except Exception as e:
+             logger.error(f"Failed to start WebSocket server: {e}")
              self.is_running = False

+ # Initialize the WebSocket server
+ ws_server = TranscriptionWebSocketServer()

  def create_gradio_interface():
+     """Create Gradio interface for monitoring and testing"""

      def get_server_status():
+         """Get current server status"""
+         stats = ws_server.get_processing_stats()
+
+         status_text = f"""
+         ### Server Status
+         - **WebSocket Server**: {'🟢 Running' if ws_server.is_running else '🔴 Stopped'}
+         - **Connected Clients**: {stats['connected_clients']}
+         - **Server Uptime**: {stats['server_uptime']:.1f} seconds
+
+         ### Processing Statistics
+         - **Audio Chunks Processed**: {stats['total_audio_chunks']}
+         - **Transcriptions Generated**: {stats['total_transcriptions']}
+         - **Last Audio Received**: {time.ctime(stats['last_audio_received']) if stats['last_audio_received'] else 'Never'}
+
+         ### Conversation
+         - **History Entries**: {stats['conversation_entries']}
          """
+
+         return status_text

+     def get_recent_transcriptions():
+         """Get recent transcription results"""
+         if not ws_server.conversation_history:
+             return "No transcriptions yet. Waiting for audio data from backend..."
+
+         recent_entries = ws_server.conversation_history[-10:]  # Last 10 entries

+         formatted_text = "### Recent Transcriptions\n\n"
+         for entry in recent_entries:
+             timestamp = time.strftime("%H:%M:%S", time.localtime(entry['timestamp']))
+             speaker = entry['speaker']
+             text = entry['text']
+             confidence = entry['confidence']

+             # Extract speaker number for color matching with shared.py
+             speaker_num = 0
+             if speaker.startswith("Speaker_"):
+                 try:
+                     speaker_num = int(speaker.split("_")[1]) - 1
+                 except (ValueError, IndexError):
+                     speaker_num = 0
+
+             # Use colors from shared.py if possible
+             try:
+                 from shared import SPEAKER_COLORS
+                 color = SPEAKER_COLORS[speaker_num % len(SPEAKER_COLORS)]
+             except (ImportError, IndexError):
+                 # Fallback colors
+                 colors = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FFEAA7", "#DDA0DD", "#98D8C8", "#F7DC6F"]
+                 color = colors[speaker_num % len(colors)]
+
+             formatted_text += f"<span style='color:{color};font-weight:bold;'>[{timestamp}] {speaker}</span> (confidence: {confidence:.2f})\n"
+             formatted_text += f"{text}\n\n"

+         return formatted_text

+     def clear_conversation_history():
          """Clear conversation history"""
+         ws_server.conversation_history.clear()
+         return "Conversation history cleared!"

      # Create Gradio interface
+     with gr.Blocks(
+         title="Real-time Audio Transcription Service",
+         theme=gr.themes.Soft()
+     ) as demo:

+         gr.Markdown("# 🎤 Real-time Audio Transcription Service")
+         gr.Markdown("This HuggingFace Space receives audio from your backend and returns transcription results with speaker diarization.")

+         with gr.Tab("📊 Server Status"):
+             status_display = gr.Markdown(get_server_status())

              with gr.Row():
+                 refresh_status_btn = gr.Button("🔄 Refresh Status", variant="primary")
+
+             refresh_status_btn.click(
+                 fn=get_server_status,
+                 outputs=status_display,
+                 every=None
+             )

+         with gr.Tab("📝 Live Transcription"):
+             transcription_display = gr.Markdown(get_recent_transcriptions())

+             with gr.Row():
+                 refresh_transcription_btn = gr.Button("🔄 Refresh Transcriptions", variant="primary")
+                 clear_history_btn = gr.Button("🗑️ Clear History", variant="secondary")

+             refresh_transcription_btn.click(
+                 fn=get_recent_transcriptions,
+                 outputs=transcription_display
+             )

+             clear_history_btn.click(
+                 fn=clear_conversation_history,
+                 outputs=gr.Markdown()
+             )
+
+         with gr.Tab("🔧 Connection Info"):
+             gr.Markdown(f"""
+             ### WebSocket Connection Details
+
+             **WebSocket Endpoint**: `wss://{HF_SPACE_URL}/ws_inference`
+
+             ### Backend Connection
+             Your backend should connect to this WebSocket endpoint and:
+
+             1. **Send Audio Data**: Stream raw audio bytes to this endpoint
+             2. **Receive Results**: Get JSON responses with transcription results
+
+             ### Expected Message Flow
+
+             **Backend → HuggingFace**:
+             - Raw audio bytes (binary data)
+             - Configuration messages (JSON)
+
+             **HuggingFace → Backend**:
+             ```json
+             {{
+                 "type": "processing_result",
+                 "timestamp": 1234567890.123,
+                 "data": {{
+                     "text": "transcribed text here",
+                     "speaker": "Speaker_1",
+                     "confidence": 0.95
+                 }}
+             }}
+             ```
+
+             ### Test Connection
+             Your backend is configured to connect to: `{ws_server.processing_stats.get('backend_url', HF_SPACE_URL)}`
              """)

+         with gr.Tab("🚀 API Documentation"):
              gr.Markdown("""
+             ### WebSocket API Reference
+
+             #### Endpoint
+             - **URL**: `/ws_inference`
+             - **Protocol**: WebSocket
+             - **Accepts**: Binary audio data + JSON messages
+
+             #### Message Types
+
+             ##### 1. Audio Processing
+             - **Input**: Raw audio bytes (binary)
+             - **Output**: Processing result (JSON)
+
+             ##### 2. Configuration
+             - **Input**:
+             ```json
+             {
+                 "type": "config",
+                 "settings": {
+                     "language": "en",
+                     "enable_diarization": true,
+                     "max_speakers": 4,
+                     "threshold": 0.65
+                 }
+             }
+             ```
+
+             ##### 3. Status Check
+             - **Input**: `{"type": "status_request"}`
+             - **Output**: Server statistics
+
+             ##### 4. Ping/Pong
+             - **Input**: `{"type": "ping"}`
+             - **Output**: `{"type": "pong", "timestamp": 1234567890}`
+
+             #### Error Handling
+             All errors are returned as:
+             ```json
+             {
+                 "type": "error",
+                 "message": "Error description",
+                 "timestamp": 1234567890.123
+             }
+             ```
              """)

      return demo

  def run_websocket_server():
+     """Run WebSocket server in background thread"""
      loop = asyncio.new_event_loop()
      asyncio.set_event_loop(loop)

      try:
+         logger.info("Starting WebSocket server thread...")
+         loop.run_until_complete(ws_server.start_server())
      except Exception as e:
+         logger.error(f"WebSocket server error: {e}")
      finally:
          loop.close()

+ # Mount UI to inference.py
+ def mount_ui(app):
+     """Mount Gradio interface to FastAPI app"""
+     try:
+         demo = create_gradio_interface()
+         # Mount without starting server (FastAPI will handle it)
+         demo.mount_to_app(app)
+         logger.info("Gradio UI mounted to FastAPI app")
+         return True
+     except Exception as e:
+         logger.error(f"Error mounting UI: {e}")
+         return False
+
+ # Start WebSocket server in background
+ logger.info("Initializing WebSocket server...")
  websocket_thread = threading.Thread(target=run_websocket_server, daemon=True)
  websocket_thread.start()

+ # Give server time to start
+ time.sleep(2)
+
  # Create and launch Gradio interface
  if __name__ == "__main__":
      demo = create_gradio_interface()
      demo.launch(
          server_name="0.0.0.0",
          server_port=7860,
+         share=True,
          show_error=True
      )
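For reference, a minimal end-to-end client against the protocol documented above; this is a sketch assuming the `websockets` package and Python 3.9+, and `sample.raw` is a placeholder for raw PCM bytes, not a file from the repo. Note that the committed default `HF_SPACE_URL` already carries the `https://` scheme, so `API_WS = f"wss://{HF_SPACE_URL}/ws_inference"` would expand to the malformed `wss://https://...`; the sketch strips the scheme first.

```python
import asyncio
import json
import websockets

# Default host from the commit; it already includes the scheme, which a
# WebSocket URL must not repeat, so strip it before building the URL.
HF_SPACE_URL = "https://androidguy-speaker-diarization.hf.space"
HOST = HF_SPACE_URL.removeprefix("https://").removeprefix("http://")
WS_URL = f"wss://{HOST}/ws_inference"

async def main():
    async with websockets.connect(WS_URL) as ws:
        print(json.loads(await ws.recv()))   # connection_ack

        # Ping/pong round trip
        await ws.send(json.dumps({"type": "ping"}))
        print(json.loads(await ws.recv()))   # {"type": "pong", ...}

        # Push configuration, then stream one audio chunk
        await ws.send(json.dumps({
            "type": "config",
            "settings": {"max_speakers": 4, "threshold": 0.65},
        }))
        print(json.loads(await ws.recv()))   # config_ack

        # Stream one chunk of raw audio bytes (placeholder file name)
        with open("sample.raw", "rb") as f:
            await ws.send(f.read())
        result = json.loads(await ws.recv())  # processing_result
        print(result["data"].get("speaker"), result["data"].get("text"))

asyncio.run(main())
```

On the mounting side, `Blocks.mount_to_app` used in `mount_ui` does not appear to be a Gradio API; the documented helper for attaching a Blocks app to FastAPI is `gr.mount_gradio_app(app, demo, path="/")`.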