"""
Warm Sandbox Pool for Modal - Async Queue-Based Implementation
This module provides a pre-warmed pool of Modal sandboxes to reduce cold-start latency.
"""
import asyncio
import time
from typing import Optional, Dict, Any
from contextlib import asynccontextmanager
from dataclasses import dataclass
from enum import Enum

import modal

from mcp_hub.logging_config import logger
from mcp_hub.exceptions import CodeExecutionError


class SandboxHealth(Enum):
    """Sandbox health status."""
    # Assigned by the pool's periodic health check when the probe succeeds.
    HEALTHY = "healthy"
    # Assigned by the pool's periodic health check when the probe fails.
    UNHEALTHY = "unhealthy"
    # Default for a PooledSandbox that has not been health-checked yet.
    UNKNOWN = "unknown"


@dataclass
class PooledSandbox:
    """Container for a pooled sandbox with metadata."""
    # The Modal sandbox handle that commands are executed against.
    sandbox: modal.Sandbox
    # time.time() timestamp taken when this pool entry was built; compared
    # against the pool's max_age_seconds.
    created_at: float
    # time.time() timestamp refreshed each time the entry is handed out.
    last_used: float
    # Outcome of the latest background health check (UNKNOWN until one runs).
    health: SandboxHealth = SandboxHealth.UNKNOWN
    # How many times the sandbox has been handed out; compared against the
    # pool's max_uses_per_sandbox.
    use_count: int = 0


class WarmSandboxPool:
    """Async queue-based warm sandbox pool with health checking."""
    
    def __init__(
        self,
        app: modal.App,
        image: modal.Image,
        pool_size: int = 3,
        max_age_seconds: int = 300,  # 5 minutes
        max_uses_per_sandbox: int = 10,
        health_check_interval: int = 60,  # 1 minute
    ):
        """Record the configuration and set up empty pool state.

        Only attributes are assigned here; background tasks are launched by
        ``start()``.

        Args:
            app: Modal app passed to ``modal.Sandbox.create``.
            image: Modal image passed to ``modal.Sandbox.create``.
            pool_size: Target number of warm sandboxes; also the queue bound.
            max_age_seconds: Age after which a sandbox is no longer pooled.
            max_uses_per_sandbox: Hand-out count after which a sandbox is no
                longer pooled.
            health_check_interval: Seconds between background health checks.
        """
        # --- lifecycle flag and background task handles (set by start()) ---
        self._running = False
        self._warmup_task: Optional[asyncio.Task] = None
        self._health_check_task: Optional[asyncio.Task] = None
        self._cleanup_task: Optional[asyncio.Task] = None

        # --- configuration ---
        self.app, self.image = app, image
        self.pool_size = pool_size
        self.max_age_seconds = max_age_seconds
        self.max_uses_per_sandbox = max_uses_per_sandbox
        self.health_check_interval = health_check_interval

        # --- bounded queue holding the sandboxes that are ready to hand out ---
        self._sandbox_queue: asyncio.Queue[PooledSandbox] = asyncio.Queue(maxsize=pool_size)

        # --- counters exposed through get_stats(), all starting at zero ---
        self._stats = dict.fromkeys(
            ("created", "reused", "recycled", "health_checks", "failures"), 0
        )

        # --- failure tracking used to decide on an emergency pool reset ---
        self._pool_reset_threshold = 5  # reset after 5 consecutive failures
        self._consecutive_failures = 0
        self._last_successful_creation = time.time()
        
    async def start(self):
        """Launch the background maintenance tasks; a no-op when already running."""
        if not self._running:
            self._running = True
            logger.info(f"Starting warm sandbox pool with {self.pool_size} sandboxes")

            # One independent task per maintenance loop.
            spawn = asyncio.create_task
            self._warmup_task = spawn(self._warmup_pool())
            self._health_check_task = spawn(self._health_check_loop())
            self._cleanup_task = spawn(self._cleanup_loop())

            # Yield for a second so the warmup task gets a chance to begin;
            # this does not wait for the pool to be full.
            await asyncio.sleep(1)
        
    async def stop(self):
        """Cancel the background tasks and terminate every queued sandbox.

        Does nothing when the pool is not running.
        """
        if not self._running:
            return

        self._running = False
        logger.info("Stopping warm sandbox pool")

        # Cancel each unfinished background task and wait for it to unwind.
        background = (self._warmup_task, self._health_check_task, self._cleanup_task)
        for bg_task in background:
            if bg_task is None or bg_task.done():
                continue
            bg_task.cancel()
            try:
                await bg_task
            except asyncio.CancelledError:
                pass

        # Drain whatever is still queued and terminate it.
        while True:
            try:
                leftover = self._sandbox_queue.get_nowait()
            except asyncio.QueueEmpty:
                break
            await self._terminate_sandbox(leftover.sandbox)
                
    @asynccontextmanager
    async def get_sandbox(self, timeout: float = 5.0):
        """Yield a live sandbox, preferring a warm one from the pool.

        Up to three attempts are made. Each attempt waits ``timeout`` seconds
        for a pooled sandbox and verifies it is alive; when the wait times
        out, a fresh sandbox is created instead, with exponential backoff
        between failed creations.

        On exit, a sandbox that came from the pool is put back when it is
        still within the recycle limits, the pool is running, and it passes a
        liveness check plus a quick ``exec`` call. Every other sandbox --
        including any sandbox created on demand here -- is terminated.

        Args:
            timeout: Seconds to wait for a pooled sandbox on each attempt.

        Yields:
            The underlying ``modal.Sandbox``.

        Raises:
            CodeExecutionError: If no sandbox could be obtained. Because the
                ``yield`` sits inside the same ``try``, an exception raised
                by the caller's ``async with`` body is also counted as a
                failure and surfaced as ``CodeExecutionError``.
        """
        pooled_sb = None
        created_new = False
        try:
            # Check if we need to reset the pool due to consecutive failures
            if self._consecutive_failures >= self._pool_reset_threshold:
                logger.warning(f"Pool has {self._consecutive_failures} consecutive failures, attempting reset")
                await self._emergency_pool_reset()

            # Try to get a warm sandbox from the pool, retry if not alive
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    # Try to get from pool first
                    pooled_sb = await asyncio.wait_for(self._sandbox_queue.get(), timeout=timeout)
                    # Check if the sandbox is alive
                    alive = await self._is_sandbox_alive(pooled_sb.sandbox)
                    if not alive:
                        logger.info(f"Got dead sandbox from pool on attempt {attempt + 1}, terminating and trying next.")
                        await self._terminate_sandbox(pooled_sb.sandbox)
                        pooled_sb = None
                        continue  # Try again

                    # Sandbox is alive, use it
                    pooled_sb.last_used = time.time()
                    pooled_sb.use_count += 1
                    self._stats["reused"] += 1
                    self._consecutive_failures = 0  # Reset failure counter on success
                    break

                except asyncio.TimeoutError:
                    # Pool empty or taking too long, create a new one
                    logger.info(f"Pool timeout on attempt {attempt + 1}, creating new sandbox")
                    try:
                        sandbox = await self._create_sandbox()
                        pooled_sb = PooledSandbox(
                            sandbox=sandbox,
                            created_at=time.time(),
                            last_used=time.time(),
                            use_count=1
                        )
                        created_new = True
                        self._stats["created"] += 1
                        self._consecutive_failures = 0  # Reset failure counter on success
                        self._last_successful_creation = time.time()
                        break
                    except Exception as create_error:
                        logger.error(f"Failed to create sandbox on attempt {attempt + 1}: {create_error}")
                        self._consecutive_failures += 1
                        if attempt == max_retries - 1:  # Last attempt
                            # Chain the cause so the original traceback is kept.
                            raise CodeExecutionError(
                                f"Failed to create sandbox after {max_retries} attempts: {create_error}"
                            ) from create_error
                        await asyncio.sleep(2 ** attempt)  # Exponential backoff
            else:
                # Every attempt ended in `continue`: nothing usable was found.
                self._consecutive_failures += 1
                raise CodeExecutionError("Could not obtain a live sandbox from the pool after all retry attempts.")

            logger.info(f"Yielding sandbox of type from sandbox_pool: {type(pooled_sb.sandbox)}")
            yield pooled_sb.sandbox

        except Exception as e:
            logger.error(f"Error getting sandbox: {e}")
            self._stats["failures"] += 1
            self._consecutive_failures += 1
            if isinstance(e, CodeExecutionError):
                # Already the pool's error type: re-raise unchanged instead
                # of nesting one "Failed to ..." message inside another.
                raise
            raise CodeExecutionError(f"Failed to get sandbox: {e}") from e
        finally:
            if pooled_sb:
                used_sandbox = pooled_sb.sandbox
                # Only sandboxes that came out of the pool are candidates for
                # going back in; on-demand ones are always terminated.
                should_recycle = (
                    not created_new and
                    self._should_recycle_sandbox(pooled_sb) and
                    self._running
                )
                if should_recycle:
                    # Double-check sandbox is alive and functional before returning to pool
                    if await self._is_sandbox_alive(used_sandbox):
                        # Additional check: try a quick execution to ensure sandbox is fully functional
                        try:
                            await asyncio.wait_for(
                                asyncio.get_running_loop().run_in_executor(
                                    None,
                                    lambda: used_sandbox.exec("python", "-c", "import sys; print('ready')", timeout=2)
                                ),
                                timeout=3.0
                            )

                            # Sandbox is healthy and functional - return to pool
                            try:
                                self._sandbox_queue.put_nowait(pooled_sb)
                                logger.debug("Returned healthy sandbox to pool")
                            except asyncio.QueueFull:
                                # Pool is full - terminate excess sandbox
                                await self._terminate_sandbox(used_sandbox)
                                logger.debug("Pool full, terminated excess sandbox")
                        except Exception as e:
                            # Sandbox failed functional test - terminate it
                            logger.debug(f"Sandbox failed functional test, terminating: {e}")
                            await self._terminate_sandbox(used_sandbox)
                    else:
                        # Sandbox is dead - terminate it
                        logger.debug("Sandbox is dead, terminating instead of recycling")
                        await self._terminate_sandbox(used_sandbox)
                else:
                    # Should not recycle - terminate sandbox
                    await self._terminate_sandbox(used_sandbox)
                    if not created_new:
                        self._stats["recycled"] += 1
                        logger.debug("Terminated sandbox (exceeded recycle criteria)")
    
    async def _create_sandbox(self) -> modal.Sandbox:
        """Create a new Modal sandbox, giving up after two minutes.

        ``modal.Sandbox.create`` is a blocking call, so it runs in the default
        executor and is awaited with a 120 second limit.

        Returns:
            The newly created sandbox.

        Raises:
            asyncio.TimeoutError: If creation does not finish within 120
                seconds (chained to the original timeout).
            Exception: Any error raised by ``modal.Sandbox.create`` is logged
                and re-raised unchanged.
        """
        def _create():
            # NOTE(review): timeout=35 is presumably the sandbox's own
            # lifetime limit in seconds -- confirm against the Modal docs.
            return modal.Sandbox.create(
                app=self.app,
                image=self.image,
                cpu=2.0,
                memory=1024,
                timeout=35
            )

        try:
            sandbox_creation = asyncio.get_running_loop().run_in_executor(None, _create)
            # Wait for sandbox creation with timeout
            sandbox = await asyncio.wait_for(sandbox_creation, timeout=120)  # 2 minute timeout
            logger.debug(f"Created new sandbox of type: {type(sandbox)}")
            return sandbox
        except asyncio.TimeoutError as exc:
            logger.error("Sandbox creation timed out after 2 minutes")
            # Raise a specific, chained error instead of a bare Exception.
            raise asyncio.TimeoutError(
                "Sandbox creation timed out - Modal may be experiencing issues"
            ) from exc
        except Exception as e:
            logger.error(f"Failed to create sandbox: {e}")
            raise
    
    async def _terminate_sandbox(self, sandbox: modal.Sandbox):
        """Terminate ``sandbox`` on a best-effort basis; never raises ``Exception``.

        ``sandbox.terminate`` runs in the default executor with a 10 second
        limit. Timeouts and other errors are logged and swallowed.
        """
        try:
            # Skip sandboxes that carry a truthy `_terminated` marker.
            if getattr(sandbox, '_terminated', False):
                logger.debug("Sandbox already terminated")
                return

            loop = asyncio.get_event_loop()
            termination = loop.run_in_executor(None, sandbox.terminate)
            await asyncio.wait_for(termination, timeout=10.0)
            logger.debug("Terminated sandbox successfully")
        except asyncio.TimeoutError:
            logger.warning("Sandbox termination timed out - may be unresponsive")
        except Exception as e:
            # Best effort only: the sandbox may already be gone.
            logger.warning(f"Failed to terminate sandbox (may already be dead): {e}")
            # Flag it (when the attribute exists) so later calls return early.
            if hasattr(sandbox, '_terminated'):
                sandbox._terminated = True
    
    def _should_recycle_sandbox(self, pooled_sb: PooledSandbox) -> bool:
        """Return True when ``pooled_sb`` may go back into the pool.

        A sandbox is rejected, in this order of precedence, when it is older
        than ``max_age_seconds``, has reached ``max_uses_per_sandbox``, or has
        been marked UNHEALTHY. The reason is logged at debug level.
        """
        age = time.time() - pooled_sb.created_at

        if age > self.max_age_seconds:
            rejection = "Sandbox too old, not recycling"
        elif pooled_sb.use_count >= self.max_uses_per_sandbox:
            rejection = "Sandbox used too many times, not recycling"
        elif pooled_sb.health == SandboxHealth.UNHEALTHY:
            rejection = "Sandbox unhealthy, not recycling"
        else:
            return True

        logger.debug(rejection)
        return False
    async def _warmup_pool(self):
        """Background loop that tops the pool up while it is running.

        Whenever the queue holds fewer sandboxes than 90% of ``pool_size``
        (at least one), up to two sandboxes are created concurrently. The
        loop then sleeps 1s (pool empty), 2s (below threshold) or 5s
        (otherwise); after an unexpected error it sleeps 10s.
        """
        while self._running:
            try:
                current_size = self._sandbox_queue.qsize()
                warmup_threshold = max(1, int(self.pool_size * 0.9))
                below_threshold = current_size < warmup_threshold

                if below_threshold:
                    needed = self.pool_size - current_size
                    logger.info(f"Pool size ({current_size}) below threshold ({warmup_threshold}). Warming {needed} sandboxes...")

                    # Cap the number of simultaneous creations at two.
                    max_concurrent = min(needed, 2)
                    tasks = [
                        asyncio.create_task(self._create_and_queue_sandbox())
                        for _ in range(max_concurrent)
                    ]

                    if tasks:
                        results = await asyncio.gather(*tasks, return_exceptions=True)
                        successful = 0
                        for i, result in enumerate(results):
                            if not isinstance(result, Exception):
                                successful += 1
                                continue
                            logger.warning(f"Failed to create sandbox {i+1}/{max_concurrent}: {result}")

                        if successful > 0:
                            logger.info(f"Successfully warmed {successful}/{max_concurrent} sandboxes")

                # Poll faster the emptier the pool was at the top of this pass.
                if current_size == 0:
                    sleep_interval = 1
                else:
                    sleep_interval = 2 if below_threshold else 5
                await asyncio.sleep(sleep_interval)

            except Exception as e:
                logger.error(f"Error in warmup loop: {e}")
                await asyncio.sleep(10)  # back off before the next pass
    
    async def _create_and_queue_sandbox(self):
        """Create one sandbox, pre-import packages in it, and enqueue it.

        When the queue is already full the new sandbox is terminated. Any
        exception is logged and swallowed, so this coroutine never raises
        ``Exception``.
        """
        start_time = time.time()
        try:
            sandbox = await self._create_sandbox()
            creation_time = time.time() - start_time
            logger.info(f"Sandbox creation took {creation_time:.2f}s")

            # Pre-import core packages and time that step separately.
            warmup_start = time.time()
            await self._warmup_sandbox_imports(sandbox)
            warmup_time = time.time() - warmup_start
            logger.info(f"Sandbox warmup with imports took {warmup_time:.2f}s")

            entry = PooledSandbox(
                sandbox=sandbox,
                created_at=time.time(),
                last_used=time.time()
            )

            try:
                self._sandbox_queue.put_nowait(entry)
            except asyncio.QueueFull:
                # No room left: discard the sandbox we just built.
                await self._terminate_sandbox(sandbox)
            else:
                total_time = time.time() - start_time
                logger.info(f"Added warm sandbox to pool (total time: {total_time:.2f}s)")

        except Exception as e:
            total_time = time.time() - start_time
            logger.error(f"Failed to create and queue sandbox after {total_time:.2f}s: {e}")

    async def _warmup_sandbox_imports(self, sandbox: modal.Sandbox):
        """Run the warmup import script inside ``sandbox`` (best effort).

        The commands from ``get_warmup_import_commands()`` are joined with
        ``"; "`` and run via ``python -c``. Both the ``exec`` call and the
        blocking read of its stdout happen in the default executor so the
        event loop is never blocked. Failures are logged as warnings and
        swallowed; the sandbox is still considered usable.
        """
        try:
            from mcp_hub.package_utils import get_warmup_import_commands

            # Get warmup commands
            import_commands = get_warmup_import_commands()
            warmup_script = "; ".join(import_commands)

            def _run_warmup():
                """Return ``(readable, output)`` for the warmup process."""
                proc = sandbox.exec("python", "-c", warmup_script, timeout=30)
                stdout = getattr(proc, 'stdout', None)
                if hasattr(stdout, 'read'):
                    # Blocking read: must stay off the event loop thread.
                    return True, stdout.read()
                return False, None

            # Execute the warmup script
            logger.debug("Running sandbox warmup imports...")
            readable, output = await asyncio.get_running_loop().run_in_executor(
                None, _run_warmup
            )

            # Check if warmup was successful
            if not readable:
                logger.debug("Sandbox warmup imports completed")
            elif "Core packages warmed up successfully" in output:
                logger.debug("Sandbox warmup imports completed successfully")
            else:
                logger.warning(f"Sandbox warmup completed but output unexpected: {output}")

        except Exception as e:
            logger.warning(f"Failed to warm up sandbox imports (sandbox still usable): {e}")
    async def _health_check_loop(self):
        """Background loop: every ``health_check_interval`` seconds, remove
        dead sandboxes and then health-check what remains in the pool.

        After an unexpected error the loop waits an extra 10 seconds.
        """
        while self._running:
            try:
                # Sleep first, so the first check happens one interval after start.
                await asyncio.sleep(self.health_check_interval)

                # Quick liveness sweep, then the full health check.
                cleaned = await self._proactive_cleanup()
                await self._perform_health_checks()

                if cleaned <= 0:
                    continue
                # Only reported here; replenishing is left to the warmup loop.
                logger.info(f"Health check cleaned {cleaned} sandboxes, pool may need warming")

            except Exception as e:
                logger.error(f"Error in health check loop: {e}")
                await asyncio.sleep(10)  # back off before the next pass
    
    async def _perform_health_checks(self):
        """Health-check every queued sandbox, keeping only the healthy ones.

        The queue is drained; each sandbox is probed and tagged HEALTHY or
        UNHEALTHY. Unhealthy sandboxes are terminated and counted under
        ``recycled``; healthy ones are re-queued (or terminated if the queue
        has filled up in the meantime).
        """
        survivors = []

        while True:
            try:
                candidate = self._sandbox_queue.get_nowait()
            except asyncio.QueueEmpty:
                break

            is_healthy = await self._check_sandbox_health(candidate.sandbox)
            if is_healthy:
                candidate.health = SandboxHealth.HEALTHY
                survivors.append(candidate)
                continue

            # Failed the probe: tag it, dispose of it, and count it.
            candidate.health = SandboxHealth.UNHEALTHY
            await self._terminate_sandbox(candidate.sandbox)
            self._stats["recycled"] += 1

        # Re-queue the healthy sandboxes.
        for candidate in survivors:
            try:
                self._sandbox_queue.put_nowait(candidate)
            except asyncio.QueueFull:
                await self._terminate_sandbox(candidate.sandbox)

        self._stats["health_checks"] += 1
        logger.debug(f"Health check completed. Pool size: {self._sandbox_queue.qsize()}")
    
    async def _check_sandbox_health(self, sandbox: modal.Sandbox) -> bool:
        """Return True when ``sandbox`` runs a trivial Python command correctly.

        The command prints ``health_check``; the sandbox is healthy when that
        text appears in stdout. Both the ``exec`` call and the blocking stdout
        read run in the default executor so the event loop is not blocked.
        Any exception is logged at debug level and reported as unhealthy.
        """
        def _probe():
            proc = sandbox.exec("python", "-c", "print('health_check')", timeout=5)
            # Blocking read: must stay off the event loop thread.
            return proc.stdout.read()

        try:
            output = await asyncio.get_running_loop().run_in_executor(None, _probe)
            return "health_check" in output
        except Exception as e:
            logger.debug(f"Sandbox health check failed: {e}")
            return False
    
    async def _cleanup_loop(self):
        """Background loop that evicts expired sandboxes every 30 seconds.

        Errors from a pass are logged and the loop carries on.
        """
        check_interval = 30  # seconds between passes
        while self._running:
            try:
                await asyncio.sleep(check_interval)
                await self._cleanup_old_sandboxes()
            except Exception as e:
                logger.error(f"Error in cleanup loop: {e}")
    
    async def _cleanup_old_sandboxes(self):
        """Terminate queued sandboxes that have reached ``max_age_seconds``.

        The queue is drained; sandboxes younger than the limit are re-queued
        afterwards, the rest are terminated and counted under ``recycled``.
        Ages are measured against a single timestamp taken at entry.
        """
        now = time.time()
        keepers = []

        while True:
            try:
                entry = self._sandbox_queue.get_nowait()
            except asyncio.QueueEmpty:
                break

            still_fresh = now - entry.created_at < self.max_age_seconds
            if still_fresh:
                keepers.append(entry)
                continue

            # Expired: dispose of it and record the eviction.
            await self._terminate_sandbox(entry.sandbox)
            self._stats["recycled"] += 1
            logger.debug("Cleaned up old sandbox")

        # Re-queue everything that has not expired.
        for entry in keepers:
            try:
                self._sandbox_queue.put_nowait(entry)
            except asyncio.QueueFull:
                await self._terminate_sandbox(entry.sandbox)

    async def _is_sandbox_alive(self, sandbox: modal.Sandbox) -> bool:
        """Return True when ``sandbox`` answers a trivial ``print('ping')``.

        The ``exec`` call and the blocking read of its stdout both run in the
        default executor under a single 5 second limit, so a hung sandbox can
        neither block the event loop nor stall the caller indefinitely.

        Returns False when the sandbox carries a truthy ``_terminated``
        marker, when the check times out, or when it raises. In the last
        case the ``_terminated`` marker is set if the attribute exists.
        """
        def _ping():
            proc = sandbox.exec("python", "-c", "print('ping')", timeout=3)
            stdout = getattr(proc, "stdout", None)
            if hasattr(stdout, "read"):
                # Blocking read: must stay off the event loop thread.
                return stdout.read()
            # Fallback kept from the original code for process objects
            # without a readable stdout.
            return str(proc)

        try:
            # Check if sandbox was already marked as terminated
            if hasattr(sandbox, '_terminated') and sandbox._terminated:
                return False

            out = await asyncio.wait_for(
                asyncio.get_running_loop().run_in_executor(None, _ping),
                timeout=5.0  # Overall timeout, now covering the read as well
            )
            return "ping" in out

        except asyncio.TimeoutError:
            logger.debug("Liveness check timed out - sandbox likely dead")
            return False
        except Exception as e:
            logger.debug(f"Liveness check failed: {e}")
            # Mark sandbox as dead to avoid repeated checks
            if hasattr(sandbox, '_terminated'):
                sandbox._terminated = True
            return False
    
    async def _emergency_pool_reset(self):
        """Flush the pool after repeated failures and try one fresh sandbox.

        Every queued sandbox is terminated and the consecutive-failure
        counter is reset to zero. One test sandbox is then created and
        queued. If creation fails the error is only logged; the counter stays
        reset so callers can retry. If the queue has been refilled while this
        coroutine was awaiting, the test sandbox is terminated rather than
        leaked.
        """
        logger.warning("Performing emergency pool reset due to consecutive failures")

        # Drain and terminate all sandboxes in the pool
        terminated_count = 0
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                await self._terminate_sandbox(pooled_sb.sandbox)
                terminated_count += 1
            except asyncio.QueueEmpty:
                break

        logger.info(f"Emergency reset: terminated {terminated_count} sandboxes")

        # Reset failure counter
        self._consecutive_failures = 0

        # Try to create one fresh sandbox to test if the underlying issue is resolved
        try:
            test_sandbox = await self._create_sandbox()
        except Exception as e:
            # Counter stays reset so that later calls may retry.
            logger.error(f"Emergency reset failed to create test sandbox: {e}")
            return

        test_pooled = PooledSandbox(
            sandbox=test_sandbox,
            created_at=time.time(),
            last_used=time.time(),
            use_count=0
        )
        try:
            self._sandbox_queue.put_nowait(test_pooled)
            logger.info("Emergency reset successful: created test sandbox")
        except asyncio.QueueFull:
            # Other tasks can refill the queue during the awaits above; do
            # not leak the sandbox that no longer fits.
            await self._terminate_sandbox(test_sandbox)
            logger.debug("Emergency reset: pool already refilled, terminated surplus test sandbox")

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of the pool counters plus derived health fields.

        ``health_status`` is ``"healthy"`` below 3 consecutive failures,
        ``"degraded"`` below the pool reset threshold, and ``"critical"``
        otherwise.
        """
        failures = self._consecutive_failures
        if failures < 3:
            status = "healthy"
        elif failures < self._pool_reset_threshold:
            status = "degraded"
        else:
            status = "critical"

        # Start from a copy of the raw counters, then add the live fields.
        snapshot: Dict[str, Any] = dict(self._stats)
        snapshot.update({
            "pool_size": self._sandbox_queue.qsize(),
            "target_pool_size": self.pool_size,
            "running": self._running,
            "consecutive_failures": failures,
            "last_successful_creation": self._last_successful_creation,
            "time_since_last_success": time.time() - self._last_successful_creation,
            "health_status": status,
        })
        return snapshot

    async def _proactive_cleanup(self):
        """Remove sandboxes that fail the liveness check from the pool.

        The queue is drained; live sandboxes are re-queued afterwards and the
        rest are terminated.

        Returns:
            The number of sandboxes terminated, including any live one that
            could not be re-queued because the queue was full.
        """
        survivors = []
        cleaned_count = 0

        while True:
            try:
                entry = self._sandbox_queue.get_nowait()
            except asyncio.QueueEmpty:
                break

            if not await self._is_sandbox_alive(entry.sandbox):
                # Dead: dispose of it and count it.
                await self._terminate_sandbox(entry.sandbox)
                cleaned_count += 1
                logger.debug("Cleaned up dead sandbox during proactive cleanup")
                continue

            survivors.append(entry)

        # Re-queue the live sandboxes.
        for entry in survivors:
            try:
                self._sandbox_queue.put_nowait(entry)
            except asyncio.QueueFull:
                # Not expected, but never leave a sandbox unaccounted for.
                await self._terminate_sandbox(entry.sandbox)
                cleaned_count += 1

        if cleaned_count > 0:
            logger.info(f"Proactive cleanup removed {cleaned_count} dead sandboxes")

        return cleaned_count

# Helper function for testing and debugging the sandbox pool
async def test_sandbox_pool_health(pool: WarmSandboxPool) -> Dict[str, Any]:
    """Run a small diagnostic suite against ``pool`` and report the results.

    Four checks are recorded under ``diagnostics["tests"]``: ``pool_stats``
    (always passes), ``sandbox_acquisition``, ``sandbox_execution`` and
    ``pool_warmup``. ``overall_health`` is ``"healthy"`` only when every
    check passed.

    Args:
        pool: The pool to inspect.

    Returns:
        A dict with ``timestamp``, ``pool_stats``, ``tests`` and
        ``overall_health`` keys.
    """
    diagnostics: Dict[str, Any] = {
        "timestamp": time.time(),
        "pool_stats": pool.get_stats(),
        "tests": {}
    }

    logger.info("Starting sandbox pool health test...")

    # Test 1: Pool basic stats
    # This snapshot is taken before a sandbox is acquired below and is
    # reused by the warmup check at the end.
    stats = pool.get_stats()
    diagnostics["tests"]["pool_stats"] = {
        "passed": True,
        "details": stats
    }

    # Test 2: Try to get a sandbox
    try:
        async with pool.get_sandbox(timeout=10.0) as sandbox:

            def _run_probe():
                proc = sandbox.exec("python", "-c", "print('health_test_ok')", timeout=5)
                # Blocking read: runs in the executor, off the event loop.
                return proc.stdout.read() if hasattr(proc.stdout, "read") else str(proc)

            # Test 3: Try to run a simple command
            try:
                output = await asyncio.get_running_loop().run_in_executor(None, _run_probe)

                diagnostics["tests"]["sandbox_execution"] = {
                    "passed": "health_test_ok" in output,
                    "output": output[:200],  # First 200 chars
                    "details": "Successfully executed test command"
                }
            except Exception as e:
                diagnostics["tests"]["sandbox_execution"] = {
                    "passed": False,
                    "error": str(e),
                    "details": "Failed to execute test command in sandbox"
                }

        diagnostics["tests"]["sandbox_acquisition"] = {
            "passed": True,
            "details": "Successfully acquired and released sandbox"
        }

    except Exception as e:
        diagnostics["tests"]["sandbox_acquisition"] = {
            "passed": False,
            "error": str(e),
            "details": "Failed to acquire sandbox from pool"
        }

        diagnostics["tests"]["sandbox_execution"] = {
            "passed": False,
            "error": "Could not test - no sandbox available",
            "details": "Skipped due to sandbox acquisition failure"
        }

    # Test 4: Check pool warmup status
    if pool._running:
        warmup_needed = pool.pool_size - stats["pool_size"]
        diagnostics["tests"]["pool_warmup"] = {
            "passed": warmup_needed <= 1,  # Allow 1 sandbox to be missing
            "details": f"Pool has {stats['pool_size']}/{pool.pool_size} sandboxes, {warmup_needed} needed"
        }
    else:
        diagnostics["tests"]["pool_warmup"] = {
            "passed": False,
            "details": "Pool is not running"
        }

    # Overall health assessment
    all_tests_passed = all(test.get("passed", False) for test in diagnostics["tests"].values())
    diagnostics["overall_health"] = "healthy" if all_tests_passed else "unhealthy"

    logger.info(f"Sandbox pool health test completed. Overall health: {diagnostics['overall_health']}")
    return diagnostics