import logging

import torch

try:
    import psutil  # Optional dependency; used for memory monitoring.
except ImportError:
    psutil = None

logger = logging.getLogger(__name__)

class EnsembleMonitorAgent:
    def __init__(self):
        self.performance_metrics = {
            "model_accuracy": {},
            "response_times": {},
            "confidence_distribution": {},
            "consensus_rate": 0.0
        }
        self.alerts = []
    
    def monitor_prediction(self, model_id, prediction, confidence, response_time):
        """Record a single model's confidence and response time.

        `prediction` is accepted so callers can later pair it with ground
        truth (see `record_accuracy`); it is not stored here.
        """
        if model_id not in self.performance_metrics["model_accuracy"]:
            self.performance_metrics["model_accuracy"][model_id] = []
            self.performance_metrics["response_times"][model_id] = []
            self.performance_metrics["confidence_distribution"][model_id] = []
        
        self.performance_metrics["response_times"][model_id].append(response_time)
        self.performance_metrics["confidence_distribution"][model_id].append(confidence)
        
        # Check for performance issues
        self._check_performance_issues(model_id)
    
    def _check_performance_issues(self, model_id):
        """Check for any performance anomalies"""
        response_times = self.performance_metrics["response_times"][model_id]
        if len(response_times) >= 10:
            avg_time = sum(response_times[-10:]) / 10
            if avg_time > 2.0:  # Rolling average above 2 seconds
                self.alerts.append(f"High latency detected for {model_id}: {avg_time:.2f}s")

class WeightOptimizationAgent:
    def __init__(self, weight_manager):
        self.weight_manager = weight_manager
        self.prediction_history = []  # Stores (ensemble_prediction_label, assumed_actual_label)
        self.optimization_threshold = 0.05  # 5% change in accuracy triggers optimization
        self.min_history_for_optimization = 20 # Minimum samples before optimizing
    
    def analyze_performance(self, ensemble_prediction_label, actual_label=None):
        """Analyze ensemble performance and record for optimization"""
        # If actual_label is not provided, optimistically assume the ensemble
        # was correct whenever it was not UNCERTAIN. This biases measured
        # accuracy upward, so pass real labels whenever they are available.
        assumed_actual_label = actual_label
        if assumed_actual_label is None and ensemble_prediction_label != "UNCERTAIN":
            assumed_actual_label = ensemble_prediction_label
        
        self.prediction_history.append((ensemble_prediction_label, assumed_actual_label))
        
        if len(self.prediction_history) >= self.min_history_for_optimization and self._should_optimize():
            self._optimize_weights()
    
    def _calculate_accuracy(self, history_subset):
        """Calculates accuracy based on history where actual_label is known."""
        correct_predictions = 0
        total_known = 0
        for ensemble_pred, actual_label in history_subset:
            if actual_label is not None:
                total_known += 1
                if ensemble_pred == actual_label:
                    correct_predictions += 1
        return correct_predictions / total_known if total_known > 0 else 0.0

    def _should_optimize(self):
        """Determine if weights should be optimized based on recent performance change."""
        if len(self.prediction_history) < self.min_history_for_optimization * 2: # Need enough history for comparison
            return False
        
        # Compare accuracy of recent batch with previous batch
        recent_batch = self.prediction_history[-self.min_history_for_optimization:]
        previous_batch = self.prediction_history[-self.min_history_for_optimization*2:-self.min_history_for_optimization]
        
        recent_accuracy = self._calculate_accuracy(recent_batch)
        previous_accuracy = self._calculate_accuracy(previous_batch)
        
        # Trigger optimization on a significant relative drop in accuracy,
        # e.g. 0.90 -> 0.80 is an ~11% relative drop, above the 5% threshold.
        if previous_accuracy > 0 and (previous_accuracy - recent_accuracy) / previous_accuracy > self.optimization_threshold:
            logger.warning(f"Performance degradation detected (from {previous_accuracy:.2f} to {recent_accuracy:.2f}). Triggering weight optimization.")
            return True
        return False
    
    def _optimize_weights(self):
        """Optimize model weights based on performance."""
        logger.info("Optimizing model weights based on recent performance.")
        # Placeholder for more sophisticated optimization logic, e.g. adjusting
        # self.weight_manager.base_weights based on which models contributed
        # most to correct predictions or errors.
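        # Minimal illustrative sketch (assumptions: the weight manager exposes
        # a dict-like `base_weights` keyed by model_id, and a gentle move back
        # toward uniform weights is an acceptable response to degradation).
        base_weights = getattr(self.weight_manager, "base_weights", None)
        if isinstance(base_weights, dict) and base_weights:
            uniform = 1.0 / len(base_weights)
            for model_id, weight in base_weights.items():
                # Shrink each weight 10% of the way toward uniform.
                base_weights[model_id] = 0.9 * weight + 0.1 * uniform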


class SystemHealthAgent:
    def __init__(self):
        self.health_metrics = {
            "memory_usage": [],
            "gpu_utilization": [],
            "model_load_times": {},
            "error_rates": {}
        }
    
    def monitor_system_health(self):
        """Monitor overall system health"""
        self._check_memory_usage()
        self._check_gpu_utilization()
        # You might add _check_model_health() here later
    
    def _check_memory_usage(self):
        """Monitor system memory usage via psutil, if installed."""
        if psutil is None:
            logger.warning("psutil not installed. Cannot monitor memory usage.")
            return

        memory = psutil.virtual_memory()
        self.health_metrics["memory_usage"].append(memory.percent)

        if memory.percent > 90:
            logger.warning(f"High memory usage detected: {memory.percent}%")
    
    def _check_gpu_utilization(self):
        """Monitor GPU memory utilization if CUDA is available."""
        if torch.cuda.is_available():
            try:
                # Fraction of the device's total memory currently allocated.
                total_memory = torch.cuda.get_device_properties(0).total_memory
                gpu_util = torch.cuda.memory_allocated() / total_memory
                self.health_metrics["gpu_utilization"].append(gpu_util)
                
                if gpu_util > 0.9:
                    logger.warning(f"High GPU utilization detected: {gpu_util*100:.2f}%")
            except Exception as e:
                logger.warning(f"Error monitoring GPU utilization: {e}")
        else:
            logger.info("CUDA not available. Skipping GPU utilization monitoring.")