refactor: reorganize agent structure by moving models to agents directory, update logging level, and enhance .gitignore for model files
c1d03da
import logging

import torch

# psutil is imported lazily in SystemHealthAgent._check_memory_usage so the
# module still imports cleanly when psutil is not installed.

logger = logging.getLogger(__name__)

class EnsembleMonitorAgent:
    def __init__(self):
        self.performance_metrics = {
            "model_accuracy": {},
            "response_times": {},
            "confidence_distribution": {},
            "consensus_rate": 0.0,
        }
        self.alerts = []

    def monitor_prediction(self, model_id, prediction, confidence, response_time):
        """Monitor individual model performance."""
        if model_id not in self.performance_metrics["model_accuracy"]:
            self.performance_metrics["model_accuracy"][model_id] = []
            self.performance_metrics["response_times"][model_id] = []
            self.performance_metrics["confidence_distribution"][model_id] = []

        self.performance_metrics["response_times"][model_id].append(response_time)
        self.performance_metrics["confidence_distribution"][model_id].append(confidence)
        # NOTE: `prediction` is accepted but not yet scored; "model_accuracy"
        # stays empty until ground-truth labels are available to compare against.

        # Check for performance issues
        self._check_performance_issues(model_id)

    def _check_performance_issues(self, model_id):
        """Check for any performance anomalies."""
        response_times = self.performance_metrics["response_times"][model_id]
        if len(response_times) > 10:
            avg_time = sum(response_times[-10:]) / 10
            if avg_time > 2.0:  # More than 2 seconds
                self.alerts.append(f"High latency detected for {model_id}: {avg_time:.2f}s")

class WeightOptimizationAgent:
    def __init__(self, weight_manager):
        self.weight_manager = weight_manager
        self.prediction_history = []  # Stores (ensemble_prediction_label, assumed_actual_label)
        self.optimization_threshold = 0.05  # 5% relative drop in accuracy triggers optimization
        self.min_history_for_optimization = 20  # Minimum samples before optimizing

    def analyze_performance(self, ensemble_prediction_label, actual_label=None):
        """Analyze ensemble performance and record it for optimization."""
        # If actual_label is not provided, assume the ensemble is correct
        # whenever it is not UNCERTAIN. This optimistic proxy inflates
        # self-reported accuracy, so real labels should be passed when available.
        assumed_actual_label = actual_label
        if assumed_actual_label is None and ensemble_prediction_label != "UNCERTAIN":
            assumed_actual_label = ensemble_prediction_label

        self.prediction_history.append((ensemble_prediction_label, assumed_actual_label))

        if len(self.prediction_history) >= self.min_history_for_optimization and self._should_optimize():
            self._optimize_weights()
    def _calculate_accuracy(self, history_subset):
        """Calculate accuracy over the entries whose actual label is known."""
        correct_predictions = 0
        total_known = 0
        for ensemble_pred, actual_label in history_subset:
            if actual_label is not None:
                total_known += 1
                if ensemble_pred == actual_label:
                    correct_predictions += 1
        return correct_predictions / total_known if total_known > 0 else 0.0
    def _should_optimize(self):
        """Decide whether weights should be optimized based on recent performance change."""
        if len(self.prediction_history) < self.min_history_for_optimization * 2:  # Need two full batches to compare
            return False

        # Compare the accuracy of the most recent batch with the previous batch
        recent_batch = self.prediction_history[-self.min_history_for_optimization:]
        previous_batch = self.prediction_history[-self.min_history_for_optimization * 2:-self.min_history_for_optimization]
        recent_accuracy = self._calculate_accuracy(recent_batch)
        previous_accuracy = self._calculate_accuracy(previous_batch)

        # Trigger optimization on a significant relative drop in accuracy
        if previous_accuracy > 0 and (previous_accuracy - recent_accuracy) / previous_accuracy > self.optimization_threshold:
            logger.warning(
                f"Performance degradation detected (from {previous_accuracy:.2f} "
                f"to {recent_accuracy:.2f}). Triggering weight optimization."
            )
            return True
        return False
    def _optimize_weights(self):
        """Optimize model weights based on recent performance."""
        logger.info("Optimizing model weights based on recent performance.")
        # Placeholder for the actual optimization logic: this is where
        # self.weight_manager.base_weights would be adjusted based on which
        # models contributed to correct predictions and which to errors.
        # For now it only logs.

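# Hypothetical usage sketch (not part of the original module): drives
# WeightOptimizationAgent with simulated labels to show when optimization
# triggers. DummyWeightManager stands in for the project's real weight manager,
# which is defined elsewhere.
def _demo_weight_optimization():
    class DummyWeightManager:
        base_weights = {}

    agent = WeightOptimizationAgent(DummyWeightManager())
    # 20 correct predictions followed by 20 wrong ones: recent-batch accuracy
    # collapses, the relative drop exceeds optimization_threshold, and
    # _optimize_weights fires on the 40th sample.
    for _ in range(20):
        agent.analyze_performance("REAL", actual_label="REAL")
    for _ in range(20):
        agent.analyze_performance("FAKE", actual_label="REAL")
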
class SystemHealthAgent:
    def __init__(self):
        self.health_metrics = {
            "memory_usage": [],
            "gpu_utilization": [],
            "model_load_times": {},
            "error_rates": {},
        }

    def monitor_system_health(self):
        """Monitor overall system health."""
        self._check_memory_usage()
        self._check_gpu_utilization()
        # A _check_model_health() step could be added here later.
    def _check_memory_usage(self):
        """Monitor system memory usage."""
        try:
            import psutil  # Imported lazily so the module works without psutil
            memory = psutil.virtual_memory()
            self.health_metrics["memory_usage"].append(memory.percent)
            if memory.percent > 90:
                logger.warning(f"High memory usage detected: {memory.percent}%")
        except ImportError:
            logger.warning("psutil not installed. Cannot monitor memory usage.")
    def _check_gpu_utilization(self):
        """Monitor GPU memory usage if CUDA is available."""
        if torch.cuda.is_available():
            try:
                # Ratio of currently allocated GPU memory to the peak allocation
                # seen so far: a memory-pressure proxy, not true GPU utilization.
                peak_allocated = torch.cuda.max_memory_allocated()
                if peak_allocated == 0:  # Nothing allocated yet; avoid division by zero
                    return
                gpu_util = torch.cuda.memory_allocated() / peak_allocated
                self.health_metrics["gpu_utilization"].append(gpu_util)
                if gpu_util > 0.9:
                    logger.warning(f"High GPU memory usage detected: {gpu_util * 100:.2f}%")
            except Exception as e:
                logger.warning(f"Error monitoring GPU utilization: {e}")
        else:
            logger.info("CUDA not available. Skipping GPU utilization monitoring.")
