diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,3554 +1,36 @@ -import requests -import pandas as pd -import gradio as gr -import plotly.graph_objects as go -import plotly.express as px -from plotly.subplots import make_subplots -from datetime import datetime, timedelta -import json -# Commenting out blockchain-related imports that cause loading issues -# from web3 import Web3 -import os -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -import random -import logging -from typing import List, Dict, Any, Optional -# Comment out the import for now and replace with dummy functions -# from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations -# Import APR vs agent hash visualization functions -from apr_vs_agent_hash import generate_apr_vs_agent_hash_visualizations -# APR visualization functions integrated directly - -# Set up logging with appropriate verbosity -logging.basicConfig( - level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity - format="%(asctime)s - %(levelname)s - %(message)s", - handlers=[ - logging.FileHandler("app_debug.log"), # Log to file for persistence - logging.StreamHandler() # Also log to console - ] -) -logger = logging.getLogger(__name__) - -# Reduce third-party library logging -logging.getLogger("urllib3").setLevel(logging.WARNING) -logging.getLogger("httpx").setLevel(logging.WARNING) -logging.getLogger("matplotlib").setLevel(logging.WARNING) - -# Log the startup information -logger.info("============= APPLICATION STARTING =============") -logger.info(f"Running from directory: {os.getcwd()}") - -# Global variables to store the data for reuse -global_df = None -global_roi_df = None -global_volume_df = None - -# Configuration -API_BASE_URL = "https://afmdb.autonolas.tech" -logger.info(f"Using API endpoint: {API_BASE_URL}") - -def get_agent_type_by_name(type_name: str) -> Dict[str, Any]: - """Get agent type by name""" - url = f"{API_BASE_URL}/api/agent-types/name/{type_name}" - logger.debug(f"Calling API: {url}") - - try: - response = requests.get(url) - logger.debug(f"Response status: {response.status_code}") - - if response.status_code == 404: - logger.error(f"Agent type '{type_name}' not found") - return None - - response.raise_for_status() - result = response.json() - logger.debug(f"Agent type response: {result}") - return result - except Exception as e: - logger.error(f"Error in get_agent_type_by_name: {e}") - return None - -def get_attribute_definition_by_name(attr_name: str) -> Dict[str, Any]: - """Get attribute definition by name""" - url = f"{API_BASE_URL}/api/attributes/name/{attr_name}" - logger.debug(f"Calling API: {url}") - - try: - response = requests.get(url) - logger.debug(f"Response status: {response.status_code}") - - if response.status_code == 404: - logger.error(f"Attribute definition '{attr_name}' not found") - return None - - response.raise_for_status() - result = response.json() - logger.debug(f"Attribute definition response: {result}") - return result - except Exception as e: - logger.error(f"Error in get_attribute_definition_by_name: {e}") - return None - -def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]: - """Get all agents of a specific type""" - url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/" - logger.debug(f"Calling API: {url}") - - try: - response = requests.get(url) - logger.debug(f"Response status: {response.status_code}") - - if response.status_code == 404: - logger.error(f"No agents found for type ID 
{type_id}") - return [] - - response.raise_for_status() - result = response.json() - logger.debug(f"Agents count: {len(result)}") - logger.debug(f"First few agents: {result[:2] if result else []}") - return result - except Exception as e: - logger.error(f"Error in get_agents_by_type: {e}") - return [] - -def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]: - """Get all attribute values for a specific attribute definition across all agents of a given list""" - all_attributes = [] - logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}") - - # For each agent, get their attributes and filter for the one we want - for agent in agents: - agent_id = agent["agent_id"] - - # Call the /api/agents/{agent_id}/attributes/ endpoint - url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/" - logger.debug(f"Calling API for agent {agent_id}: {url}") - - try: - response = requests.get(url, params={"limit": 1000}) - - if response.status_code == 404: - logger.error(f"No attributes found for agent ID {agent_id}") - continue - - response.raise_for_status() - agent_attrs = response.json() - logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes") - - # Filter for the specific attribute definition ID - filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id] - logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes") - - if filtered_attrs: - logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}") - - all_attributes.extend(filtered_attrs) - except requests.exceptions.RequestException as e: - logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}") - - logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}") - return all_attributes - -def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str: - """Get agent name from agent ID""" - for agent in agents: - if agent["agent_id"] == agent_id: - return agent["agent_name"] - return "Unknown" - -def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]: - """Extract APR value, adjusted APR value, ROI value, volume, and timestamp from JSON value""" - try: - agent_id = attr.get("agent_id", "unknown") - logger.debug(f"Extracting APR value for agent {agent_id}") - - # The APR value is stored in the json_value field - if attr["json_value"] is None: - logger.debug(f"Agent {agent_id}: json_value is None") - return {"apr": None, "adjusted_apr": None, "roi": None, "volume": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False} - - # If json_value is a string, parse it - if isinstance(attr["json_value"], str): - logger.debug(f"Agent {agent_id}: json_value is string, parsing") - json_data = json.loads(attr["json_value"]) - else: - json_data = attr["json_value"] - - apr = json_data.get("apr") - adjusted_apr = json_data.get("adjusted_apr") # Extract adjusted_apr if present - timestamp = json_data.get("timestamp") - volume = json_data.get("volume") # Extract volume if present - - # Extract ROI (f_i_ratio) from calculation_metrics if it exists - roi = None - if "calculation_metrics" in json_data and json_data["calculation_metrics"] is not None: - roi = json_data["calculation_metrics"].get("f_i_ratio") - - # Try to extract volume from portfolio_snapshot if it's not directly in json_data - if volume is None and "portfolio_snapshot" in json_data and json_data["portfolio_snapshot"] is not None: - portfolio = 
json_data["portfolio_snapshot"].get("portfolio") - if portfolio and isinstance(portfolio, dict): - volume = portfolio.get("volume") - - # Extract agent_hash from json_data or portfolio_snapshot - agent_hash = json_data.get("agent_hash") - if agent_hash is None and "portfolio_snapshot" in json_data and json_data["portfolio_snapshot"] is not None: - portfolio = json_data["portfolio_snapshot"].get("portfolio") - if portfolio and isinstance(portfolio, dict): - agent_hash = portfolio.get("agent_hash") - - logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, volume: {volume}, timestamp: {timestamp}, agent_hash: {agent_hash}") - - # Convert timestamp to datetime if it exists - timestamp_dt = None - if timestamp: - timestamp_dt = datetime.fromtimestamp(timestamp) - - result = { - "apr": apr, - "adjusted_apr": adjusted_apr, - "roi": roi, - "volume": volume, - "timestamp": timestamp_dt, - "agent_id": agent_id, - "agent_hash": agent_hash, - "is_dummy": False - } - logger.debug(f"Agent {agent_id}: Extracted result: {result}") - return result - except (json.JSONDecodeError, KeyError, TypeError) as e: - logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}") - logger.error(f"Problematic json_value: {attr.get('json_value')}") - return {"apr": None, "adjusted_apr": None, "roi": None, "volume": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False} - -def fetch_apr_data_from_db(): - """ - Fetch APR data from database using the API. - """ - global global_df - global global_roi_df - - logger.info("==== Starting APR data fetch ====") - - try: - # Step 1: Find the Modius agent type - logger.info("Finding Modius agent type") - modius_type = get_agent_type_by_name("Modius") - if not modius_type: - logger.error("Modius agent type not found, using placeholder data") - global_df = pd.DataFrame([]) - return global_df - - type_id = modius_type["type_id"] - logger.info(f"Found Modius agent type with ID: {type_id}") - - # Step 2: Find the APR attribute definition - logger.info("Finding APR attribute definition") - apr_attr_def = get_attribute_definition_by_name("APR") - if not apr_attr_def: - logger.error("APR attribute definition not found, using placeholder data") - global_df = pd.DataFrame([]) - return global_df - - attr_def_id = apr_attr_def["attr_def_id"] - logger.info(f"Found APR attribute definition with ID: {attr_def_id}") - - # Step 3: Get all agents of type Modius - logger.info(f"Getting all agents of type Modius (type_id: {type_id})") - modius_agents = get_agents_by_type(type_id) - if not modius_agents: - logger.error("No agents of type 'Modius' found") - global_df = pd.DataFrame([]) - return global_df - - logger.info(f"Found {len(modius_agents)} Modius agents") - logger.debug(f"Modius agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in modius_agents]}") - - # Step 4: Fetch all APR values for Modius agents - logger.info(f"Fetching APR values for all Modius agents (attr_def_id: {attr_def_id})") - apr_attributes = get_attribute_values_by_type_and_attr(modius_agents, attr_def_id) - if not apr_attributes: - logger.error("No APR values found for 'Modius' agents") - global_df = pd.DataFrame([]) - return global_df - - logger.info(f"Found {len(apr_attributes)} APR attributes total") - - # Step 5: Extract APR and ROI data - logger.info("Extracting APR and ROI data from attributes") - apr_data_list = [] - roi_data_list = [] - - for attr in apr_attributes: - data = extract_apr_value(attr) 
- if data["timestamp"] is not None: - # Get agent name - agent_name = get_agent_name(attr["agent_id"], modius_agents) - # Add agent name to the data - data["agent_name"] = agent_name - # Add is_dummy flag (all real data) - data["is_dummy"] = False - - # Process APR data - if data["apr"] is not None: - # Include all APR values (including negative ones) EXCEPT zero and -100 - if data["apr"] != 0 and data["apr"] != -100: - apr_entry = data.copy() - apr_entry["metric_type"] = "APR" - logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}") - # Add to the APR data list - apr_data_list.append(apr_entry) - else: - # Log that we're skipping zero or -100 values - logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)") - - # Process ROI data - if data["roi"] is not None: - # Include all ROI values except extreme outliers - if data["roi"] > -10 and data["roi"] < 10: # Filter extreme outliers - roi_entry = { - "roi": data["roi"], - "timestamp": data["timestamp"], - "agent_id": data["agent_id"], - "agent_name": agent_name, - "is_dummy": False, - "metric_type": "ROI" - } - logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}") - # Add to the ROI data list - roi_data_list.append(roi_entry) - else: - # Log that we're skipping extreme outlier values - logger.debug(f"Skipping ROI value for agent {agent_name} ({attr['agent_id']}): {data['roi']} (extreme outlier)") - - logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points") - - # Added debug for adjusted APR data after May 10th - may_10_2025 = datetime(2025, 5, 10) - after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025] - with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None] - - logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}") - logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}") - - # Log detailed information about when data began - first_adjusted = None - if with_adjusted_after_may_10: - first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp']) - logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})") - - # Check all data for first adjusted_apr - all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None] - if all_with_adjusted: - first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp']) - logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})") - last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp']) - logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})") - - # Calculate overall coverage - adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100 - logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)") - - # Log per-agent adjusted APR statistics - agent_stats = {} - for record in apr_data_list: - agent_id = record['agent_id'] - has_adjusted = record['adjusted_apr'] is not None - - if agent_id not in agent_stats: - agent_stats[agent_id] = {'total': 0, 'adjusted': 0} - - agent_stats[agent_id]['total'] += 1 - if has_adjusted: - agent_stats[agent_id]['adjusted'] += 1 - - # Log stats for agents with meaningful data - for agent_id, stats in 
agent_stats.items(): - if stats['total'] > 0: - coverage = (stats['adjusted'] / stats['total']) * 100 - if coverage > 0: # Only log agents that have at least some adjusted data - logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)") - - # Check for gaps in adjusted APR data - for agent_id in agent_stats: - # Get all records for this agent - agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id] - # Sort by timestamp - agent_records.sort(key=lambda x: x['timestamp']) - - # Find where adjusted APR starts and if there are gaps - has_adjusted = False - gap_count = 0 - streak_length = 0 - for record in agent_records: - if record['adjusted_apr'] is not None: - if not has_adjusted: - has_adjusted = True - logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}") - streak_length += 1 - elif has_adjusted: - # We had adjusted data but now it's missing - gap_count += 1 - if streak_length > 0: - logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records") - streak_length = 0 - - if gap_count > 0: - logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data") - elif has_adjusted: - logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps") - - # Provide summary statistics - agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0) - agents_with_gaps = sum(1 for agent_id in agent_stats if - any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and - i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and - apr_data_list[i+1]['adjusted_apr'] is None - for i in range(len(apr_data_list)-1))) - - logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data") - if agents_with_gaps > 0: - logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data") - logger.warning("These gaps may cause discontinuities in the adjusted APR graph") - else: - logger.info("No gaps detected in adjusted APR data - graph should be continuous") - - if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0: - logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data") - - # Log agent IDs with missing adjusted_apr after May 10th - agents_after_may_10 = set(d['agent_id'] for d in after_may_10) - logger.info(f"Agents with data after May 10th: {agents_after_may_10}") - - # Check these same agents before May 10th - before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025] - agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None} - - # Agents that had adjusted_apr before but not after - missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10) - if missing_adjusted: - logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}") - - # Find the last valid adjusted_apr date for these agents - for agent_id in missing_adjusted: - agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None] - if agent_data: - last_entry = max(agent_data, key=lambda d: d['timestamp']) - logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}") - - # Look at the first entry after the cutoff without adjusted_apr - agent_after = [d for d in after_may_10 if d['agent_id'] 
== agent_id] - if agent_after: - first_after = min(agent_after, key=lambda d: d['timestamp']) - logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr") - - # If the agent data has the 'adjusted_apr_key' field, log that info - if 'adjusted_apr_key' in first_after: - logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}") - - # Add debug logic to check for any adjusted_apr after May 10th and which agents have it - elif len(with_adjusted_after_may_10) > 0: - logger.info("Found adjusted_apr values after May 10th, 2025") - - # Group by agent and log - agent_counts = {} - for item in with_adjusted_after_may_10: - agent_id = item['agent_id'] - if agent_id in agent_counts: - agent_counts[agent_id] += 1 - else: - agent_counts[agent_id] = 1 - - logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}") - - # Log adjusted_apr keys used - keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item} - if keys_used: - logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}") - - # Convert to DataFrames - if not apr_data_list: - logger.error("No valid APR data extracted") - global_df = pd.DataFrame([]) - else: - # Convert list of dictionaries to DataFrame for APR - global_df = pd.DataFrame(apr_data_list) - - if not roi_data_list: - logger.error("No valid ROI data extracted") - global_roi_df = pd.DataFrame([]) - else: - # Convert list of dictionaries to DataFrame for ROI - global_roi_df = pd.DataFrame(roi_data_list) - - # Log the resulting dataframe - logger.info(f"Created DataFrame with {len(global_df)} rows") - logger.info(f"DataFrame columns: {global_df.columns.tolist()}") - logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}") - - # Log adjusted APR statistics if available - if 'adjusted_apr' in global_df.columns and global_df['adjusted_apr'].notna().any(): - logger.info(f"Adjusted APR statistics: min={global_df['adjusted_apr'].min()}, max={global_df['adjusted_apr'].max()}, mean={global_df['adjusted_apr'].mean()}") - logger.info(f"Number of records with adjusted_apr: {global_df['adjusted_apr'].notna().sum()} out of {len(global_df)}") - - # Log the difference between APR and adjusted APR - valid_rows = global_df[global_df['adjusted_apr'].notna()] - if not valid_rows.empty: - avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean() - max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max() - min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min() - logger.info(f"APR vs. 
adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}") - - # All values are APR type (excluding zero and -100 values) - logger.info("All values are APR type (excluding zero and -100 values)") - logger.info(f"Agents count: {global_df['agent_name'].value_counts().to_dict()}") - - # Log the entire dataframe for debugging - logger.debug("Final DataFrame contents:") - for idx, row in global_df.iterrows(): - logger.debug(f"Row {idx}: {row.to_dict()}") - - # Add this at the end, right before returning - logger.info("Analyzing adjusted_apr data availability...") - log_adjusted_apr_availability(global_df) - - return global_df, global_roi_df - - except requests.exceptions.RequestException as e: - logger.error(f"API request error: {e}") - global_df = pd.DataFrame([]) - global_roi_df = pd.DataFrame([]) - return global_df, global_roi_df - except Exception as e: - logger.error(f"Error fetching APR data: {e}") - logger.exception("Exception traceback:") - global_df = pd.DataFrame([]) - global_roi_df = pd.DataFrame([]) - return global_df, global_roi_df - -def log_adjusted_apr_availability(df): - """ - Analyzes and logs detailed information about adjusted_apr data availability. - - Args: - df: DataFrame containing the APR data with adjusted_apr column - """ - if df.empty or 'adjusted_apr' not in df.columns: - logger.warning("No adjusted_apr data available for analysis") - return - - # Get only rows with valid adjusted_apr values - has_adjusted = df[df['adjusted_apr'].notna()] - - if has_adjusted.empty: - logger.warning("No valid adjusted_apr values found in the dataset") - return - - # 1. When did adjusted_apr data start? - first_adjusted = has_adjusted['timestamp'].min() - last_adjusted = has_adjusted['timestamp'].max() - logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}") - logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}") - logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days") - - # Calculate coverage percentage - total_records = len(df) - records_with_adjusted = len(has_adjusted) - coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0 - logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)") - - # 2. How many agents are providing adjusted_apr? - agents_with_adjusted = has_adjusted['agent_id'].unique() - logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr") - logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}") - - # 3. 
May 10th cutoff analysis - may_10_2025 = datetime(2025, 5, 10) - before_cutoff = df[df['timestamp'] < may_10_2025] - after_cutoff = df[df['timestamp'] >= may_10_2025] - - if not before_cutoff.empty and not after_cutoff.empty: - before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum() - before_pct = (before_with_adjusted / len(before_cutoff)) * 100 - - after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum() - after_pct = (after_with_adjusted / len(after_cutoff)) * 100 - - logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)") - logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)") - - # Check which agents had data before and after - agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) - agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) - - missing_after = agents_before - agents_after - if missing_after: - logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}") - - new_after = agents_after - agents_before - if new_after: - logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}") - - # 4. Find date ranges for missing adjusted_apr - # Group by agent to analyze per-agent data availability - logger.info("=== DETAILED AGENT ANALYSIS ===") - for agent_id in df['agent_id'].unique(): - agent_data = df[df['agent_id'] == agent_id] - agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}" - - # Get the valid adjusted_apr values for this agent - agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()] - - if agent_adjusted.empty: - logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available") - continue - - # Get the date range for this agent's data - agent_start = agent_data['timestamp'].min() - agent_end = agent_data['timestamp'].max() - - # Get the date range for adjusted_apr data - adjusted_start = agent_adjusted['timestamp'].min() - adjusted_end = agent_adjusted['timestamp'].max() - - total_agent_records = len(agent_data) - agent_with_adjusted = len(agent_adjusted) - coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0 - - logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)") - logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}") - logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}") - - # Calculate if this agent had data before/after May 10th - if not before_cutoff.empty and not after_cutoff.empty: - agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id] - agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id] - - has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any() - has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any() - - if has_before and not has_after: - last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max() - logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. 
Last data point: {last_date}") - elif not has_before and has_after: - first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min() - logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}") - - # Check for gaps in adjusted_apr (periods of 24+ hours without data) - if len(agent_adjusted) < 2: - continue - - # Sort by timestamp - sorted_data = agent_adjusted.sort_values('timestamp') - - # Calculate time differences between consecutive data points - time_diffs = sorted_data['timestamp'].diff() - - # Find gaps larger than 24 hours - gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)] - - if not gaps.empty: - logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data") - - # Log the gaps - for i, row in gaps.iterrows(): - # Find the previous timestamp before the gap - prev_idx = sorted_data.index.get_loc(i) - 1 - prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None - - if prev_time: - gap_start = prev_time - gap_end = row['timestamp'] - gap_duration = gap_end - gap_start - logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)") - -def generate_apr_visualizations(): - """Generate APR visualizations with real data only (no dummy data)""" - global global_df - - # Fetch data from database - df, _ = fetch_apr_data_from_db() - - # If we got no data at all, return placeholder figures - if df.empty: - logger.info("No APR data available. Using fallback visualization.") - # Create empty visualizations with a message using Plotly - fig = go.Figure() - fig.add_annotation( - x=0.5, y=0.5, - text="No APR data available", - font=dict(size=20), - showarrow=False - ) - fig.update_layout( - xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) - ) - - # Save as static file for reference - fig.write_html("modius_apr_combined_graph.html") - fig.write_image("modius_apr_combined_graph.png") - - csv_file = None - return fig, csv_file - - # No longer generating dummy data - # Set global_df for access by other functions - global_df = df - - # Save to CSV before creating visualizations - csv_file = save_to_csv(df) - - # Only create combined time series graph - combined_fig = create_combined_time_series_graph(df) - - return combined_fig, csv_file - -def generate_volume_visualizations(): - """Generate volume visualizations with real data only (no dummy data)""" - global global_df - global global_volume_df - - # Use the existing APR data which already contains volume - if global_df is None or global_df.empty: - df, _ = fetch_apr_data_from_db() - else: - df = global_df - - # Filter for records with volume data - volume_df = df[df['volume'].notna()].copy() - - # Set global_volume_df for access by other functions - global_volume_df = volume_df - - # If we got no data at all, return placeholder figures - if volume_df.empty: - logger.info("No volume data available. 
Using fallback visualization.") - # Create empty visualizations with a message using Plotly - fig = go.Figure() - fig.add_annotation( - x=0.5, y=0.5, - text="No volume data available", - font=dict(size=20), - showarrow=False - ) - fig.update_layout( - xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) - ) - - # Save as static file for reference - fig.write_html("modius_volume_graph.html") - fig.write_image("modius_volume_graph.png") - - csv_file = None - return fig, csv_file - - # Save to CSV before creating visualizations - csv_file = save_volume_to_csv(volume_df) - - # Create combined time series graph for volume - combined_fig = create_combined_volume_time_series_graph(volume_df) - - return combined_fig, csv_file - -def save_volume_to_csv(df): - """Save the volume data DataFrame to a CSV file and return the file path""" - if df.empty: - logger.error("No volume data to save to CSV") - return None - - # Define the CSV file path - csv_file = "modius_volume_values.csv" - - # Save to CSV - df.to_csv(csv_file, index=False) - logger.info(f"Volume data saved to {csv_file}") - - return csv_file - -def create_combined_volume_time_series_graph(df): - """Create a time series graph showing volume values across all agents""" - if len(df) == 0: - logger.error("No data to plot combined volume graph") - fig = go.Figure() - fig.add_annotation( - text="No volume data available", - x=0.5, y=0.5, - showarrow=False, font=dict(size=20) - ) - return fig - - # IMPORTANT: Force data types to ensure consistency - df['volume'] = df['volume'].astype(float) # Ensure volume is float - - # Get min and max time for shapes - min_time = df['timestamp'].min() - max_time = df['timestamp'].max() - - # Use the actual start date from the data - x_start_date = min_time - - # CRITICAL: Log the exact dataframe we're using for plotting to help debug - logger.info(f"Volume Graph data - shape: {df.shape}, columns: {df.columns}") - logger.info(f"Volume Graph data - unique agents: {df['agent_name'].unique().tolist()}") - logger.info(f"Volume Graph data - min volume: {df['volume'].min()}, max volume: {df['volume'].max()}") - - # Export full dataframe to CSV for debugging - debug_csv = "debug_volume_data.csv" - df.to_csv(debug_csv) - logger.info(f"Exported volume graph data to {debug_csv} for debugging") - - # Create Plotly figure in a clean state - fig = go.Figure() - - # Add background shape for volume region - fig.add_shape( - type="rect", - fillcolor="rgba(230, 243, 255, 0.3)", - line=dict(width=0), - y0=0, y1=df['volume'].max() * 1.1, # Use a reasonable upper limit for volume - x0=min_time, x1=max_time, - layer="below" - ) - - # Add zero line - fig.add_shape( - type="line", - line=dict(dash="solid", width=1.5, color="black"), - y0=0, y1=0, - x0=min_time, x1=max_time - ) - - # Group by timestamp and calculate mean volume - avg_volume_data = df.groupby('timestamp')['volume'].mean().reset_index() - - # Sort by timestamp - avg_volume_data = avg_volume_data.sort_values('timestamp') - - # Log the average volume data - logger.info(f"Calculated average volume data with {len(avg_volume_data)} points") - for idx, row in avg_volume_data.iterrows(): - logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_volume={row['volume']}") - - # Calculate moving average based on a time window (3 days) - # Sort data by timestamp - df_sorted = df.sort_values('timestamp') - - # Create a new dataframe for the moving average - avg_volume_data_with_ma = 
avg_volume_data.copy() - avg_volume_data_with_ma['moving_avg'] = None # Initialize the moving average column - - # Define the time window for the moving average (3 days) - time_window = pd.Timedelta(days=3) - logger.info(f"Calculating moving average with time window of {time_window}") - - # Calculate the moving averages for each timestamp - for i, row in avg_volume_data_with_ma.iterrows(): - current_time = row['timestamp'] - window_start = current_time - time_window - - # Get all data points within the 3-day time window - window_data = df_sorted[ - (df_sorted['timestamp'] >= window_start) & - (df_sorted['timestamp'] <= current_time) - ] - - # Calculate the average volume for the 3-day time window - if not window_data.empty: - avg_volume_data_with_ma.at[i, 'moving_avg'] = window_data['volume'].mean() - logger.debug(f"Volume time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['volume'].mean()}") - else: - # If no data points in the window, use the current value - avg_volume_data_with_ma.at[i, 'moving_avg'] = row['volume'] - logger.debug(f"No data points in time window for {current_time}, using current value {row['volume']}") - - logger.info(f"Calculated time-based moving averages with {len(avg_volume_data_with_ma)} points") - - # Find the last date where we have valid moving average data - last_valid_ma_date = avg_volume_data_with_ma[avg_volume_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_volume_data_with_ma['moving_avg'].dropna().empty else None - - # If we don't have any valid moving average data, use the max time from the original data - last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max() - - logger.info(f"Last valid moving average date: {last_valid_ma_date}") - logger.info(f"Using last valid date for graph: {last_valid_date}") - - # Plot individual agent data points with agent names in hover, but limit display for scalability - if not df.empty: - # Group by agent to use different colors for each agent - unique_agents = df['agent_name'].unique() - colors = px.colors.qualitative.Plotly[:len(unique_agents)] - - # Create a color map for agents - color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} - - # Calculate the total number of data points per agent to determine which are most active - agent_counts = df['agent_name'].value_counts() - - # Determine how many agents to show individually (limit to top 5 most active) - MAX_VISIBLE_AGENTS = 5 - top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() - - logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents") - - # Add data points for each agent, but only make top agents visible by default - for agent_name in unique_agents: - agent_data = df[df['agent_name'] == agent_name] - - # Explicitly convert to Python lists - x_values = agent_data['timestamp'].tolist() - y_values = agent_data['volume'].tolist() - - # Change default visibility to False to hide all agent data points - is_visible = False - - # Add data points as markers for volume - fig.add_trace( - go.Scatter( - x=x_values, - y=y_values, - mode='markers', # Only markers for original data - marker=dict( - color=color_map[agent_name], - symbol='circle', - size=10, - line=dict(width=1, color='black') - ), - name=f'Agent: {agent_name} (Volume)', - hovertemplate='Time: %{x}
<br>Volume: %{y:.2f}<br>
Agent: ' + agent_name + '', - visible=is_visible # All agents hidden by default - ) - ) - logger.info(f"Added volume data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})") - - # Add volume moving average as a smooth line - x_values_ma = avg_volume_data_with_ma['timestamp'].tolist() - y_values_ma = avg_volume_data_with_ma['moving_avg'].tolist() - - # Create hover template for the volume moving average line - hover_data_volume = [] - for idx, row in avg_volume_data_with_ma.iterrows(): - timestamp = row['timestamp'] - # Format timestamp to show only up to seconds (not milliseconds) - formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S') - - # Calculate number of active agents in the last 24 hours - time_24h_ago = timestamp - pd.Timedelta(hours=24) - active_agents = len(df[(df['timestamp'] >= time_24h_ago) & - (df['timestamp'] <= timestamp)]['agent_id'].unique()) - - hover_data_volume.append( - f"Time: {formatted_timestamp}
<br>Avg Volume (3d window): {row['moving_avg']:.2f}<br>
Active agents (24h): {active_agents}" - ) - - fig.add_trace( - go.Scatter( - x=x_values_ma, - y=y_values_ma, - mode='lines', # Only lines for moving average - line=dict(color='purple', width=2), # Purple line for volume - name='Average Volume (3d window)', - hovertext=hover_data_volume, - hoverinfo='text', - visible=True # Visible by default - ) - ) - logger.info(f"Added 3-day moving average volume trace with {len(x_values_ma)} points") - - # Update layout - fig.update_layout( - title=dict( - text="Modius Agents Volume", - font=dict( - family="Arial, sans-serif", - size=22, - color="black", - weight="bold" - ) - ), - xaxis_title=None, # Remove x-axis title to use annotation instead - yaxis_title=None, # Remove the y-axis title as we'll use annotations instead - template="plotly_white", - height=600, # Reduced height for better fit on smaller screens - autosize=True, # Enable auto-sizing for responsiveness - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1, - groupclick="toggleitem" - ), - margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title - hovermode="closest" - ) - - # Add single annotation for y-axis - fig.add_annotation( - x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels - y=df['volume'].max() / 2, # Center of the y-axis - xref="paper", - yref="y", - text="Volume", - showarrow=False, - font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - textangle=-90, # Rotate text to be vertical - align="center" - ) - - # Update layout for legend - fig.update_layout( - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1, - groupclick="toggleitem", - font=dict( - family="Arial, sans-serif", - size=14, # Adjusted font size - color="black", - weight="bold" - ) - ) - ) - - # Update y-axis with autoscaling for volume - fig.update_yaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - autorange=True, # Enable autoscaling for volume - tickformat=".2f", # Format tick labels with 2 decimal places - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - title=None # Remove the built-in axis title since we're using annotations - ) - - # Update x-axis with better formatting and fixed range - fig.update_xaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - # Set fixed range with start date and ending at the last valid date - autorange=False, # Disable autoscaling - range=[x_start_date, last_valid_date], # Set fixed range from start date to last valid date - tickformat="%b %d", # Simplified date format without time - tickangle=-30, # Angle the labels for better readability - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - title=None # Remove built-in title to use annotation instead - ) - - try: - # Save the figure - graph_file = "modius_volume_graph.html" - fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) - - # Also save as image for compatibility - img_file = "modius_volume_graph.png" - try: - fig.write_image(img_file) - logger.info(f"Volume graph saved to {graph_file} and {img_file}") - except Exception as e: - logger.error(f"Error saving volume image: {e}") - logger.info(f"Volume graph saved to {graph_file} only") - - # Return the figure object for direct use in Gradio - return fig - except Exception as e: - # If the complex graph approach fails, create a simpler one - 
logger.error(f"Error creating advanced volume graph: {e}") - logger.info("Falling back to simpler volume graph") - - # Create a simpler graph as fallback - simple_fig = go.Figure() - - # Add zero line - simple_fig.add_shape( - type="line", - line=dict(dash="solid", width=1.5, color="black"), - y0=0, y1=0, - x0=min_time, x1=max_time - ) - - # Simply plot the average volume data with moving average - if not avg_volume_data.empty: - # Add moving average as a line - simple_fig.add_trace( - go.Scatter( - x=avg_volume_data_with_ma['timestamp'], - y=avg_volume_data_with_ma['moving_avg'], - mode='lines', - name='Average Volume (3d window)', - line=dict(width=2, color='purple') # Purple line for volume - ) - ) - - # Simplified layout with adjusted y-axis range - simple_fig.update_layout( - title=dict( - text="Modius Agents Volume", - font=dict( - family="Arial, sans-serif", - size=22, - color="black", - weight="bold" - ) - ), - xaxis_title=None, - yaxis_title=None, - template="plotly_white", - height=600, - autosize=True, - margin=dict(r=30, l=120, t=40, b=50) - ) - - # Update y-axis with autoscaling for volume - simple_fig.update_yaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - autorange=True, # Enable autoscaling for volume - tickformat=".2f", - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), - title=None # Remove the built-in axis title since we're using annotations - ) - - # Update x-axis with better formatting and fixed range - simple_fig.update_xaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - autorange=False, - range=[x_start_date, max_time], - tickformat="%b %d", - tickangle=-30, - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") - ) - - # Save the figure - graph_file = "modius_volume_graph.html" - simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) - - # Return the simple figure - return simple_fig - -def generate_roi_visualizations(): - """Generate ROI visualizations with real data only (no dummy data)""" - global global_roi_df - - # Fetch data from database if not already fetched - if global_roi_df is None or global_roi_df.empty: - _, df_roi = fetch_apr_data_from_db() - else: - df_roi = global_roi_df - - # If we got no data at all, return placeholder figures - if df_roi.empty: - logger.info("No ROI data available. 
Using fallback visualization.") - # Create empty visualizations with a message using Plotly - fig = go.Figure() - fig.add_annotation( - x=0.5, y=0.5, - text="No ROI data available", - font=dict(size=20), - showarrow=False - ) - fig.update_layout( - xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) - ) - - # Save as static file for reference - fig.write_html("modius_roi_graph.html") - fig.write_image("modius_roi_graph.png") - - csv_file = None - return fig, csv_file - - # Set global_roi_df for access by other functions - global_roi_df = df_roi - - # Save to CSV before creating visualizations - csv_file = save_roi_to_csv(df_roi) - - # Create combined time series graph for ROI - combined_fig = create_combined_roi_time_series_graph(df_roi) - - return combined_fig, csv_file - -def create_combined_roi_time_series_graph(df): - """Create a time series graph showing average ROI values across all agents""" - if len(df) == 0: - logger.error("No data to plot combined ROI graph") - fig = go.Figure() - fig.add_annotation( - text="No ROI data available", - x=0.5, y=0.5, - showarrow=False, font=dict(size=20) - ) - return fig - - # Define fixed start date (February 1, 2025) - fixed_start_date = datetime(2025, 2, 1) - logger.info(f"Using fixed start date for ROI runtime calculation: {fixed_start_date}") - - # Calculate runtime for each agent from fixed start date - agent_runtimes = {} - for agent_id in df['agent_id'].unique(): - agent_data = df[df['agent_id'] == agent_id] - agent_name = agent_data['agent_name'].iloc[0] - last_report = agent_data['timestamp'].max() - runtime_days = (last_report - fixed_start_date).total_seconds() / (24 * 3600) # Convert to days - agent_runtimes[agent_id] = { - 'agent_name': agent_name, - 'last_report': last_report, - 'runtime_days': runtime_days - } - - # Calculate average runtime - avg_runtime = sum(data['runtime_days'] for data in agent_runtimes.values()) / len(agent_runtimes) if agent_runtimes else 0 - logger.info(f"Average agent runtime from fixed start date: {avg_runtime:.2f} days") - - # Log individual agent runtimes for debugging - for agent_id, data in agent_runtimes.items(): - logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}") - - # IMPORTANT: Force data types to ensure consistency - df['roi'] = df['roi'].astype(float) # Ensure ROI is float - # Convert ROI values to percentages (multiply by 100) - df['roi'] = df['roi'] * 100 - df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string - - # Get min and max time for shapes - min_time = df['timestamp'].min() - max_time = df['timestamp'].max() - - # Use the actual start date from the data instead of a fixed date - x_start_date = min_time - - # CRITICAL: Log the exact dataframe we're using for plotting to help debug - logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}") - logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}") - logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}") - - # Export full dataframe to CSV for debugging - debug_csv = "debug_roi_data.csv" - df.to_csv(debug_csv) - logger.info(f"Exported ROI graph data to {debug_csv} for debugging") - - # Create Plotly figure in a clean state - fig = go.Figure() - - # Get min and max time for shapes - min_time = df['timestamp'].min() - max_time = df['timestamp'].max() - - # Add background 
shapes for positive and negative regions - # Add shape for positive ROI region (above zero) - fig.add_shape( - type="rect", - fillcolor="rgba(230, 243, 255, 0.3)", - line=dict(width=0), - y0=0, y1=100, # Use a fixed positive value (percentage) - x0=min_time, x1=max_time, - layer="below" - ) - - # Add shape for negative ROI region (below zero) - fig.add_shape( - type="rect", - fillcolor="rgba(255, 230, 230, 0.3)", - line=dict(width=0), - y0=-100, y1=0, # Use a fixed negative value (percentage) - x0=min_time, x1=max_time, - layer="below" - ) - - # Add zero line - fig.add_shape( - type="line", - line=dict(dash="solid", width=1.5, color="black"), - y0=0, y1=0, - x0=min_time, x1=max_time - ) - - # Filter out outliers (ROI values above 200% or below -200%) - outlier_data = df[(df['roi'] > 200) | (df['roi'] < -200)].copy() - df_filtered = df[(df['roi'] <= 200) & (df['roi'] >= -200)].copy() - - # Log the outliers for better debugging - if len(outlier_data) > 0: - excluded_count = len(outlier_data) - logger.info(f"Excluded {excluded_count} data points with outlier ROI values (>200% or <-200%)") - - # Group outliers by agent for detailed logging - outlier_agents = outlier_data.groupby('agent_name') - for agent_name, agent_outliers in outlier_agents: - logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:") - for idx, row in agent_outliers.iterrows(): - logger.info(f" - ROI: {row['roi']}, timestamp: {row['timestamp']}") - - # Use the filtered data for all subsequent operations - df = df_filtered - - # Group by timestamp and calculate mean ROI - avg_roi_data = df.groupby('timestamp')['roi'].mean().reset_index() - - # Sort by timestamp - avg_roi_data = avg_roi_data.sort_values('timestamp') - - # Log the average ROI data - logger.info(f"Calculated average ROI data with {len(avg_roi_data)} points") - for idx, row in avg_roi_data.iterrows(): - logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_roi={row['roi']}") - - # Calculate moving average based on a time window (3 days) - # Sort data by timestamp - df_sorted = df.sort_values('timestamp') - - # Create a new dataframe for the moving average - avg_roi_data_with_ma = avg_roi_data.copy() - avg_roi_data_with_ma['moving_avg'] = None # Initialize the moving average column - - # Define the time window for the moving average (3 days) - time_window = pd.Timedelta(days=3) - logger.info(f"Calculating moving average with time window of {time_window}") - - # Calculate the moving averages for each timestamp - for i, row in avg_roi_data_with_ma.iterrows(): - current_time = row['timestamp'] - window_start = current_time - time_window - - # Get all data points within the 3-day time window - window_data = df_sorted[ - (df_sorted['timestamp'] >= window_start) & - (df_sorted['timestamp'] <= current_time) - ] - - # Calculate the average ROI for the 3-day time window - if not window_data.empty: - avg_roi_data_with_ma.at[i, 'moving_avg'] = window_data['roi'].mean() - logger.debug(f"ROI time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['roi'].mean()}") - else: - # If no data points in the window, use the current value - avg_roi_data_with_ma.at[i, 'moving_avg'] = row['roi'] - logger.debug(f"No data points in time window for {current_time}, using current value {row['roi']}") - - logger.info(f"Calculated time-based moving averages with {len(avg_roi_data_with_ma)} points") - - # Find the last date where we have valid moving average data - last_valid_ma_date = 
avg_roi_data_with_ma[avg_roi_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_roi_data_with_ma['moving_avg'].dropna().empty else None - - # If we don't have any valid moving average data, use the max time from the original data - last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max() - - logger.info(f"Last valid moving average date: {last_valid_ma_date}") - logger.info(f"Using last valid date for graph: {last_valid_date}") - - # Plot individual agent data points with agent names in hover, but limit display for scalability - if not df.empty: - # Group by agent to use different colors for each agent - unique_agents = df['agent_name'].unique() - colors = px.colors.qualitative.Plotly[:len(unique_agents)] - - # Create a color map for agents - color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} - - # Calculate the total number of data points per agent to determine which are most active - agent_counts = df['agent_name'].value_counts() - - # Determine how many agents to show individually (limit to top 5 most active) - MAX_VISIBLE_AGENTS = 5 - top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() - - logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents") - - # Add data points for each agent, but only make top agents visible by default - for agent_name in unique_agents: - agent_data = df[df['agent_name'] == agent_name] - - # Explicitly convert to Python lists - x_values = agent_data['timestamp'].tolist() - y_values = agent_data['roi'].tolist() - - # Change default visibility to False to hide all agent data points - is_visible = False - - # Add data points as markers for ROI - fig.add_trace( - go.Scatter( - x=x_values, - y=y_values, - mode='markers', # Only markers for original data - marker=dict( - color=color_map[agent_name], - symbol='circle', - size=10, - line=dict(width=1, color='black') - ), - name=f'Agent: {agent_name} (ROI)', - hovertemplate='Time: %{x}
<br>ROI: %{y:.2f}<br>
Agent: ' + agent_name + '', - visible=is_visible # All agents hidden by default - ) - ) - logger.info(f"Added ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})") - - # Add ROI moving average as a smooth line - x_values_ma = avg_roi_data_with_ma['timestamp'].tolist() - y_values_ma = avg_roi_data_with_ma['moving_avg'].tolist() - - # Create hover template for the ROI moving average line - hover_data_roi = [] - for idx, row in avg_roi_data_with_ma.iterrows(): - timestamp = row['timestamp'] - # Format timestamp to show only up to seconds (not milliseconds) - formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S') - - # Calculate number of active agents in the last 24 hours - time_24h_ago = timestamp - pd.Timedelta(hours=24) - active_agents = len(df[(df['timestamp'] >= time_24h_ago) & - (df['timestamp'] <= timestamp)]['agent_id'].unique()) - - hover_data_roi.append( - f"Time: {formatted_timestamp}
<br>Avg ROI (3d window): {row['moving_avg']:.2f}%<br>
Active agents (24h): {active_agents}" - ) - - fig.add_trace( - go.Scatter( - x=x_values_ma, - y=y_values_ma, - mode='lines', # Only lines for moving average - line=dict(color='blue', width=2), # Thinner line - name='Average ROI (3d window)', - hovertext=hover_data_roi, - hoverinfo='text', - visible=True # Visible by default - ) - ) - logger.info(f"Added 3-day moving average ROI trace with {len(x_values_ma)} points") - - # Update layout with average runtime information in the title - fig.update_layout( - title=dict( - text=f"Modius Agents ROI (over avg. {avg_runtime:.1f} days runtime)", - font=dict( - family="Arial, sans-serif", - size=22, - color="black", - weight="bold" - ) - ), - xaxis_title=None, # Remove x-axis title to use annotation instead - yaxis_title=None, # Remove the y-axis title as we'll use annotations instead - template="plotly_white", - height=600, # Reduced height for better fit on smaller screens - autosize=True, # Enable auto-sizing for responsiveness - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1, - groupclick="toggleitem" - ), - margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title - hovermode="closest" - ) - - # Add single annotation for y-axis - fig.add_annotation( - x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels - y=0, # Center of the y-axis - xref="paper", - yref="y", - text="ROI [%]", - showarrow=False, - font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - textangle=-90, # Rotate text to be vertical - align="center" - ) - - # Update layout for legend - fig.update_layout( - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1, - groupclick="toggleitem", - font=dict( - family="Arial, sans-serif", - size=14, # Adjusted font size - color="black", - weight="bold" - ) - ) - ) - - # Update y-axis with fixed range of -100% to +100% for ROI - fig.update_yaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - # Use fixed range instead of autoscaling - autorange=False, # Disable autoscaling - range=[-100, 100], # Set fixed range from -100% to +100% - tickformat=".2f", # Format tick labels with 2 decimal places - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - title=None # Remove the built-in axis title since we're using annotations - ) - - # Update x-axis with better formatting and fixed range - fig.update_xaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - # Set fixed range with start date and ending at the last valid date - autorange=False, # Disable autoscaling - range=[x_start_date, last_valid_date], # Set fixed range from start date to last valid date - tickformat="%b %d", # Simplified date format without time - tickangle=-30, # Angle the labels for better readability - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - title=None # Remove built-in title to use annotation instead - ) - - try: - # Save the figure - graph_file = "modius_roi_graph.html" - fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) - - # Also save as image for compatibility - img_file = "modius_roi_graph.png" - try: - fig.write_image(img_file) - logger.info(f"ROI graph saved to {graph_file} and {img_file}") - except Exception as e: - logger.error(f"Error saving ROI image: {e}") - logger.info(f"ROI graph saved to {graph_file} only") - - # Return the figure 
object for direct use in Gradio - return fig - except Exception as e: - # If the complex graph approach fails, create a simpler one - logger.error(f"Error creating advanced ROI graph: {e}") - logger.info("Falling back to Simpler ROI graph") - - # Create a simpler graph as fallback - simple_fig = go.Figure() - - # Add zero line - simple_fig.add_shape( - type="line", - line=dict(dash="solid", width=1.5, color="black"), - y0=0, y1=0, - x0=min_time, x1=max_time - ) - - # Simply plot the average ROI data with moving average - if not avg_roi_data.empty: - # Add moving average as a line - simple_fig.add_trace( - go.Scatter( - x=avg_roi_data_with_ma['timestamp'], - y=avg_roi_data_with_ma['moving_avg'], - mode='lines', - name='Average ROI (3d window)', - line=dict(width=2, color='blue') # Thinner line - ) - ) - - # Simplified layout with adjusted y-axis range - simple_fig.update_layout( - title=dict( - text="Modius Agents ROI", - font=dict( - family="Arial, sans-serif", - size=22, - color="black", - weight="bold" - ) - ), - xaxis_title=None, - yaxis_title=None, - template="plotly_white", - height=600, - autosize=True, - margin=dict(r=30, l=120, t=40, b=50) - ) - - # Update y-axis with fixed range of -100% to +100% for ROI - simple_fig.update_yaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - autorange=False, - range=[-100, 100], - tickformat=".2f", - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), - title=None # Remove the built-in axis title since we're using annotations - ) - - # Update x-axis with better formatting and fixed range - simple_fig.update_xaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - autorange=False, - range=[x_start_date, max_time], - tickformat="%b %d", - tickangle=-30, - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") - ) - - # Save the figure - graph_file = "modius_roi_graph.html" - simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) - - # Return the simple figure - return simple_fig - -def save_roi_to_csv(df): - """Save the ROI data DataFrame to a CSV file and return the file path""" - if df.empty: - logger.error("No ROI data to save to CSV") - return None - - # Define the CSV file path - csv_file = "modius_roi_values.csv" - - # Save to CSV - df.to_csv(csv_file, index=False) - logger.info(f"ROI data saved to {csv_file}") - - return csv_file - -def create_time_series_graph_per_agent(df): - """Create a time series graph for each agent using Plotly""" - # Get unique agents - unique_agents = df['agent_id'].unique() - - if len(unique_agents) == 0: - logger.error("No agent data to plot") - fig = go.Figure() - fig.add_annotation( - text="No agent data available", - x=0.5, y=0.5, - showarrow=False, font=dict(size=20) - ) - return fig - - # Create a subplot figure for each agent - fig = make_subplots(rows=len(unique_agents), cols=1, - subplot_titles=[f"Agent: {df[df['agent_id'] == agent_id]['agent_name'].iloc[0]}" - for agent_id in unique_agents], - vertical_spacing=0.1) - - # Plot data for each agent - for i, agent_id in enumerate(unique_agents): - agent_data = df[df['agent_id'] == agent_id].copy() - agent_name = agent_data['agent_name'].iloc[0] - row = i + 1 - - # Add zero line to separate APR and Performance - fig.add_shape( - type="line", line=dict(dash="solid", width=1.5, color="black"), - y0=0, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), - row=row, col=1 - ) - - # Add background colors - fig.add_shape( - type="rect", 
fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0), - y0=0, y1=1000, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), - row=row, col=1, layer="below" - ) - fig.add_shape( - type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0), - y0=-1000, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), - row=row, col=1, layer="below" - ) - - # Create separate dataframes for different data types - apr_data = agent_data[agent_data['metric_type'] == 'APR'] - perf_data = agent_data[agent_data['metric_type'] == 'Performance'] - - # Sort all data by timestamp for the line plots - combined_agent_data = agent_data.sort_values('timestamp') - - # Add main line connecting all points - fig.add_trace( - go.Scatter( - x=combined_agent_data['timestamp'], - y=combined_agent_data['apr'], - mode='lines', - line=dict(color='purple', width=2), - name=f'{agent_name}', - legendgroup=agent_name, - showlegend=(i == 0), # Only show in legend once - hovertemplate='Time: %{x}
<br>Value: %{y:.2f}' - ), - row=row, col=1 - ) - - # Add scatter points for APR values - if not apr_data.empty: - fig.add_trace( - go.Scatter( - x=apr_data['timestamp'], - y=apr_data['apr'], - mode='markers', - marker=dict(color='blue', size=10, symbol='circle'), - name='APR', - legendgroup='APR', - showlegend=(i == 0), - hovertemplate='Time: %{x}<br>
APR: %{y:.2f}' - ), - row=row, col=1 - ) - - # Add scatter points for Performance values - if not perf_data.empty: - fig.add_trace( - go.Scatter( - x=perf_data['timestamp'], - y=perf_data['apr'], - mode='markers', - marker=dict(color='red', size=10, symbol='square'), - name='Performance', - legendgroup='Performance', - showlegend=(i == 0), - hovertemplate='Time: %{x}<br>
Performance: %{y:.2f}' - ), - row=row, col=1 - ) - - # Update axes - fig.update_xaxes(title_text="Time", row=row, col=1) - fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)') - - # Update layout - fig.update_layout( - height=400 * len(unique_agents), - width=1000, - title_text="APR and Performance Values per Agent", - template="plotly_white", - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1 - ), - margin=dict(r=20, l=20, t=30, b=20), - hovermode="closest" - ) - - # Save the figure (still useful for reference) - graph_file = "modius_apr_per_agent_graph.html" - fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) - - # Also save as image for compatibility - img_file = "modius_apr_per_agent_graph.png" - fig.write_image(img_file) - - logger.info(f"Per-agent graph saved to {graph_file} and {img_file}") - - # Return the figure object for direct use in Gradio - return fig - -def write_debug_info(df, fig): - """Minimal debug info function""" - try: - # Just log minimal information - logger.debug(f"Graph created with {len(df)} data points and {len(fig.data)} traces") - return True - except Exception as e: - logger.error(f"Error writing debug info: {e}") - return False - -def create_combined_time_series_graph(df): - """Create a time series graph showing average APR values across all agents""" - if len(df) == 0: - logger.error("No data to plot combined graph") - fig = go.Figure() - fig.add_annotation( - text="No data available", - x=0.5, y=0.5, - showarrow=False, font=dict(size=20) - ) - return fig - - # IMPORTANT: Force data types to ensure consistency - df['apr'] = df['apr'].astype(float) # Ensure APR is float - df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string - - # Get min and max time for shapes - min_time = df['timestamp'].min() - max_time = df['timestamp'].max() - - # Use April 17th, 2025 as the fixed start date for APR graph - x_start_date = datetime(2025, 4, 17) - - # CRITICAL: Log the exact dataframe we're using for plotting to help debug - logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}") - logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}") - logger.info("Graph data - all positive APR values only") - logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}") - - # Export full dataframe to CSV for debugging - debug_csv = "debug_graph_data.csv" - df.to_csv(debug_csv) - logger.info(f"Exported graph data to {debug_csv} for debugging") - - # Write detailed data report - with open("debug_graph_data_report.txt", "w") as f: - f.write("==== GRAPH DATA REPORT ====\n\n") - f.write(f"Total data points: {len(df)}\n") - f.write(f"Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}\n\n") - - # Output per-agent details - unique_agents = df['agent_id'].unique() - f.write(f"Number of agents: {len(unique_agents)}\n\n") - - for agent_id in unique_agents: - agent_data = df[df['agent_id'] == agent_id] - agent_name = agent_data['agent_name'].iloc[0] - - f.write(f"== Agent: {agent_name} (ID: {agent_id}) ==\n") - f.write(f" Total data points: {len(agent_data)}\n") - - apr_data = agent_data[agent_data['metric_type'] == 'APR'] - - f.write(f" APR data points: {len(apr_data)}\n") - - if not apr_data.empty: - f.write(f" APR values: {apr_data['apr'].tolist()}\n") - f.write(f" APR timestamps: {[ts.strftime('%Y-%m-%d %H:%M:%S') if ts is not None else 'None' for ts in apr_data['timestamp']]}\n") - - f.write("\n") - - 
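# Illustrative sketch (not part of the app): the combined-graph code below computes
# the 3-day trailing average APR with an explicit per-row loop over a time window.
# For comparison only, the same idea can be expressed with pandas' time-based rolling
# window, assuming a frame shaped like apr_data above ('timestamp' as datetime64,
# 'apr' as float).
import pandas as pd

def trailing_window_mean(points: pd.DataFrame, window: str = "3D") -> pd.Series:
    """Trailing time-window mean of 'apr' for irregularly spaced observations."""
    ordered = points.sort_values("timestamp")
    # rolling(on="timestamp") bounds the window by the timestamps themselves,
    # so uneven sampling is handled without resampling to a fixed frequency.
    return ordered.rolling(window, on="timestamp")["apr"].mean()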
logger.info("Generated detailed graph data report") - - # ENSURE THERE ARE NO CONFLICTING AXES OR TRACES - # Create Plotly figure in a clean state - fig = go.Figure() - - # Enable autoscaling instead of fixed ranges - logger.info("Using autoscaling for axes ranges") - - # Add background shapes for APR and Performance regions - min_time = df['timestamp'].min() - max_time = df['timestamp'].max() - - # Add shape for positive APR region (above zero) - fig.add_shape( - type="rect", - fillcolor="rgba(230, 243, 255, 0.3)", - line=dict(width=0), - y0=0, y1=100, # Use a fixed positive value - x0=min_time, x1=max_time, - layer="below" - ) - - # Add shape for negative APR region (below zero) - fig.add_shape( - type="rect", - fillcolor="rgba(255, 230, 230, 0.3)", - line=dict(width=0), - y0=-100, y1=0, # Use a fixed negative value - x0=min_time, x1=max_time, - layer="below" - ) - - # Add zero line - fig.add_shape( - type="line", - line=dict(dash="solid", width=1.5, color="black"), - y0=0, y1=0, - x0=min_time, x1=max_time - ) - - # MODIFIED: Calculate average APR values across all agents for each timestamp - # Filter for APR data only - apr_data = df[df['metric_type'] == 'APR'].copy() - - # Filter out outliers (APR values above 200 or below -200) - outlier_data = apr_data[(apr_data['apr'] > 200) | (apr_data['apr'] < -200)].copy() - apr_data_filtered = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)].copy() - - # Log the outliers for better debugging - if len(outlier_data) > 0: - excluded_count = len(outlier_data) - logger.info(f"Excluded {excluded_count} data points with outlier APR values (>200 or <-200)") - - # Group outliers by agent for detailed logging - outlier_agents = outlier_data.groupby('agent_name') - for agent_name, agent_outliers in outlier_agents: - logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:") - for idx, row in agent_outliers.iterrows(): - logger.info(f" - APR: {row['apr']}, timestamp: {row['timestamp']}") - - # Use the filtered data for all subsequent operations - apr_data = apr_data_filtered - - # Group by timestamp and calculate mean APR - avg_apr_data = apr_data.groupby('timestamp')['apr'].mean().reset_index() - - # Sort by timestamp - avg_apr_data = avg_apr_data.sort_values('timestamp') - - # Log the average APR data - logger.info(f"Calculated average APR data with {len(avg_apr_data)} points") - for idx, row in avg_apr_data.iterrows(): - logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_apr={row['apr']}") - - # Calculate moving average based on a time window (2 hours) - # Sort data by timestamp - apr_data_sorted = apr_data.sort_values('timestamp') - - # Create a new dataframe for the moving average - avg_apr_data_with_ma = avg_apr_data.copy() - avg_apr_data_with_ma['moving_avg'] = None # Initialize the moving average column - - # Define the time window for the moving average (3 days) - time_window = pd.Timedelta(days=3) - logger.info(f"Calculating moving average with time window of {time_window}") - - # Calculate moving averages: one for APR and one for adjusted APR - avg_apr_data_with_ma['moving_avg'] = None # 3-day window for APR - avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR - - # Keep track of the last valid adjusted_moving_avg value to handle gaps - last_valid_adjusted_moving_avg = None - - # Calculate the moving averages for each timestamp - for i, row in avg_apr_data_with_ma.iterrows(): - current_time = row['timestamp'] - window_start = current_time - time_window - - # Get 
all data points within the 3-day time window - window_data = apr_data_sorted[ - (apr_data_sorted['timestamp'] >= window_start) & - (apr_data_sorted['timestamp'] <= current_time) - ] - - # Calculate the average APR for the 3-day time window - if not window_data.empty: - avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean() - logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}") - - # Calculate adjusted APR moving average if data exists - has_adjusted_apr = 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any() - if has_adjusted_apr: - adjusted_avg = window_data['adjusted_apr'].dropna().mean() - avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = adjusted_avg - last_valid_adjusted_moving_avg = adjusted_avg - logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={adjusted_avg}") - else: - # If we don't have adjusted_apr data in this window but had some previously, - # use the last valid value to maintain continuity in the graph - if last_valid_adjusted_moving_avg is not None: - avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg - logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}") - else: - # If no data points in the window, use the current value - avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr'] - logger.debug(f"No data points in time window for {current_time}, using current value {row['apr']}") - - logger.info(f"Calculated time-based moving averages with {len(avg_apr_data_with_ma)} points") - - # Find the last date where we have valid moving average data - last_valid_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_apr_data_with_ma['moving_avg'].dropna().empty else None - - # Find the last date where we have valid adjusted moving average data - last_valid_adj_ma_date = None - if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any(): - last_valid_adj_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['adjusted_moving_avg'].notna()]['timestamp'].max() - - # Determine the last valid date for either moving average - last_valid_date = last_valid_ma_date - if last_valid_adj_ma_date is not None: - last_valid_date = max(last_valid_date, last_valid_adj_ma_date) if last_valid_date is not None else last_valid_adj_ma_date - - # If we don't have any valid moving average data, use the max time from the original data - if last_valid_date is None: - last_valid_date = df['timestamp'].max() - - logger.info(f"Last valid moving average date: {last_valid_ma_date}") - logger.info(f"Last valid adjusted moving average date: {last_valid_adj_ma_date}") - logger.info(f"Using last valid date for graph: {last_valid_date}") - - # Plot individual agent data points with agent names in hover, but limit display for scalability - if not apr_data.empty: - # Group by agent to use different colors for each agent - unique_agents = apr_data['agent_name'].unique() - colors = px.colors.qualitative.Plotly[:len(unique_agents)] - - # Create a color map for agents - color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} - - # Calculate the total number of data points per agent to determine which are most active - agent_counts = apr_data['agent_name'].value_counts() - - # Determine how many agents to show individually (limit to top 5 
most active) - MAX_VISIBLE_AGENTS = 5 - top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() - - logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents") - - # Add data points for each agent, but only make top agents visible by default - for agent_name in unique_agents: - agent_data = apr_data[apr_data['agent_name'] == agent_name] - - # Explicitly convert to Python lists - x_values = agent_data['timestamp'].tolist() - y_values = agent_data['apr'].tolist() - - # Change default visibility to False to hide all agent data points - is_visible = False - - # Add data points as markers for APR - fig.add_trace( - go.Scatter( - x=x_values, - y=y_values, - mode='markers', # Only markers for original data - marker=dict( - color=color_map[agent_name], - symbol='circle', - size=10, - line=dict(width=1, color='black') - ), - name=f'Agent: {agent_name} (APR)', - hovertemplate='Time: %{x}
<br>APR: %{y:.2f}<br>
Agent: ' + agent_name + '', - visible=is_visible # All agents hidden by default - ) - ) - logger.info(f"Added APR data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})") - - # Add data points for adjusted APR if it exists - if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any(): - x_values_adj = agent_data['timestamp'].tolist() - y_values_adj = agent_data['adjusted_apr'].tolist() - - fig.add_trace( - go.Scatter( - x=x_values_adj, - y=y_values_adj, - mode='markers', # Only markers for original data - marker=dict( - color=color_map[agent_name], - symbol='diamond', # Different symbol for adjusted APR - size=10, - line=dict(width=1, color='black') - ), - name=f'Agent: {agent_name} (Adjusted APR)', - hovertemplate='Time: %{x}
<br>Adjusted APR: %{y:.2f}<br>
Agent: ' + agent_name + '', - visible=is_visible # All agents hidden by default - ) - ) - logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})") - - # Add APR moving average as a smooth line - x_values_ma = avg_apr_data_with_ma['timestamp'].tolist() - y_values_ma = avg_apr_data_with_ma['moving_avg'].tolist() - - # Create hover template for the APR moving average line - hover_data_apr = [] - for idx, row in avg_apr_data_with_ma.iterrows(): - timestamp = row['timestamp'] - # Format timestamp to show only up to seconds (not milliseconds) - formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S') - - # Calculate number of active agents in the last 24 hours - # Use ROI data after April 25th, 2025, and APR data before that date - time_24h_ago = timestamp - pd.Timedelta(hours=24) - april_25_2025 = datetime(2025, 4, 25) - - if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty: - # After April 25th, 2025: Use ROI data - roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) & - (global_roi_df['timestamp'] <= timestamp)] - active_agents = len(roi_window_data['agent_id'].unique()) - logger.debug(f"Using ROI data for active agent count at {timestamp} (after Apr 25): {active_agents} agents") - else: - # Before April 25th, 2025 or if ROI data is not available: Use APR data - active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) & - (apr_data['timestamp'] <= timestamp)]['agent_id'].unique()) - logger.debug(f"Using APR data for active agent count at {timestamp} (before Apr 25): {active_agents} agents") - - hover_data_apr.append( - f"Time: {formatted_timestamp}
<br>Avg APR (3d window): {row['moving_avg']:.2f}<br>
Active agents (24h): {active_agents}" - ) - - fig.add_trace( - go.Scatter( - x=x_values_ma, - y=y_values_ma, - mode='lines', # Only lines for moving average - line=dict(color='red', width=2), # Thinner line - name='Average APR (3d window)', - hovertext=hover_data_apr, - hoverinfo='text', - visible=True # Visible by default - ) - ) - logger.info(f"Added 3-day moving average APR trace with {len(x_values_ma)} points") - - # Add adjusted APR moving average line if it exists - if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any(): - # Create a copy of the dataframe with forward-filled adjusted_moving_avg values - # to ensure the line continues even when we have missing data - filled_avg_apr_data = avg_apr_data_with_ma.copy() - filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill') - - # Use the filled dataframe for the adjusted APR line - x_values_adj = filled_avg_apr_data['timestamp'].tolist() - y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist() - - # Create hover template for the adjusted APR moving average line - hover_data_adj = [] - for idx, row in filled_avg_apr_data.iterrows(): - timestamp = row['timestamp'] - # Format timestamp to show only up to seconds (not milliseconds) - formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S') - - # Calculate number of active agents in the last 24 hours - # Use ROI data after April 25th, 2025, and APR data before that date - time_24h_ago = timestamp - pd.Timedelta(hours=24) - april_25_2025 = datetime(2025, 4, 25) - - if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty: - # After April 25th, 2025: Use ROI data - roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) & - (global_roi_df['timestamp'] <= timestamp)] - active_agents = len(roi_window_data['agent_id'].unique()) - logger.debug(f"Using ROI data for adjusted APR active agent count at {timestamp} (after Apr 25)") - else: - # Before April 25th, 2025 or if ROI data is not available: Use APR data - active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) & - (apr_data['timestamp'] <= timestamp)]['agent_id'].unique()) - logger.debug(f"Using APR data for adjusted APR active agent count at {timestamp} (before Apr 25)") - - if pd.notna(row['adjusted_moving_avg']): - hover_data_adj.append( - f"Time: {formatted_timestamp}
<br>Avg ETH Adjusted APR (3d window): {row['adjusted_moving_avg']:.2f}<br>
Active agents (24h): {active_agents}" - ) - else: - hover_data_adj.append( - f"Time: {formatted_timestamp}
<br>Avg ETH Adjusted APR (3d window): N/A<br>
Active agents (24h): {active_agents}" - ) - - fig.add_trace( - go.Scatter( - x=x_values_adj, - y=y_values_adj_ma, - mode='lines', # Only lines for moving average - line=dict(color='green', width=4), # Thicker solid line for adjusted APR - name='Average ETH Adjusted APR (3d window)', - hovertext=hover_data_adj, - hoverinfo='text', - visible=True # Visible by default - ) - ) - logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_adj)} points (with forward-filling for missing values)") - else: - logger.warning("No adjusted APR moving average data available to plot") - - # Removed cumulative APR as requested - logger.info("Cumulative APR graph line has been removed as requested") - - # Update layout - use simple boolean values everywhere - # Make chart responsive instead of fixed width - fig.update_layout( - title=dict( - text="Modius Agents", - font=dict( - family="Arial, sans-serif", - size=22, - color="black", - weight="bold" - ) - ), - xaxis_title=None, # Remove x-axis title to use annotation instead - yaxis_title=None, # Remove the y-axis title as we'll use annotations instead - template="plotly_white", - height=600, # Reduced height for better fit on smaller screens - # Removed fixed width to enable responsiveness - autosize=True, # Enable auto-sizing for responsiveness - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1, - groupclick="toggleitem" - ), - margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title - hovermode="closest" - ) - - # Add annotations for y-axis regions - fig.add_annotation( - x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels - y=-25, # Middle of the negative region - xref="paper", - yref="y", - text="Percent drawdown [%]", - showarrow=False, - font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - textangle=-90, # Rotate text to be vertical - align="center" - ) - - fig.add_annotation( - x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels - y=50, # Middle of the positive region - xref="paper", - yref="y", - text="Agent APR [%]", - showarrow=False, - font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - textangle=-90, # Rotate text to be vertical - align="center" - ) - - # Remove x-axis title annotation - # fig.add_annotation( - # x=0.5, # Center of the x-axis - # y=-0.15, # Below the x-axis - # xref="paper", - # yref="paper", - # text="Date", - # showarrow=False, - # font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - # align="center" - # ) - - # Update layout for legend - fig.update_layout( - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1, - groupclick="toggleitem", - font=dict( - family="Arial, sans-serif", - size=14, # Adjusted font size - color="black", - weight="bold" - ) - ) - ) - - # Update y-axis with fixed range of -50 to +100 for psychological effect - fig.update_yaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - # Use fixed range instead of autoscaling - autorange=False, # Disable autoscaling - range=[-50, 100], # Set fixed range from -50 to +100 - tickformat=".2f", # Format tick labels with 2 decimal places - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - title=None # Remove the built-in axis title since we're using annotations - ) - - # Update x-axis 
with better formatting and fixed range - fig.update_xaxes( - showgrid=True, - gridwidth=1, - gridcolor='rgba(0,0,0,0.1)', - # Set fixed range with April 17 as start date and ending at the last valid date - autorange=False, # Disable autoscaling - range=[x_start_date, last_valid_date], # Set fixed range from April 17 to last valid date - tickformat="%b %d", # Simplified date format without time - tickangle=-30, # Angle the labels for better readability - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - title=None # Remove built-in title to use annotation instead - ) - - # SIMPLIFIED APPROACH: Do a direct plot without markers for comparison - # This creates a simple, reliable fallback plot if the advanced one fails - try: - # Write detailed debug information before saving the figure - write_debug_info(df, fig) - - # Save the figure (still useful for reference) - graph_file = "modius_apr_combined_graph.html" - fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) - - # Also save as image for compatibility - img_file = "modius_apr_combined_graph.png" - try: - fig.write_image(img_file) - logger.info(f"Combined graph saved to {graph_file} and {img_file}") - except Exception as e: - logger.error(f"Error saving image: {e}") - logger.info(f"Combined graph saved to {graph_file} only") - - # Return the figure object for direct use in Gradio - return fig - except Exception as e: - # If the complex graph approach fails, create a simpler one - logger.error(f"Error creating advanced graph: {e}") - logger.info("Falling back to Simpler graph") - - # Create a simpler graph as fallback - simple_fig = go.Figure() - - # Add zero line - simple_fig.add_shape( - type="line", - line=dict(dash="solid", width=1.5, color="black"), - y0=0, y1=0, - x0=min_time, x1=max_time - ) - - # Define colors for the fallback graph - fallback_colors = px.colors.qualitative.Plotly - - # Simply plot the average APR data with moving average - if not avg_apr_data.empty: - # Sort by timestamp - avg_apr_data = avg_apr_data.sort_values('timestamp') - - # Calculate both moving averages for the fallback graph - avg_apr_data_with_ma = avg_apr_data.copy() - avg_apr_data_with_ma['moving_avg'] = None # 2-hour window - avg_apr_data_with_ma['infinite_avg'] = None # Infinite window - - # Define the time window (6 hours) - time_window = pd.Timedelta(hours=6) - - # Calculate the moving averages for each timestamp - for i, row in avg_apr_data_with_ma.iterrows(): - current_time = row['timestamp'] - window_start = current_time - time_window - - # Get all data points within the 2-hour time window - window_data = apr_data[ - (apr_data['timestamp'] >= window_start) & - (apr_data['timestamp'] <= current_time) - ] - - # Get all data points up to the current timestamp (infinite window) - infinite_window_data = apr_data[ - apr_data['timestamp'] <= current_time - ] - - # Calculate the average APR for the 2-hour time window - if not window_data.empty: - avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean() - else: - # If no data points in the window, use the current value - avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr'] - - # Calculate the average APR for the infinite window - if not infinite_window_data.empty: - avg_apr_data_with_ma.at[i, 'infinite_avg'] = infinite_window_data['apr'].mean() - else: - avg_apr_data_with_ma.at[i, 'infinite_avg'] = row['apr'] - - # Add data points for each agent, but only make top agents visible by default - unique_agents = 
apr_data['agent_name'].unique() - colors = px.colors.qualitative.Plotly[:len(unique_agents)] - color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} - - # Calculate the total number of data points per agent - agent_counts = apr_data['agent_name'].value_counts() - - # Determine how many agents to show individually (limit to top 5 most active) - MAX_VISIBLE_AGENTS = 5 - top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() - - for agent_name in unique_agents: - agent_data = apr_data[apr_data['agent_name'] == agent_name] - - # Determine if this agent should be visible by default - is_visible = agent_name in top_agents - - # Add data points as markers - simple_fig.add_trace( - go.Scatter( - x=agent_data['timestamp'], - y=agent_data['apr'], - mode='markers', - name=f'Agent: {agent_name}', - marker=dict( - size=10, - color=color_map[agent_name] - ), - hovertemplate='Time: %{x}
<br>APR: %{y:.2f}<br>
Agent: ' + agent_name + '', - visible=is_visible # Only top agents visible by default - ) - ) - - # Add 2-hour moving average as a line - simple_fig.add_trace( - go.Scatter( - x=avg_apr_data_with_ma['timestamp'], - y=avg_apr_data_with_ma['moving_avg'], - mode='lines', - name='Average APR (6h window)', - line=dict(width=2, color='red') # Thinner line - ) - ) - - # Add infinite window moving average as another line - simple_fig.add_trace( - go.Scatter( - x=avg_apr_data_with_ma['timestamp'], - y=avg_apr_data_with_ma['infinite_avg'], - mode='lines', - name='Cumulative Average APR (all data)', - line=dict(width=4, color='green') # Thicker solid line - ) - ) - - # Simplified layout with adjusted y-axis range and increased size - simple_fig.update_layout( - title=dict( - text="Modius Agents", - font=dict( - family="Arial, sans-serif", - size=22, - color="black", - weight="bold" - ) - ), - xaxis_title=None, # Remove x-axis title to use annotation instead - yaxis_title=None, # Remove the y-axis title as we'll use annotations instead - yaxis=dict( - # No fixed range - let Plotly autoscale - autorange=True, # Explicitly enable autoscaling - tickformat=".2f", # Format tick labels with 2 decimal places - tickfont=dict(size=12) # Larger font for tick labels - ), - height=600, # Reduced height for better fit - # Removed fixed width to enable responsiveness - autosize=True, # Enable auto-sizing for responsiveness - template="plotly_white", # Use a cleaner template - margin=dict(r=30, l=120, t=40, b=50) # Increased bottom margin for x-axis title - ) - - # Add annotations for y-axis regions in the fallback graph - simple_fig.add_annotation( - x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels - y=-25, # Middle of the negative region - xref="paper", - yref="y", - text="Percent drawdown [%]", - showarrow=False, - font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - textangle=-90, # Rotate text to be vertical - align="center" - ) - - simple_fig.add_annotation( - x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels - y=50, # Middle of the positive region - xref="paper", - yref="y", - text="Agent APR [%]", - showarrow=False, - font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - textangle=-90, # Rotate text to be vertical - align="center" - ) - - # Remove x-axis title annotation - # simple_fig.add_annotation( - # x=0.5, # Center of the x-axis - # y=-0.15, # Below the x-axis - # xref="paper", - # yref="paper", - # text="Date", - # showarrow=False, - # font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - # align="center" - # ) - - # Update legend font for fallback graph - simple_fig.update_layout( - legend=dict( - font=dict( - family="Arial, sans-serif", - size=14, # Adjusted font size - color="black", - weight="bold" - ) - ) - ) - - # Apply fixed range to the x-axis for the fallback graph - simple_fig.update_xaxes( - autorange=False, # Disable autoscaling - range=[x_start_date, max_time], # Set fixed range from April 17 - tickformat="%b %d", # Simplified date format without time - tickangle=-30, - tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size - title=None # Remove built-in title to use annotation instead - ) - - # Update y-axis tick font for fallback graph - simple_fig.update_yaxes( - tickfont=dict(size=14, family="Arial, sans-serif", color="black", 
weight="bold") # Adjusted font size - ) - - # Add a note about hidden agents if there are more than MAX_VISIBLE_AGENTS - if len(unique_agents) > MAX_VISIBLE_AGENTS: - simple_fig.add_annotation( - text=f"Note: Only showing top {MAX_VISIBLE_AGENTS} agents by default. Toggle others in legend.", - xref="paper", yref="paper", - x=0.5, y=1.05, - showarrow=False, - font=dict(size=12, color="gray"), - align="center" - ) - - # Return the simple figure - return simple_fig - -def save_to_csv(df): - """Save the APR data DataFrame to a CSV file and return the file path""" - if df.empty: - logger.error("No APR data to save to CSV") - return None - - # Define the CSV file path - csv_file = "modius_apr_values.csv" - - # Save to CSV - df.to_csv(csv_file, index=False) - logger.info(f"APR data saved to {csv_file}") - - # Also generate a statistics CSV file - stats_df = generate_statistics_from_data(df) - stats_csv = "modius_apr_statistics.csv" - stats_df.to_csv(stats_csv, index=False) - logger.info(f"Statistics saved to {stats_csv}") - - # Log detailed statistics about adjusted APR - if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any(): - adjusted_stats = stats_df[stats_df['avg_adjusted_apr'].notna()] - logger.info(f"Agents with adjusted APR data: {len(adjusted_stats)} out of {len(stats_df)}") - - for _, row in adjusted_stats.iterrows(): - if row['agent_id'] != 'ALL': # Skip the overall stats row - logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}") - - # Log overall adjusted APR stats - overall_row = stats_df[stats_df['agent_id'] == 'ALL'] - if not overall_row.empty and pd.notna(overall_row['avg_adjusted_apr'].iloc[0]): - logger.info(f"Overall adjusted APR stats: avg={overall_row['avg_adjusted_apr'].iloc[0]:.2f}, min={overall_row['min_adjusted_apr'].iloc[0]:.2f}, max={overall_row['max_adjusted_apr'].iloc[0]:.2f}") - - return csv_file - -def generate_statistics_from_data(df): - """Generate statistics from the APR data""" - if df.empty: - return pd.DataFrame() - - # Get unique agents - unique_agents = df['agent_id'].unique() - stats_list = [] - - # Generate per-agent statistics - for agent_id in unique_agents: - agent_data = df[df['agent_id'] == agent_id] - agent_name = agent_data['agent_name'].iloc[0] - - # APR statistics - apr_data = agent_data[agent_data['metric_type'] == 'APR'] - real_apr = apr_data[apr_data['is_dummy'] == False] - - # Performance statistics - perf_data = agent_data[agent_data['metric_type'] == 'Performance'] - real_perf = perf_data[perf_data['is_dummy'] == False] - - # Check if adjusted_apr exists and has non-null values - has_adjusted_apr = 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any() - - stats = { - 'agent_id': agent_id, - 'agent_name': agent_name, - 'total_points': len(agent_data), - 'apr_points': len(apr_data), - 'performance_points': len(perf_data), - 'real_apr_points': len(real_apr), - 'real_performance_points': len(real_perf), - 'avg_apr': apr_data['apr'].mean() if not apr_data.empty else None, - 'avg_performance': perf_data['apr'].mean() if not perf_data.empty else None, - 'max_apr': apr_data['apr'].max() if not apr_data.empty else None, - 'min_apr': apr_data['apr'].min() if not apr_data.empty else None, - 'avg_adjusted_apr': apr_data['adjusted_apr'].mean() if has_adjusted_apr else None, - 'max_adjusted_apr': apr_data['adjusted_apr'].max() if has_adjusted_apr else None, - 'min_adjusted_apr': 
apr_data['adjusted_apr'].min() if has_adjusted_apr else None, - 'latest_timestamp': agent_data['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not agent_data.empty else None - } - stats_list.append(stats) - - # Generate overall statistics - apr_only = df[df['metric_type'] == 'APR'] - perf_only = df[df['metric_type'] == 'Performance'] - - # Check if adjusted_apr exists and has non-null values for overall stats - has_adjusted_apr_overall = 'adjusted_apr' in apr_only.columns and apr_only['adjusted_apr'].notna().any() - - overall_stats = { - 'agent_id': 'ALL', - 'agent_name': 'All Agents', - 'total_points': len(df), - 'apr_points': len(apr_only), - 'performance_points': len(perf_only), - 'real_apr_points': len(apr_only[apr_only['is_dummy'] == False]), - 'real_performance_points': len(perf_only[perf_only['is_dummy'] == False]), - 'avg_apr': apr_only['apr'].mean() if not apr_only.empty else None, - 'avg_performance': perf_only['apr'].mean() if not perf_only.empty else None, - 'max_apr': apr_only['apr'].max() if not apr_only.empty else None, - 'min_apr': apr_only['apr'].min() if not apr_only.empty else None, - 'avg_adjusted_apr': apr_only['adjusted_apr'].mean() if has_adjusted_apr_overall else None, - 'max_adjusted_apr': apr_only['adjusted_apr'].max() if has_adjusted_apr_overall else None, - 'min_adjusted_apr': apr_only['adjusted_apr'].min() if has_adjusted_apr_overall else None, - 'latest_timestamp': df['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not df.empty else None - } - stats_list.append(overall_stats) - - return pd.DataFrame(stats_list) - -# Create dummy functions for the commented out imports -def create_transcation_visualizations(): - """Dummy implementation that returns a placeholder graph""" - fig = go.Figure() - fig.add_annotation( - text="Blockchain data loading disabled - placeholder visualization", - x=0.5, y=0.5, xref="paper", yref="paper", - showarrow=False, font=dict(size=20) - ) - return fig - -def create_active_agents_visualizations(): - """Dummy implementation that returns a placeholder graph""" - fig = go.Figure() - fig.add_annotation( - text="Blockchain data loading disabled - placeholder visualization", - x=0.5, y=0.5, xref="paper", yref="paper", - showarrow=False, font=dict(size=20) - ) - return fig - -# Comment out the blockchain connection code """ -# Load environment variables from .env file -# RPC URLs -OPTIMISM_RPC_URL = os.getenv('OPTIMISM_RPC_URL') -MODE_RPC_URL = os.getenv('MODE_RPC_URL') - -# Initialize Web3 instances -web3_instances = { - 'optimism': Web3(Web3.HTTPProvider(OPTIMISM_RPC_URL)), - 'mode': Web3(Web3.HTTPProvider(MODE_RPC_URL)) -} - -# Contract addresses for service registries -contract_addresses = { - 'optimism': '0x3d77596beb0f130a4415df3D2D8232B3d3D31e44', - 'mode': '0x3C1fF68f5aa342D296d4DEe4Bb1cACCA912D95fE' -} - -# Load the ABI from the provided JSON file -with open('./contracts/service_registry_abi.json', 'r') as abi_file: - contract_abi = json.load(abi_file) +Optimized Modius Agent Performance Dashboard - Main Application Entry Point -# Create the contract instances -service_registries = { - chain_name: web3.eth.contract(address=contract_addresses[chain_name], abi=contract_abi) - for chain_name, web3 in web3_instances.items() -} - -# Check if connections are successful -for chain_name, web3_instance in web3_instances.items(): - if not web3_instance.is_connected(): - raise Exception(f"Failed to connect to the {chain_name.capitalize()} network.") - else: - print(f"Successfully connected to the {chain_name.capitalize()} network.") 
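# Illustrative sketch (not executed by the app): the commented-out block above
# connects to the Optimism and Mode RPCs and instantiates the service-registry
# contracts. A condensed version of that pattern is shown here for reference only;
# it reuses the RPC env vars, registry addresses, and ABI path from the disabled code.
import json
import os
from web3 import Web3

def connect_service_registries():
    """Return {chain: contract} for each reachable chain, skipping chains that are down."""
    rpc_urls = {
        "optimism": os.getenv("OPTIMISM_RPC_URL"),
        "mode": os.getenv("MODE_RPC_URL"),
    }
    registry_addresses = {
        "optimism": "0x3d77596beb0f130a4415df3D2D8232B3d3D31e44",
        "mode": "0x3C1fF68f5aa342D296d4DEe4Bb1cACCA912D95fE",
    }
    with open("./contracts/service_registry_abi.json", "r") as abi_file:
        abi = json.load(abi_file)

    registries = {}
    for chain, url in rpc_urls.items():
        w3 = Web3(Web3.HTTPProvider(url))
        if not w3.is_connected():
            # Unlike the original block (which raised), unreachable chains are skipped here.
            continue
        registries[chain] = w3.eth.contract(address=registry_addresses[chain], abi=abi)
    return registries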
+This is the refactored version of the original app.py with improved modularity, +maintainability, and performance. """ +import os +import sys -# Dummy blockchain functions to replace the commented ones -def get_transfers(integrator: str, wallet: str) -> str: - """Dummy function that returns an empty result""" - return {"transfers": []} - -def fetch_and_aggregate_transactions(): - """Dummy function that returns empty data""" - return [], {} - -# Function to parse the transaction data and prepare it for visualization -def process_transactions_and_agents(data): - """Dummy function that returns empty dataframes""" - df_transactions = pd.DataFrame() - df_agents = pd.DataFrame(columns=['date', 'agent_count']) - df_agents_weekly = pd.DataFrame() - return df_transactions, df_agents, df_agents_weekly - -# Function to create visualizations based on the metrics -def create_visualizations(): - """ - # Commenting out the original visualization code temporarily for debugging - transactions_data = fetch_and_aggregate_transactions() - df_transactions, df_agents, df_agents_weekly = process_transactions_and_agents(transactions_data) - - # Fetch daily value locked data - df_tvl = pd.read_csv('daily_value_locked.csv') - - # Calculate total value locked per chain per day - df_tvl["total_value_locked_usd"] = df_tvl["amount0_usd"] + df_tvl["amount1_usd"] - df_tvl_daily = df_tvl.groupby(["date", "chain_name"])["total_value_locked_usd"].sum().reset_index() - df_tvl_daily['date'] = pd.to_datetime(df_tvl_daily['date']) - - # Filter out dates with zero total value locked - df_tvl_daily = df_tvl_daily[df_tvl_daily["total_value_locked_usd"] > 0] - - chain_name_map = { - "mode": "Mode", - "base": "Base", - "ethereum": "Ethereum", - "optimism": "Optimism" - } - df_tvl_daily["chain_name"] = df_tvl_daily["chain_name"].map(chain_name_map) - - # Plot total value locked - fig_tvl = px.bar( - df_tvl_daily, - x="date", - y="total_value_locked_usd", - color="chain_name", - opacity=0.7, - title="Total Volume Invested in Pools in Different Chains Daily", - labels={"date": "Date","chain_name": "Transaction Chain", "total_value_locked_usd": "Total Volume Invested (USD)"}, - barmode='stack', - color_discrete_map={ - "Mode": "orange", - "Base": "purple", - "Ethereum": "darkgreen", - "Optimism": "blue" - } - ) - fig_tvl.update_layout( - xaxis_title="Date", - - yaxis=dict(tickmode='linear', tick0=0, dtick=4), - xaxis=dict( - tickmode='array', - tickvals=df_tvl_daily['date'], - ticktext=df_tvl_daily['date'].dt.strftime('%b %d'), - tickangle=-45, - ), - bargap=0.6, # Increase gap between bar groups (0-1) - bargroupgap=0.1, # Decrease gap between bars in a group (0-1) - height=600, - width=1200, # Specify width to prevent bars from being too wide - showlegend=True, - template='plotly_white' - ) - fig_tvl.update_xaxes(tickformat="%b %d") - - chain_name_map = { - 10: "Optimism", - 8453: "Base", - 1: "Ethereum", - 34443: "Mode" - } - - df_transactions["sending_chain"] = df_transactions["sending_chain"].map(chain_name_map) - df_transactions["receiving_chain"] = df_transactions["receiving_chain"].map(chain_name_map) - - df_transactions["sending_chain"] = df_transactions["sending_chain"].astype(str) - df_transactions["receiving_chain"] = df_transactions["receiving_chain"].astype(str) - df_transactions['date'] = pd.to_datetime(df_transactions['date']) - df_transactions["is_swap"] = df_transactions.apply(lambda x: x["sending_chain"] == x["receiving_chain"], axis=1) - - swaps_per_chain = df_transactions[df_transactions["is_swap"]].groupby(["date", 
"sending_chain"]).size().reset_index(name="swap_count") - fig_swaps_chain = px.bar( - swaps_per_chain, - x="date", - y="swap_count", - color="sending_chain", - title="Chain Daily Activity: Swaps", - labels={"sending_chain": "Transaction Chain", "swap_count": "Daily Swap Nr"}, - barmode="stack", - opacity=0.7, - color_discrete_map={ - "Optimism": "blue", - "Ethereum": "darkgreen", - "Base": "purple", - "Mode": "orange" - } - ) - fig_swaps_chain.update_layout( - xaxis_title="Date", - yaxis_title="Daily Swap Count", - yaxis=dict(tickmode='linear', tick0=0, dtick=1), - xaxis=dict( - tickmode='array', - tickvals=[d for d in swaps_per_chain['date']], - ticktext=[d.strftime('%m-%d') for d in swaps_per_chain['date']], - tickangle=-45, - ), - bargap=0.6, - bargroupgap=0.1, - height=600, - width=1200, - margin=dict(l=50, r=50, t=50, b=50), - showlegend=True, - legend=dict( - yanchor="top", - y=0.99, - xanchor="right", - x=0.99 - ), - template='plotly_white' - ) - fig_swaps_chain.update_xaxes(tickformat="%m-%d") - - df_transactions["is_bridge"] = df_transactions.apply(lambda x: x["sending_chain"] != x["receiving_chain"], axis=1) - - bridges_per_chain = df_transactions[df_transactions["is_bridge"]].groupby(["date", "sending_chain"]).size().reset_index(name="bridge_count") - fig_bridges_chain = px.bar( - bridges_per_chain, - x="date", - y="bridge_count", - color="sending_chain", - title="Chain Daily Activity: Bridges", - labels={"sending_chain": "Transaction Chain", "bridge_count": "Daily Bridge Nr"}, - barmode="stack", - opacity=0.7, - color_discrete_map={ - "Optimism": "blue", - "Ethereum": "darkgreen", - "Base": "purple", - "Mode": "orange" - } - ) - fig_bridges_chain.update_layout( - xaxis_title="Date", - yaxis_title="Daily Bridge Count", - yaxis=dict(tickmode='linear', tick0=0, dtick=1), - xaxis=dict( - tickmode='array', - tickvals=[d for d in bridges_per_chain['date']], - ticktext=[d.strftime('%m-%d') for d in bridges_per_chain['date']], - tickangle=-45, - ), - bargap=0.6, - bargroupgap=0.1, - height=600, - width=1200, - margin=dict(l=50, r=50, t=50, b=50), - showlegend=True, - legend=dict( - yanchor="top", - y=0.99, - xanchor="right", - x=0.99 - ), - template='plotly_white' - ) - fig_bridges_chain.update_xaxes(tickformat="%m-%d") - df_agents['date'] = pd.to_datetime(df_agents['date']) +# Add the modius_performance package to the path +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - daily_agents_df = df_agents.groupby('date').agg({'agent_count': 'sum'}).reset_index() - daily_agents_df.rename(columns={'agent_count': 'daily_agent_count'}, inplace=True) - # Sort by date to ensure proper running total calculation - daily_agents_df = daily_agents_df.sort_values('date') - - # Create week column - daily_agents_df['week'] = daily_agents_df['date'].dt.to_period('W').apply(lambda r: r.start_time) - - # Calculate running total within each week - daily_agents_df['running_weekly_total'] = daily_agents_df.groupby('week')['daily_agent_count'].cumsum() - - # Create final merged dataframe - weekly_merged_df = daily_agents_df.copy() - adjustment_date = pd.to_datetime('2024-11-15') - weekly_merged_df.loc[weekly_merged_df['date'] == adjustment_date, 'daily_agent_count'] -= 1 - weekly_merged_df.loc[weekly_merged_df['date'] == adjustment_date, 'running_weekly_total'] -= 1 - fig_agents_registered = go.Figure(data=[ - go.Bar( - name='Daily nr of Registered Agents', - x=weekly_merged_df['date'].dt.strftime("%b %d"), - y=weekly_merged_df['daily_agent_count'], - opacity=0.7, - marker_color='blue' - ), - 
go.Bar( - name='Weekly Nr of Registered Agents', - x=weekly_merged_df['date'].dt.strftime("%b %d"), - y=weekly_merged_df['running_weekly_total'], - opacity=0.7, - marker_color='purple' - ) - ]) +from modius_performance.utils.logging_config import setup_logging +from modius_performance.ui.dashboard import create_dashboard - fig_agents_registered.update_layout( - xaxis_title='Date', - yaxis_title='Number of Agents', - title="Nr of Agents Registered", - barmode='group', - yaxis=dict(tickmode='linear', tick0=0, dtick=1), - xaxis=dict( - categoryorder='array', - categoryarray=weekly_merged_df['date'].dt.strftime("%b %d"), - tickangle=-45 - ), - bargap=0.3, - height=600, - width=1200, - showlegend=True, - legend=dict( - yanchor="top", - xanchor="right", - ), - template='plotly_white', - ) - return fig_swaps_chain, fig_bridges_chain, fig_agents_registered,fig_tvl - """ - # Placeholder figures for testing - fig_swaps_chain = go.Figure() - fig_swaps_chain.add_annotation( - text="Blockchain data loading disabled - placeholder visualization", - x=0.5, y=0.5, xref="paper", yref="paper", - showarrow=False, font=dict(size=20) - ) - - fig_bridges_chain = go.Figure() - fig_bridges_chain.add_annotation( - text="Blockchain data loading disabled - placeholder visualization", - x=0.5, y=0.5, xref="paper", yref="paper", - showarrow=False, font=dict(size=20) - ) +def main(): + """Main application entry point.""" + # Set up logging + setup_logging() - fig_agents_registered = go.Figure() - fig_agents_registered.add_annotation( - text="Blockchain data loading disabled - placeholder visualization", - x=0.5, y=0.5, xref="paper", yref="paper", - showarrow=False, font=dict(size=20) - ) + # Create and launch the dashboard + dashboard = create_dashboard() - fig_tvl = go.Figure() - fig_tvl.add_annotation( - text="Blockchain data loading disabled - placeholder visualization", - x=0.5, y=0.5, xref="paper", yref="paper", - showarrow=False, font=dict(size=20) + # Launch the application with automatic port selection + dashboard.launch( + server_name="0.0.0.0", + server_port=None, # Let Gradio find an available port + share=False, + debug=False, + show_error=True ) - - return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl -# Modify dashboard function to make the plot container responsive -def dashboard(): - with gr.Blocks() as demo: - gr.Markdown("# Average Modius Agent Performance") - - # Create tabs for APR, ROI, Volume, and APR vs Agent Hash metrics - with gr.Tabs(): - # APR Metrics tab - with gr.Tab("APR Metrics"): - with gr.Column(): - refresh_apr_btn = gr.Button("Refresh APR Data") - - # Create container for plotly figure with responsive sizing - with gr.Column(): - combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot") - - # Create compact toggle controls at the bottom of the graph - with gr.Row(visible=True): - gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title") - - with gr.Row(): - with gr.Column(): - with gr.Row(elem_id="apr_toggle_container"): - with gr.Column(scale=1, min_width=150): - apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle") - - with gr.Column(scale=1, min_width=150): - adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle") - - # Add a text area for status messages - apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) - - # ROI Metrics tab - with gr.Tab("ROI Metrics"): - with gr.Column(): - refresh_roi_btn = gr.Button("Refresh 
ROI Data") - - # Create container for plotly figure with responsive sizing - with gr.Column(): - combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot") - - # Create compact toggle controls at the bottom of the graph - with gr.Row(visible=True): - gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title") - - with gr.Row(): - with gr.Column(): - with gr.Row(elem_id="roi_toggle_container"): - with gr.Column(scale=1, min_width=150): - roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle") - - # Add a text area for status messages - roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) - - # Volume Metrics tab - with gr.Tab("Volume Metrics"): - with gr.Column(): - refresh_volume_btn = gr.Button("Refresh Volume Data") - - # Create container for plotly figure with responsive sizing - with gr.Column(): - combined_volume_graph = gr.Plot(label="Volume for All Agents", elem_id="responsive_volume_plot") - - # Create compact toggle controls at the bottom of the graph - with gr.Row(visible=True): - gr.Markdown("##### Toggle Graph Lines", elem_id="volume_toggle_title") - - with gr.Row(): - with gr.Column(): - with gr.Row(elem_id="volume_toggle_container"): - with gr.Column(scale=1, min_width=150): - volume_toggle = gr.Checkbox(label="Volume Average", value=True, elem_id="volume_toggle") - - # Add a text area for status messages - volume_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) - - # Performance Graph tab - with gr.Tab("Performance Graph"): - with gr.Column(): - refresh_apr_hash_btn = gr.Button("Refresh APR vs Agent Hash Data") - - # Create container for plotly figure with responsive sizing - with gr.Column(): - apr_vs_agent_hash_graph = gr.Plot(label="APR vs Agent Hash", elem_id="responsive_apr_hash_plot") - - # Add a text area for status messages - apr_hash_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) - - # Add custom CSS for making the plots responsive - gr.HTML(""" - - """) - - # Function to update the APR graph - def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True): - # Generate visualization and get figure object directly - try: - combined_fig, _ = generate_apr_visualizations() - - # Update visibility of traces based on toggle values - for i, trace in enumerate(combined_fig.data): - # Check if this is a moving average trace - if trace.name == 'Average APR (3d window)': - trace.visible = show_apr_ma - elif trace.name == 'Average ETH Adjusted APR (3d window)': - trace.visible = show_adjusted_apr_ma - - return combined_fig - except Exception as e: - logger.exception("Error generating APR visualization") - # Create error figure - error_fig = go.Figure() - error_fig.add_annotation( - text=f"Error: {str(e)}", - x=0.5, y=0.5, - showarrow=False, - font=dict(size=15, color="red") - ) - return error_fig - - # Function to update the ROI graph - def update_roi_graph(show_roi_ma=True): - # Generate visualization and get figure object directly - try: - combined_fig, _ = generate_roi_visualizations() - - # Update visibility of traces based on toggle values - for i, trace in enumerate(combined_fig.data): - # Check if this is a moving average trace - if trace.name == 'Average ROI (3d window)': - trace.visible = show_roi_ma - - return combined_fig - except Exception as e: - logger.exception("Error generating ROI visualization") - # Create error figure - error_fig = go.Figure() - error_fig.add_annotation( - text=f"Error: {str(e)}", - x=0.5, y=0.5, - 
showarrow=False, - font=dict(size=15, color="red") - ) - return error_fig - - # Function to update the Volume graph - def update_volume_graph(show_volume_ma=True): - # Generate visualization and get figure object directly - try: - combined_fig, _ = generate_volume_visualizations() - - # Update visibility of traces based on toggle values - for i, trace in enumerate(combined_fig.data): - # Check if this is a moving average trace - if trace.name == 'Average Volume (3d window)': - trace.visible = show_volume_ma - - return combined_fig - except Exception as e: - logger.exception("Error generating Volume visualization") - # Create error figure - error_fig = go.Figure() - error_fig.add_annotation( - text=f"Error: {str(e)}", - x=0.5, y=0.5, - showarrow=False, - font=dict(size=15, color="red") - ) - return error_fig - - # Initialize the APR graph on load with a placeholder - apr_placeholder_fig = go.Figure() - apr_placeholder_fig.add_annotation( - text="Click 'Refresh APR Data' to load APR graph", - x=0.5, y=0.5, - showarrow=False, - font=dict(size=15) - ) - combined_apr_graph.value = apr_placeholder_fig - - # Initialize the ROI graph on load with a placeholder - roi_placeholder_fig = go.Figure() - roi_placeholder_fig.add_annotation( - text="Click 'Refresh ROI Data' to load ROI graph", - x=0.5, y=0.5, - showarrow=False, - font=dict(size=15) - ) - combined_roi_graph.value = roi_placeholder_fig - - # Initialize the Volume graph on load with a placeholder - volume_placeholder_fig = go.Figure() - volume_placeholder_fig.add_annotation( - text="Click 'Refresh Volume Data' to load Volume graph", - x=0.5, y=0.5, - showarrow=False, - font=dict(size=15) - ) - combined_volume_graph.value = volume_placeholder_fig - - # Initialize the APR vs Agent Hash graph on load with a placeholder - apr_hash_placeholder_fig = go.Figure() - apr_hash_placeholder_fig.add_annotation( - text="Click 'Refresh APR vs Agent Hash Data' to load APR vs Agent Hash graph", - x=0.5, y=0.5, - showarrow=False, - font=dict(size=15) - ) - apr_vs_agent_hash_graph.value = apr_hash_placeholder_fig - - # Function to update the APR graph based on toggle states - def update_apr_graph_with_toggles(apr_visible, adjusted_apr_visible): - return update_apr_graph(apr_visible, adjusted_apr_visible) - - # Function to update the ROI graph based on toggle states - def update_roi_graph_with_toggles(roi_visible): - return update_roi_graph(roi_visible) - - # Function to refresh APR data - def refresh_apr_data(): - """Refresh APR data from the database and update the visualization""" - try: - # Fetch new APR data - logger.info("Manually refreshing APR data...") - fetch_apr_data_from_db() - - # Verify data was fetched successfully - if global_df is None or len(global_df) == 0: - logger.error("Failed to fetch APR data") - return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details." 
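# Illustrative sketch (not part of the app): the update_* callbacks above toggle
# traces by matching on the names assigned when the figure was built. A minimal
# version of that pattern; 'visible_by_name' is a hypothetical helper argument.
def apply_trace_visibility(fig, visible_by_name):
    """Set .visible on traces whose name appears in visible_by_name; leave others untouched."""
    for trace in fig.data:
        if trace.name in visible_by_name:
            trace.visible = visible_by_name[trace.name]
    return fig

# Example usage:
# apply_trace_visibility(fig, {"Average APR (3d window)": True,
#                              "Average ETH Adjusted APR (3d window)": False})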
-
-            # Log info about fetched data with focus on adjusted_apr
-            may_10_2025 = datetime(2025, 5, 10)
-            if 'timestamp' in global_df and 'adjusted_apr' in global_df:
-                after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
-                with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
-
-                logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
-                logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
-
-            # Generate new visualization
-            logger.info("Generating new APR visualization...")
-            new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
-            return new_graph, "APR data refreshed successfully"
-        except Exception as e:
-            logger.error(f"Error refreshing APR data: {e}")
-            return combined_apr_graph.value, f"Error: {str(e)}"
-
-    # Function to refresh ROI data
-    def refresh_roi_data():
-        """Refresh ROI data from the database and update the visualization"""
-        try:
-            # Fetch new ROI data
-            logger.info("Manually refreshing ROI data...")
-            fetch_apr_data_from_db()  # This also fetches ROI data
-
-            # Verify data was fetched successfully
-            if global_roi_df is None or len(global_roi_df) == 0:
-                logger.error("Failed to fetch ROI data")
-                return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details."
-
-            # Generate new visualization
-            logger.info("Generating new ROI visualization...")
-            new_graph = update_roi_graph(roi_toggle.value)
-            return new_graph, "ROI data refreshed successfully"
-        except Exception as e:
-            logger.error(f"Error refreshing ROI data: {e}")
-            return combined_roi_graph.value, f"Error: {str(e)}"
-
-    # Set up the button click event for APR refresh
-    refresh_apr_btn.click(
-        fn=refresh_apr_data,
-        inputs=[],
-        outputs=[combined_apr_graph, apr_status_text]
-    )
-
-    # Set up the button click event for ROI refresh
-    refresh_roi_btn.click(
-        fn=refresh_roi_data,
-        inputs=[],
-        outputs=[combined_roi_graph, roi_status_text]
-    )
-
-    # Set up the toggle switch events for APR
-    apr_toggle.change(
-        fn=update_apr_graph_with_toggles,
-        inputs=[apr_toggle, adjusted_apr_toggle],
-        outputs=[combined_apr_graph]
-    )
-
-    adjusted_apr_toggle.change(
-        fn=update_apr_graph_with_toggles,
-        inputs=[apr_toggle, adjusted_apr_toggle],
-        outputs=[combined_apr_graph]
-    )
-
-    # Set up the toggle switch events for ROI
-    roi_toggle.change(
-        fn=update_roi_graph_with_toggles,
-        inputs=[roi_toggle],
-        outputs=[combined_roi_graph]
-    )
-
-    # Function to refresh volume data
-    def refresh_volume_data():
-        """Refresh volume data from the database and update the visualization"""
-        try:
-            # Fetch new volume data
-            logger.info("Manually refreshing volume data...")
-            fetch_apr_data_from_db()  # This also fetches volume data
-
-            # Verify data was fetched successfully
-            if global_df is None or len(global_df) == 0:
-                logger.error("Failed to fetch volume data")
-                return combined_volume_graph.value, "Error: Failed to fetch volume data. Check the logs for details."
-
-            # Generate new visualization
-            logger.info("Generating new volume visualization...")
-            new_graph = update_volume_graph(volume_toggle.value)
-            return new_graph, "Volume data refreshed successfully"
-        except Exception as e:
-            logger.error(f"Error refreshing volume data: {e}")
-            return combined_volume_graph.value, f"Error: {str(e)}"
-
-    # Set up the button click event for volume refresh
-    refresh_volume_btn.click(
-        fn=refresh_volume_data,
-        inputs=[],
-        outputs=[combined_volume_graph, volume_status_text]
-    )
-
-    # Set up the toggle switch events for volume
-    volume_toggle.change(
-        fn=update_volume_graph,
-        inputs=[volume_toggle],
-        outputs=[combined_volume_graph]
-    )
-
-    # Function to update the APR vs Agent Hash graph
-    def update_apr_vs_agent_hash_graph():
-        """Update the APR vs Agent Hash graph"""
-        try:
-            # Generate visualization and get figure object directly
-            fig, _ = generate_apr_vs_agent_hash_visualizations(global_df)
-            return fig
-        except Exception as e:
-            logger.exception("Error generating APR vs Agent Hash visualization")
-            # Create error figure
-            error_fig = go.Figure()
-            error_fig.add_annotation(
-                text=f"Error: {str(e)}",
-                x=0.5, y=0.5,
-                showarrow=False,
-                font=dict(size=15, color="red")
-            )
-            return error_fig
-
-    # Function to refresh APR vs Agent Hash data
-    def refresh_apr_vs_agent_hash_data():
-        """Refresh APR vs Agent Hash data from the database and update the visualization"""
-        try:
-            # Fetch new APR data if not already fetched
-            logger.info("Manually refreshing APR vs Agent Hash data...")
-            if global_df is None or global_df.empty:
-                fetch_apr_data_from_db()
-
-            # Verify data was fetched successfully
-            if global_df is None or len(global_df) == 0:
-                logger.error("Failed to fetch APR data for APR vs Agent Hash visualization")
-                return apr_vs_agent_hash_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
-
-            # Check if agent_hash column exists
-            if 'agent_hash' not in global_df.columns:
-                logger.error("agent_hash column not found in DataFrame")
-                return apr_vs_agent_hash_graph.value, "Error: agent_hash column not found in data. Check the logs for details."
-
-            # Generate new visualization
-            logger.info("Generating new APR vs Agent Hash visualization...")
-            new_graph = update_apr_vs_agent_hash_graph()
-            return new_graph, "APR vs Agent Hash data refreshed successfully"
-        except Exception as e:
-            logger.error(f"Error refreshing APR vs Agent Hash data: {e}")
-            return apr_vs_agent_hash_graph.value, f"Error: {str(e)}"
-
-    # Set up the button click event for APR vs Agent Hash refresh
-    refresh_apr_hash_btn.click(
-        fn=refresh_apr_vs_agent_hash_data,
-        inputs=[],
-        outputs=[apr_vs_agent_hash_graph, apr_hash_status_text]
-    )
-
-    return demo
-
-# Launch the dashboard
-if __name__ == "__main__":
-    dashboard().launch()
-
-def generate_adjusted_apr_report():
-    """
-    Generate a detailed report about adjusted_apr data availability and save it to a file.
-    Returns the path to the generated report file.
-    """
-    global global_df
-
-    if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns:
-        logger.warning("No adjusted_apr data available for report generation")
-        return None
-
-    # Create a report file
-    report_path = "adjusted_apr_report.txt"
-
-    with open(report_path, "w") as f:
-        f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n")
-
-        # Summary statistics
-        total_records = len(global_df)
-        records_with_adjusted = global_df['adjusted_apr'].notna().sum()
-        pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
-
-        f.write(f"Total APR records: {total_records}\n")
-        f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")
-
-        # First and last data points
-        if records_with_adjusted > 0:
-            has_adjusted = global_df[global_df['adjusted_apr'].notna()]
-            first_date = has_adjusted['timestamp'].min()
-            last_date = has_adjusted['timestamp'].max()
-            f.write(f"First adjusted_apr record: {first_date}\n")
-            f.write(f"Last adjusted_apr record: {last_date}\n")
-            f.write(f"Date range: {(last_date - first_date).days} days\n\n")
-
-        # Agent statistics
-        f.write("===== AGENT STATISTICS =====\n\n")
-
-        # Group by agent
-        agent_stats = []
-
-        for agent_id in global_df['agent_id'].unique():
-            agent_data = global_df[global_df['agent_id'] == agent_id]
-            agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
-
-            total_agent_records = len(agent_data)
-            agent_with_adjusted = agent_data['adjusted_apr'].notna().sum()
-            coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
-
-            agent_stats.append({
-                'agent_id': agent_id,
-                'agent_name': agent_name,
-                'total_records': total_agent_records,
-                'with_adjusted': agent_with_adjusted,
-                'coverage_pct': coverage_pct
-            })
-
-        # Sort by coverage percentage (descending)
-        agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True)
-
-        # Write agent statistics
-        for agent in agent_stats:
-            f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
-            f.write(f"  Records: {agent['total_records']}\n")
-            f.write(f"  With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")
-
-            # If agent has adjusted data, show date range
-            agent_data = global_df[global_df['agent_id'] == agent['agent_id']]
-            agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
-
-            if not agent_adjusted.empty:
-                first = agent_adjusted['timestamp'].min()
-                last = agent_adjusted['timestamp'].max()
-                f.write(f"  First adjusted_apr: {first}\n")
-                f.write(f"  Last adjusted_apr: {last}\n")
-
-            f.write("\n")
-
-        # Check for May 10th cutoff issue
-        f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
-        may_10_2025 = datetime(2025, 5, 10)
-
-        before_cutoff = global_df[global_df['timestamp'] < may_10_2025]
-        after_cutoff = global_df[global_df['timestamp'] >= may_10_2025]
-
-        # Calculate coverage before and after
-        before_total = len(before_cutoff)
-        before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
-        before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0
-
-        after_total = len(after_cutoff)
-        after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
-        after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0
-
-        f.write(f"Before May 10th, 2025:\n")
-        f.write(f"  Records: {before_total}\n")
-        f.write(f"  With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")
-
-        f.write(f"After May 10th, 2025:\n")
f.write(f" Records: {after_total}\n") - f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n") - - # Check for agents that had data before but not after - if before_total > 0 and after_total > 0: - agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) - agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) - - missing_after = agents_before - agents_after - new_after = agents_after - agents_before - - if missing_after: - f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n") - - # For each missing agent, show the last date with adjusted_apr - for agent_id in missing_after: - agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) & - (before_cutoff['adjusted_apr'].notna())] - if not agent_data.empty: - last_date = agent_data['timestamp'].max() - agent_name = agent_data['agent_name'].iloc[0] - f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n") - - if new_after: - f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n") - - logger.info(f"Adjusted APR report generated: {report_path}") - return report_path + main()