diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,3554 +1,36 @@
-import requests
-import pandas as pd
-import gradio as gr
-import plotly.graph_objects as go
-import plotly.express as px
-from plotly.subplots import make_subplots
-from datetime import datetime, timedelta
-import json
-# Commenting out blockchain-related imports that cause loading issues
-# from web3 import Web3
-import os
-import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib.dates as mdates
-import random
-import logging
-from typing import List, Dict, Any, Optional
-# Comment out the import for now and replace with dummy functions
-# from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations
-# Import APR vs agent hash visualization functions
-from apr_vs_agent_hash import generate_apr_vs_agent_hash_visualizations
-# APR visualization functions integrated directly
-
-# Set up logging with appropriate verbosity
-logging.basicConfig(
- level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity
- format="%(asctime)s - %(levelname)s - %(message)s",
- handlers=[
- logging.FileHandler("app_debug.log"), # Log to file for persistence
- logging.StreamHandler() # Also log to console
- ]
-)
-logger = logging.getLogger(__name__)
-
-# Reduce third-party library logging
-logging.getLogger("urllib3").setLevel(logging.WARNING)
-logging.getLogger("httpx").setLevel(logging.WARNING)
-logging.getLogger("matplotlib").setLevel(logging.WARNING)
-
-# Log the startup information
-logger.info("============= APPLICATION STARTING =============")
-logger.info(f"Running from directory: {os.getcwd()}")
-
-# Global variables to store the data for reuse
-global_df = None
-global_roi_df = None
-global_volume_df = None
-
-# Configuration
-API_BASE_URL = "https://afmdb.autonolas.tech"
-logger.info(f"Using API endpoint: {API_BASE_URL}")
-
-def get_agent_type_by_name(type_name: str) -> Dict[str, Any]:
- """Get agent type by name"""
- url = f"{API_BASE_URL}/api/agent-types/name/{type_name}"
- logger.debug(f"Calling API: {url}")
-
- try:
- response = requests.get(url)
- logger.debug(f"Response status: {response.status_code}")
-
- if response.status_code == 404:
- logger.error(f"Agent type '{type_name}' not found")
- return None
-
- response.raise_for_status()
- result = response.json()
- logger.debug(f"Agent type response: {result}")
- return result
- except Exception as e:
- logger.error(f"Error in get_agent_type_by_name: {e}")
- return None
-
-def get_attribute_definition_by_name(attr_name: str) -> Dict[str, Any]:
- """Get attribute definition by name"""
- url = f"{API_BASE_URL}/api/attributes/name/{attr_name}"
- logger.debug(f"Calling API: {url}")
-
- try:
- response = requests.get(url)
- logger.debug(f"Response status: {response.status_code}")
-
- if response.status_code == 404:
- logger.error(f"Attribute definition '{attr_name}' not found")
- return None
-
- response.raise_for_status()
- result = response.json()
- logger.debug(f"Attribute definition response: {result}")
- return result
- except Exception as e:
- logger.error(f"Error in get_attribute_definition_by_name: {e}")
- return None
-
-def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]:
- """Get all agents of a specific type"""
- url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/"
- logger.debug(f"Calling API: {url}")
-
- try:
- response = requests.get(url)
- logger.debug(f"Response status: {response.status_code}")
-
- if response.status_code == 404:
- logger.error(f"No agents found for type ID {type_id}")
- return []
-
- response.raise_for_status()
- result = response.json()
- logger.debug(f"Agents count: {len(result)}")
- logger.debug(f"First few agents: {result[:2] if result else []}")
- return result
- except Exception as e:
- logger.error(f"Error in get_agents_by_type: {e}")
- return []
-
-def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]:
- """Get all attribute values for a specific attribute definition across all agents of a given list"""
- all_attributes = []
- logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}")
-
- # For each agent, get their attributes and filter for the one we want
- for agent in agents:
- agent_id = agent["agent_id"]
-
- # Call the /api/agents/{agent_id}/attributes/ endpoint
- url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/"
- logger.debug(f"Calling API for agent {agent_id}: {url}")
-
- try:
- response = requests.get(url, params={"limit": 1000})
-
- if response.status_code == 404:
- logger.error(f"No attributes found for agent ID {agent_id}")
- continue
-
- response.raise_for_status()
- agent_attrs = response.json()
- logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes")
-
- # Filter for the specific attribute definition ID
- filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id]
- logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes")
-
- if filtered_attrs:
- logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}")
-
- all_attributes.extend(filtered_attrs)
- except requests.exceptions.RequestException as e:
- logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}")
-
- logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}")
- return all_attributes
-
-def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str:
- """Get agent name from agent ID"""
- for agent in agents:
- if agent["agent_id"] == agent_id:
- return agent["agent_name"]
- return "Unknown"
-
-def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]:
- """Extract APR value, adjusted APR value, ROI value, volume, and timestamp from JSON value"""
- try:
- agent_id = attr.get("agent_id", "unknown")
- logger.debug(f"Extracting APR value for agent {agent_id}")
-
- # The APR value is stored in the json_value field
- if attr["json_value"] is None:
- logger.debug(f"Agent {agent_id}: json_value is None")
- return {"apr": None, "adjusted_apr": None, "roi": None, "volume": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False}
-
- # If json_value is a string, parse it
- if isinstance(attr["json_value"], str):
- logger.debug(f"Agent {agent_id}: json_value is string, parsing")
- json_data = json.loads(attr["json_value"])
- else:
- json_data = attr["json_value"]
-
- apr = json_data.get("apr")
- adjusted_apr = json_data.get("adjusted_apr") # Extract adjusted_apr if present
- timestamp = json_data.get("timestamp")
- volume = json_data.get("volume") # Extract volume if present
-
- # Extract ROI (f_i_ratio) from calculation_metrics if it exists
- roi = None
- if "calculation_metrics" in json_data and json_data["calculation_metrics"] is not None:
- roi = json_data["calculation_metrics"].get("f_i_ratio")
-
- # Try to extract volume from portfolio_snapshot if it's not directly in json_data
- if volume is None and "portfolio_snapshot" in json_data and json_data["portfolio_snapshot"] is not None:
- portfolio = json_data["portfolio_snapshot"].get("portfolio")
- if portfolio and isinstance(portfolio, dict):
- volume = portfolio.get("volume")
-
- # Extract agent_hash from json_data or portfolio_snapshot
- agent_hash = json_data.get("agent_hash")
- if agent_hash is None and "portfolio_snapshot" in json_data and json_data["portfolio_snapshot"] is not None:
- portfolio = json_data["portfolio_snapshot"].get("portfolio")
- if portfolio and isinstance(portfolio, dict):
- agent_hash = portfolio.get("agent_hash")
-
- logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, volume: {volume}, timestamp: {timestamp}, agent_hash: {agent_hash}")
-
- # Convert timestamp to datetime if it exists
- timestamp_dt = None
- if timestamp:
- timestamp_dt = datetime.fromtimestamp(timestamp)
-
- result = {
- "apr": apr,
- "adjusted_apr": adjusted_apr,
- "roi": roi,
- "volume": volume,
- "timestamp": timestamp_dt,
- "agent_id": agent_id,
- "agent_hash": agent_hash,
- "is_dummy": False
- }
- logger.debug(f"Agent {agent_id}: Extracted result: {result}")
- return result
- except (json.JSONDecodeError, KeyError, TypeError) as e:
- logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}")
- logger.error(f"Problematic json_value: {attr.get('json_value')}")
- return {"apr": None, "adjusted_apr": None, "roi": None, "volume": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False}
-
-def fetch_apr_data_from_db():
- """
- Fetch APR data from database using the API.
- """
- global global_df
- global global_roi_df
-
- logger.info("==== Starting APR data fetch ====")
-
- try:
- # Step 1: Find the Modius agent type
- logger.info("Finding Modius agent type")
- modius_type = get_agent_type_by_name("Modius")
- if not modius_type:
- logger.error("Modius agent type not found, using placeholder data")
- global_df = pd.DataFrame([])
- return global_df
-
- type_id = modius_type["type_id"]
- logger.info(f"Found Modius agent type with ID: {type_id}")
-
- # Step 2: Find the APR attribute definition
- logger.info("Finding APR attribute definition")
- apr_attr_def = get_attribute_definition_by_name("APR")
- if not apr_attr_def:
- logger.error("APR attribute definition not found, using placeholder data")
- global_df = pd.DataFrame([])
- return global_df
-
- attr_def_id = apr_attr_def["attr_def_id"]
- logger.info(f"Found APR attribute definition with ID: {attr_def_id}")
-
- # Step 3: Get all agents of type Modius
- logger.info(f"Getting all agents of type Modius (type_id: {type_id})")
- modius_agents = get_agents_by_type(type_id)
- if not modius_agents:
- logger.error("No agents of type 'Modius' found")
- global_df = pd.DataFrame([])
- return global_df
-
- logger.info(f"Found {len(modius_agents)} Modius agents")
- logger.debug(f"Modius agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in modius_agents]}")
-
- # Step 4: Fetch all APR values for Modius agents
- logger.info(f"Fetching APR values for all Modius agents (attr_def_id: {attr_def_id})")
- apr_attributes = get_attribute_values_by_type_and_attr(modius_agents, attr_def_id)
- if not apr_attributes:
- logger.error("No APR values found for 'Modius' agents")
- global_df = pd.DataFrame([])
- return global_df
-
- logger.info(f"Found {len(apr_attributes)} APR attributes total")
-
- # Step 5: Extract APR and ROI data
- logger.info("Extracting APR and ROI data from attributes")
- apr_data_list = []
- roi_data_list = []
-
- for attr in apr_attributes:
- data = extract_apr_value(attr)
- if data["timestamp"] is not None:
- # Get agent name
- agent_name = get_agent_name(attr["agent_id"], modius_agents)
- # Add agent name to the data
- data["agent_name"] = agent_name
- # Add is_dummy flag (all real data)
- data["is_dummy"] = False
-
- # Process APR data
- if data["apr"] is not None:
- # Include all APR values (including negative ones) EXCEPT zero and -100
- if data["apr"] != 0 and data["apr"] != -100:
- apr_entry = data.copy()
- apr_entry["metric_type"] = "APR"
- logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}")
- # Add to the APR data list
- apr_data_list.append(apr_entry)
- else:
- # Log that we're skipping zero or -100 values
- logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)")
-
- # Process ROI data
- if data["roi"] is not None:
- # Include all ROI values except extreme outliers
- if data["roi"] > -10 and data["roi"] < 10: # Filter extreme outliers
- roi_entry = {
- "roi": data["roi"],
- "timestamp": data["timestamp"],
- "agent_id": data["agent_id"],
- "agent_name": agent_name,
- "is_dummy": False,
- "metric_type": "ROI"
- }
- logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}")
- # Add to the ROI data list
- roi_data_list.append(roi_entry)
- else:
- # Log that we're skipping extreme outlier values
- logger.debug(f"Skipping ROI value for agent {agent_name} ({attr['agent_id']}): {data['roi']} (extreme outlier)")
-
- logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points")
-
- # Added debug for adjusted APR data after May 10th
- may_10_2025 = datetime(2025, 5, 10)
- after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025]
- with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None]
-
- logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
- logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
-
- # Log detailed information about when data began
- first_adjusted = None
- if with_adjusted_after_may_10:
- first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp'])
- logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})")
-
- # Check all data for first adjusted_apr
- all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None]
- if all_with_adjusted:
- first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp'])
- logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})")
- last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp'])
- logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})")
-
- # Calculate overall coverage
- adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100
- logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)")
-
- # Log per-agent adjusted APR statistics
- agent_stats = {}
- for record in apr_data_list:
- agent_id = record['agent_id']
- has_adjusted = record['adjusted_apr'] is not None
-
- if agent_id not in agent_stats:
- agent_stats[agent_id] = {'total': 0, 'adjusted': 0}
-
- agent_stats[agent_id]['total'] += 1
- if has_adjusted:
- agent_stats[agent_id]['adjusted'] += 1
-
- # Log stats for agents with meaningful data
- for agent_id, stats in agent_stats.items():
- if stats['total'] > 0:
- coverage = (stats['adjusted'] / stats['total']) * 100
- if coverage > 0: # Only log agents that have at least some adjusted data
- logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)")
-
- # Check for gaps in adjusted APR data
- for agent_id in agent_stats:
- # Get all records for this agent
- agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id]
- # Sort by timestamp
- agent_records.sort(key=lambda x: x['timestamp'])
-
- # Find where adjusted APR starts and if there are gaps
- has_adjusted = False
- gap_count = 0
- streak_length = 0
- for record in agent_records:
- if record['adjusted_apr'] is not None:
- if not has_adjusted:
- has_adjusted = True
- logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}")
- streak_length += 1
- elif has_adjusted:
- # We had adjusted data but now it's missing
- gap_count += 1
- if streak_length > 0:
- logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records")
- streak_length = 0
-
- if gap_count > 0:
- logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data")
- elif has_adjusted:
- logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps")
-
- # Provide summary statistics
- agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0)
- agents_with_gaps = sum(1 for agent_id in agent_stats if
- any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and
- i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and
- apr_data_list[i+1]['adjusted_apr'] is None
- for i in range(len(apr_data_list)-1)))
-
- logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data")
- if agents_with_gaps > 0:
- logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data")
- logger.warning("These gaps may cause discontinuities in the adjusted APR graph")
- else:
- logger.info("No gaps detected in adjusted APR data - graph should be continuous")
-
- if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0:
- logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data")
-
- # Log agent IDs with missing adjusted_apr after May 10th
- agents_after_may_10 = set(d['agent_id'] for d in after_may_10)
- logger.info(f"Agents with data after May 10th: {agents_after_may_10}")
-
- # Check these same agents before May 10th
- before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025]
- agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None}
-
- # Agents that had adjusted_apr before but not after
- missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10)
- if missing_adjusted:
- logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}")
-
- # Find the last valid adjusted_apr date for these agents
- for agent_id in missing_adjusted:
- agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None]
- if agent_data:
- last_entry = max(agent_data, key=lambda d: d['timestamp'])
- logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}")
-
- # Look at the first entry after the cutoff without adjusted_apr
- agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id]
- if agent_after:
- first_after = min(agent_after, key=lambda d: d['timestamp'])
- logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr")
-
- # If the agent data has the 'adjusted_apr_key' field, log that info
- if 'adjusted_apr_key' in first_after:
- logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}")
-
- # Add debug logic to check for any adjusted_apr after May 10th and which agents have it
- elif len(with_adjusted_after_may_10) > 0:
- logger.info("Found adjusted_apr values after May 10th, 2025")
-
- # Group by agent and log
- agent_counts = {}
- for item in with_adjusted_after_may_10:
- agent_id = item['agent_id']
- if agent_id in agent_counts:
- agent_counts[agent_id] += 1
- else:
- agent_counts[agent_id] = 1
-
- logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}")
-
- # Log adjusted_apr keys used
- keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item}
- if keys_used:
- logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}")
-
- # Convert to DataFrames
- if not apr_data_list:
- logger.error("No valid APR data extracted")
- global_df = pd.DataFrame([])
- else:
- # Convert list of dictionaries to DataFrame for APR
- global_df = pd.DataFrame(apr_data_list)
-
- if not roi_data_list:
- logger.error("No valid ROI data extracted")
- global_roi_df = pd.DataFrame([])
- else:
- # Convert list of dictionaries to DataFrame for ROI
- global_roi_df = pd.DataFrame(roi_data_list)
-
- # Log the resulting dataframe
- logger.info(f"Created DataFrame with {len(global_df)} rows")
- logger.info(f"DataFrame columns: {global_df.columns.tolist()}")
- logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}")
-
- # Log adjusted APR statistics if available
- if 'adjusted_apr' in global_df.columns and global_df['adjusted_apr'].notna().any():
- logger.info(f"Adjusted APR statistics: min={global_df['adjusted_apr'].min()}, max={global_df['adjusted_apr'].max()}, mean={global_df['adjusted_apr'].mean()}")
- logger.info(f"Number of records with adjusted_apr: {global_df['adjusted_apr'].notna().sum()} out of {len(global_df)}")
-
- # Log the difference between APR and adjusted APR
- valid_rows = global_df[global_df['adjusted_apr'].notna()]
- if not valid_rows.empty:
- avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean()
- max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max()
- min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min()
- logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}")
-
- # All values are APR type (excluding zero and -100 values)
- logger.info("All values are APR type (excluding zero and -100 values)")
- logger.info(f"Agents count: {global_df['agent_name'].value_counts().to_dict()}")
-
- # Log the entire dataframe for debugging
- logger.debug("Final DataFrame contents:")
- for idx, row in global_df.iterrows():
- logger.debug(f"Row {idx}: {row.to_dict()}")
-
- # Add this at the end, right before returning
- logger.info("Analyzing adjusted_apr data availability...")
- log_adjusted_apr_availability(global_df)
-
- return global_df, global_roi_df
-
- except requests.exceptions.RequestException as e:
- logger.error(f"API request error: {e}")
- global_df = pd.DataFrame([])
- global_roi_df = pd.DataFrame([])
- return global_df, global_roi_df
- except Exception as e:
- logger.error(f"Error fetching APR data: {e}")
- logger.exception("Exception traceback:")
- global_df = pd.DataFrame([])
- global_roi_df = pd.DataFrame([])
- return global_df, global_roi_df
-
-def log_adjusted_apr_availability(df):
- """
- Analyzes and logs detailed information about adjusted_apr data availability.
-
- Args:
- df: DataFrame containing the APR data with adjusted_apr column
- """
- if df.empty or 'adjusted_apr' not in df.columns:
- logger.warning("No adjusted_apr data available for analysis")
- return
-
- # Get only rows with valid adjusted_apr values
- has_adjusted = df[df['adjusted_apr'].notna()]
-
- if has_adjusted.empty:
- logger.warning("No valid adjusted_apr values found in the dataset")
- return
-
- # 1. When did adjusted_apr data start?
- first_adjusted = has_adjusted['timestamp'].min()
- last_adjusted = has_adjusted['timestamp'].max()
- logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}")
- logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}")
- logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days")
-
- # Calculate coverage percentage
- total_records = len(df)
- records_with_adjusted = len(has_adjusted)
- coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
- logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)")
-
- # 2. How many agents are providing adjusted_apr?
- agents_with_adjusted = has_adjusted['agent_id'].unique()
- logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr")
- logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}")
-
- # 3. May 10th cutoff analysis
- may_10_2025 = datetime(2025, 5, 10)
- before_cutoff = df[df['timestamp'] < may_10_2025]
- after_cutoff = df[df['timestamp'] >= may_10_2025]
-
- if not before_cutoff.empty and not after_cutoff.empty:
- before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
- before_pct = (before_with_adjusted / len(before_cutoff)) * 100
-
- after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
- after_pct = (after_with_adjusted / len(after_cutoff)) * 100
-
- logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)")
- logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)")
-
- # Check which agents had data before and after
- agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
- agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
-
- missing_after = agents_before - agents_after
- if missing_after:
- logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}")
-
- new_after = agents_after - agents_before
- if new_after:
- logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}")
-
- # 4. Find date ranges for missing adjusted_apr
- # Group by agent to analyze per-agent data availability
- logger.info("=== DETAILED AGENT ANALYSIS ===")
- for agent_id in df['agent_id'].unique():
- agent_data = df[df['agent_id'] == agent_id]
- agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
-
- # Get the valid adjusted_apr values for this agent
- agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
-
- if agent_adjusted.empty:
- logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available")
- continue
-
- # Get the date range for this agent's data
- agent_start = agent_data['timestamp'].min()
- agent_end = agent_data['timestamp'].max()
-
- # Get the date range for adjusted_apr data
- adjusted_start = agent_adjusted['timestamp'].min()
- adjusted_end = agent_adjusted['timestamp'].max()
-
- total_agent_records = len(agent_data)
- agent_with_adjusted = len(agent_adjusted)
- coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
-
- logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)")
- logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}")
- logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}")
-
- # Calculate if this agent had data before/after May 10th
- if not before_cutoff.empty and not after_cutoff.empty:
- agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id]
- agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id]
-
- has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any()
- has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any()
-
- if has_before and not has_after:
- last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max()
- logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}")
- elif not has_before and has_after:
- first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min()
- logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}")
-
- # Check for gaps in adjusted_apr (periods of 24+ hours without data)
- if len(agent_adjusted) < 2:
- continue
-
- # Sort by timestamp
- sorted_data = agent_adjusted.sort_values('timestamp')
-
- # Calculate time differences between consecutive data points
- time_diffs = sorted_data['timestamp'].diff()
-
- # Find gaps larger than 24 hours
- gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)]
-
- if not gaps.empty:
- logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data")
-
- # Log the gaps
- for i, row in gaps.iterrows():
- # Find the previous timestamp before the gap
- prev_idx = sorted_data.index.get_loc(i) - 1
- prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None
-
- if prev_time:
- gap_start = prev_time
- gap_end = row['timestamp']
- gap_duration = gap_end - gap_start
- logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
-
-def generate_apr_visualizations():
- """Generate APR visualizations with real data only (no dummy data)"""
- global global_df
-
- # Fetch data from database
- df, _ = fetch_apr_data_from_db()
-
- # If we got no data at all, return placeholder figures
- if df.empty:
- logger.info("No APR data available. Using fallback visualization.")
- # Create empty visualizations with a message using Plotly
- fig = go.Figure()
- fig.add_annotation(
- x=0.5, y=0.5,
- text="No APR data available",
- font=dict(size=20),
- showarrow=False
- )
- fig.update_layout(
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
- )
-
- # Save as static file for reference
- fig.write_html("modius_apr_combined_graph.html")
- fig.write_image("modius_apr_combined_graph.png")
-
- csv_file = None
- return fig, csv_file
-
- # No longer generating dummy data
- # Set global_df for access by other functions
- global_df = df
-
- # Save to CSV before creating visualizations
- csv_file = save_to_csv(df)
-
- # Only create combined time series graph
- combined_fig = create_combined_time_series_graph(df)
-
- return combined_fig, csv_file
-
-def generate_volume_visualizations():
- """Generate volume visualizations with real data only (no dummy data)"""
- global global_df
- global global_volume_df
-
- # Use the existing APR data which already contains volume
- if global_df is None or global_df.empty:
- df, _ = fetch_apr_data_from_db()
- else:
- df = global_df
-
- # Filter for records with volume data
- volume_df = df[df['volume'].notna()].copy()
-
- # Set global_volume_df for access by other functions
- global_volume_df = volume_df
-
- # If we got no data at all, return placeholder figures
- if volume_df.empty:
- logger.info("No volume data available. Using fallback visualization.")
- # Create empty visualizations with a message using Plotly
- fig = go.Figure()
- fig.add_annotation(
- x=0.5, y=0.5,
- text="No volume data available",
- font=dict(size=20),
- showarrow=False
- )
- fig.update_layout(
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
- )
-
- # Save as static file for reference
- fig.write_html("modius_volume_graph.html")
- fig.write_image("modius_volume_graph.png")
-
- csv_file = None
- return fig, csv_file
-
- # Save to CSV before creating visualizations
- csv_file = save_volume_to_csv(volume_df)
-
- # Create combined time series graph for volume
- combined_fig = create_combined_volume_time_series_graph(volume_df)
-
- return combined_fig, csv_file
-
-def save_volume_to_csv(df):
- """Save the volume data DataFrame to a CSV file and return the file path"""
- if df.empty:
- logger.error("No volume data to save to CSV")
- return None
-
- # Define the CSV file path
- csv_file = "modius_volume_values.csv"
-
- # Save to CSV
- df.to_csv(csv_file, index=False)
- logger.info(f"Volume data saved to {csv_file}")
-
- return csv_file
-
-def create_combined_volume_time_series_graph(df):
- """Create a time series graph showing volume values across all agents"""
- if len(df) == 0:
- logger.error("No data to plot combined volume graph")
- fig = go.Figure()
- fig.add_annotation(
- text="No volume data available",
- x=0.5, y=0.5,
- showarrow=False, font=dict(size=20)
- )
- return fig
-
- # IMPORTANT: Force data types to ensure consistency
- df['volume'] = df['volume'].astype(float) # Ensure volume is float
-
- # Get min and max time for shapes
- min_time = df['timestamp'].min()
- max_time = df['timestamp'].max()
-
- # Use the actual start date from the data
- x_start_date = min_time
-
- # CRITICAL: Log the exact dataframe we're using for plotting to help debug
- logger.info(f"Volume Graph data - shape: {df.shape}, columns: {df.columns}")
- logger.info(f"Volume Graph data - unique agents: {df['agent_name'].unique().tolist()}")
- logger.info(f"Volume Graph data - min volume: {df['volume'].min()}, max volume: {df['volume'].max()}")
-
- # Export full dataframe to CSV for debugging
- debug_csv = "debug_volume_data.csv"
- df.to_csv(debug_csv)
- logger.info(f"Exported volume graph data to {debug_csv} for debugging")
-
- # Create Plotly figure in a clean state
- fig = go.Figure()
-
- # Add background shape for volume region
- fig.add_shape(
- type="rect",
- fillcolor="rgba(230, 243, 255, 0.3)",
- line=dict(width=0),
- y0=0, y1=df['volume'].max() * 1.1, # Use a reasonable upper limit for volume
- x0=min_time, x1=max_time,
- layer="below"
- )
-
- # Add zero line
- fig.add_shape(
- type="line",
- line=dict(dash="solid", width=1.5, color="black"),
- y0=0, y1=0,
- x0=min_time, x1=max_time
- )
-
- # Group by timestamp and calculate mean volume
- avg_volume_data = df.groupby('timestamp')['volume'].mean().reset_index()
-
- # Sort by timestamp
- avg_volume_data = avg_volume_data.sort_values('timestamp')
-
- # Log the average volume data
- logger.info(f"Calculated average volume data with {len(avg_volume_data)} points")
- for idx, row in avg_volume_data.iterrows():
- logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_volume={row['volume']}")
-
- # Calculate moving average based on a time window (3 days)
- # Sort data by timestamp
- df_sorted = df.sort_values('timestamp')
-
- # Create a new dataframe for the moving average
- avg_volume_data_with_ma = avg_volume_data.copy()
- avg_volume_data_with_ma['moving_avg'] = None # Initialize the moving average column
-
- # Define the time window for the moving average (3 days)
- time_window = pd.Timedelta(days=3)
- logger.info(f"Calculating moving average with time window of {time_window}")
-
- # Calculate the moving averages for each timestamp
- for i, row in avg_volume_data_with_ma.iterrows():
- current_time = row['timestamp']
- window_start = current_time - time_window
-
- # Get all data points within the 3-day time window
- window_data = df_sorted[
- (df_sorted['timestamp'] >= window_start) &
- (df_sorted['timestamp'] <= current_time)
- ]
-
- # Calculate the average volume for the 3-day time window
- if not window_data.empty:
- avg_volume_data_with_ma.at[i, 'moving_avg'] = window_data['volume'].mean()
- logger.debug(f"Volume time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['volume'].mean()}")
- else:
- # If no data points in the window, use the current value
- avg_volume_data_with_ma.at[i, 'moving_avg'] = row['volume']
- logger.debug(f"No data points in time window for {current_time}, using current value {row['volume']}")
-
- logger.info(f"Calculated time-based moving averages with {len(avg_volume_data_with_ma)} points")
-
- # Find the last date where we have valid moving average data
- last_valid_ma_date = avg_volume_data_with_ma[avg_volume_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_volume_data_with_ma['moving_avg'].dropna().empty else None
-
- # If we don't have any valid moving average data, use the max time from the original data
- last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
-
- logger.info(f"Last valid moving average date: {last_valid_ma_date}")
- logger.info(f"Using last valid date for graph: {last_valid_date}")
-
- # Plot individual agent data points with agent names in hover, but limit display for scalability
- if not df.empty:
- # Group by agent to use different colors for each agent
- unique_agents = df['agent_name'].unique()
- colors = px.colors.qualitative.Plotly[:len(unique_agents)]
-
- # Create a color map for agents
- color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
-
- # Calculate the total number of data points per agent to determine which are most active
- agent_counts = df['agent_name'].value_counts()
-
- # Determine how many agents to show individually (limit to top 5 most active)
- MAX_VISIBLE_AGENTS = 5
- top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
-
- logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
-
- # Add data points for each agent, but only make top agents visible by default
- for agent_name in unique_agents:
- agent_data = df[df['agent_name'] == agent_name]
-
- # Explicitly convert to Python lists
- x_values = agent_data['timestamp'].tolist()
- y_values = agent_data['volume'].tolist()
-
- # Change default visibility to False to hide all agent data points
- is_visible = False
-
- # Add data points as markers for volume
- fig.add_trace(
- go.Scatter(
- x=x_values,
- y=y_values,
- mode='markers', # Only markers for original data
- marker=dict(
- color=color_map[agent_name],
- symbol='circle',
- size=10,
- line=dict(width=1, color='black')
- ),
- name=f'Agent: {agent_name} (Volume)',
-                    hovertemplate='Time: %{x}<br>Volume: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
- visible=is_visible # All agents hidden by default
- )
- )
- logger.info(f"Added volume data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
-
- # Add volume moving average as a smooth line
- x_values_ma = avg_volume_data_with_ma['timestamp'].tolist()
- y_values_ma = avg_volume_data_with_ma['moving_avg'].tolist()
-
- # Create hover template for the volume moving average line
- hover_data_volume = []
- for idx, row in avg_volume_data_with_ma.iterrows():
- timestamp = row['timestamp']
- # Format timestamp to show only up to seconds (not milliseconds)
- formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-
- # Calculate number of active agents in the last 24 hours
- time_24h_ago = timestamp - pd.Timedelta(hours=24)
- active_agents = len(df[(df['timestamp'] >= time_24h_ago) &
- (df['timestamp'] <= timestamp)]['agent_id'].unique())
-
- hover_data_volume.append(
-                f"Time: {formatted_timestamp}<br>Avg Volume (3d window): {row['moving_avg']:.2f}<br>Active agents (24h): {active_agents}"
- )
-
- fig.add_trace(
- go.Scatter(
- x=x_values_ma,
- y=y_values_ma,
- mode='lines', # Only lines for moving average
- line=dict(color='purple', width=2), # Purple line for volume
- name='Average Volume (3d window)',
- hovertext=hover_data_volume,
- hoverinfo='text',
- visible=True # Visible by default
- )
- )
- logger.info(f"Added 3-day moving average volume trace with {len(x_values_ma)} points")
-
- # Update layout
- fig.update_layout(
- title=dict(
- text="Modius Agents Volume",
- font=dict(
- family="Arial, sans-serif",
- size=22,
- color="black",
- weight="bold"
- )
- ),
- xaxis_title=None, # Remove x-axis title to use annotation instead
- yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
- template="plotly_white",
- height=600, # Reduced height for better fit on smaller screens
- autosize=True, # Enable auto-sizing for responsiveness
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="right",
- x=1,
- groupclick="toggleitem"
- ),
- margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
- hovermode="closest"
- )
-
- # Add single annotation for y-axis
- fig.add_annotation(
- x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
- y=df['volume'].max() / 2, # Center of the y-axis
- xref="paper",
- yref="y",
- text="Volume",
- showarrow=False,
- font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- textangle=-90, # Rotate text to be vertical
- align="center"
- )
-
- # Update layout for legend
- fig.update_layout(
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="right",
- x=1,
- groupclick="toggleitem",
- font=dict(
- family="Arial, sans-serif",
- size=14, # Adjusted font size
- color="black",
- weight="bold"
- )
- )
- )
-
- # Update y-axis with autoscaling for volume
- fig.update_yaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- autorange=True, # Enable autoscaling for volume
- tickformat=".2f", # Format tick labels with 2 decimal places
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- title=None # Remove the built-in axis title since we're using annotations
- )
-
- # Update x-axis with better formatting and fixed range
- fig.update_xaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- # Set fixed range with start date and ending at the last valid date
- autorange=False, # Disable autoscaling
- range=[x_start_date, last_valid_date], # Set fixed range from start date to last valid date
- tickformat="%b %d", # Simplified date format without time
- tickangle=-30, # Angle the labels for better readability
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- title=None # Remove built-in title to use annotation instead
- )
-
- try:
- # Save the figure
- graph_file = "modius_volume_graph.html"
- fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
-
- # Also save as image for compatibility
- img_file = "modius_volume_graph.png"
- try:
- fig.write_image(img_file)
- logger.info(f"Volume graph saved to {graph_file} and {img_file}")
- except Exception as e:
- logger.error(f"Error saving volume image: {e}")
- logger.info(f"Volume graph saved to {graph_file} only")
-
- # Return the figure object for direct use in Gradio
- return fig
- except Exception as e:
- # If the complex graph approach fails, create a simpler one
- logger.error(f"Error creating advanced volume graph: {e}")
- logger.info("Falling back to simpler volume graph")
-
- # Create a simpler graph as fallback
- simple_fig = go.Figure()
-
- # Add zero line
- simple_fig.add_shape(
- type="line",
- line=dict(dash="solid", width=1.5, color="black"),
- y0=0, y1=0,
- x0=min_time, x1=max_time
- )
-
- # Simply plot the average volume data with moving average
- if not avg_volume_data.empty:
- # Add moving average as a line
- simple_fig.add_trace(
- go.Scatter(
- x=avg_volume_data_with_ma['timestamp'],
- y=avg_volume_data_with_ma['moving_avg'],
- mode='lines',
- name='Average Volume (3d window)',
- line=dict(width=2, color='purple') # Purple line for volume
- )
- )
-
- # Simplified layout with adjusted y-axis range
- simple_fig.update_layout(
- title=dict(
- text="Modius Agents Volume",
- font=dict(
- family="Arial, sans-serif",
- size=22,
- color="black",
- weight="bold"
- )
- ),
- xaxis_title=None,
- yaxis_title=None,
- template="plotly_white",
- height=600,
- autosize=True,
- margin=dict(r=30, l=120, t=40, b=50)
- )
-
- # Update y-axis with autoscaling for volume
- simple_fig.update_yaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- autorange=True, # Enable autoscaling for volume
- tickformat=".2f",
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
- title=None # Remove the built-in axis title since we're using annotations
- )
-
- # Update x-axis with better formatting and fixed range
- simple_fig.update_xaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- autorange=False,
- range=[x_start_date, max_time],
- tickformat="%b %d",
- tickangle=-30,
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
- )
-
- # Save the figure
- graph_file = "modius_volume_graph.html"
- simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
-
- # Return the simple figure
- return simple_fig
-
-def generate_roi_visualizations():
- """Generate ROI visualizations with real data only (no dummy data)"""
- global global_roi_df
-
- # Fetch data from database if not already fetched
- if global_roi_df is None or global_roi_df.empty:
- _, df_roi = fetch_apr_data_from_db()
- else:
- df_roi = global_roi_df
-
- # If we got no data at all, return placeholder figures
- if df_roi.empty:
- logger.info("No ROI data available. Using fallback visualization.")
- # Create empty visualizations with a message using Plotly
- fig = go.Figure()
- fig.add_annotation(
- x=0.5, y=0.5,
- text="No ROI data available",
- font=dict(size=20),
- showarrow=False
- )
- fig.update_layout(
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
- )
-
- # Save as static file for reference
- fig.write_html("modius_roi_graph.html")
- fig.write_image("modius_roi_graph.png")
-
- csv_file = None
- return fig, csv_file
-
- # Set global_roi_df for access by other functions
- global_roi_df = df_roi
-
- # Save to CSV before creating visualizations
- csv_file = save_roi_to_csv(df_roi)
-
- # Create combined time series graph for ROI
- combined_fig = create_combined_roi_time_series_graph(df_roi)
-
- return combined_fig, csv_file
-
-def create_combined_roi_time_series_graph(df):
- """Create a time series graph showing average ROI values across all agents"""
- if len(df) == 0:
- logger.error("No data to plot combined ROI graph")
- fig = go.Figure()
- fig.add_annotation(
- text="No ROI data available",
- x=0.5, y=0.5,
- showarrow=False, font=dict(size=20)
- )
- return fig
-
- # Define fixed start date (February 1, 2025)
- fixed_start_date = datetime(2025, 2, 1)
- logger.info(f"Using fixed start date for ROI runtime calculation: {fixed_start_date}")
-
- # Calculate runtime for each agent from fixed start date
- agent_runtimes = {}
- for agent_id in df['agent_id'].unique():
- agent_data = df[df['agent_id'] == agent_id]
- agent_name = agent_data['agent_name'].iloc[0]
- last_report = agent_data['timestamp'].max()
- runtime_days = (last_report - fixed_start_date).total_seconds() / (24 * 3600) # Convert to days
- agent_runtimes[agent_id] = {
- 'agent_name': agent_name,
- 'last_report': last_report,
- 'runtime_days': runtime_days
- }
-
- # Calculate average runtime
- avg_runtime = sum(data['runtime_days'] for data in agent_runtimes.values()) / len(agent_runtimes) if agent_runtimes else 0
- logger.info(f"Average agent runtime from fixed start date: {avg_runtime:.2f} days")
-
- # Log individual agent runtimes for debugging
- for agent_id, data in agent_runtimes.items():
- logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")
-
- # IMPORTANT: Force data types to ensure consistency
- df['roi'] = df['roi'].astype(float) # Ensure ROI is float
- # Convert ROI values to percentages (multiply by 100)
- df['roi'] = df['roi'] * 100
- df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
-
- # Get min and max time for shapes
- min_time = df['timestamp'].min()
- max_time = df['timestamp'].max()
-
- # Use the actual start date from the data instead of a fixed date
- x_start_date = min_time
-
- # CRITICAL: Log the exact dataframe we're using for plotting to help debug
- logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}")
- logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}")
- logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}")
-
- # Export full dataframe to CSV for debugging
- debug_csv = "debug_roi_data.csv"
- df.to_csv(debug_csv)
- logger.info(f"Exported ROI graph data to {debug_csv} for debugging")
-
- # Create Plotly figure in a clean state
- fig = go.Figure()
-
- # Get min and max time for shapes
- min_time = df['timestamp'].min()
- max_time = df['timestamp'].max()
-
- # Add background shapes for positive and negative regions
- # Add shape for positive ROI region (above zero)
- fig.add_shape(
- type="rect",
- fillcolor="rgba(230, 243, 255, 0.3)",
- line=dict(width=0),
- y0=0, y1=100, # Use a fixed positive value (percentage)
- x0=min_time, x1=max_time,
- layer="below"
- )
-
- # Add shape for negative ROI region (below zero)
- fig.add_shape(
- type="rect",
- fillcolor="rgba(255, 230, 230, 0.3)",
- line=dict(width=0),
- y0=-100, y1=0, # Use a fixed negative value (percentage)
- x0=min_time, x1=max_time,
- layer="below"
- )
-
- # Add zero line
- fig.add_shape(
- type="line",
- line=dict(dash="solid", width=1.5, color="black"),
- y0=0, y1=0,
- x0=min_time, x1=max_time
- )
-
- # Filter out outliers (ROI values above 200% or below -200%)
- outlier_data = df[(df['roi'] > 200) | (df['roi'] < -200)].copy()
- df_filtered = df[(df['roi'] <= 200) & (df['roi'] >= -200)].copy()
-
- # Log the outliers for better debugging
- if len(outlier_data) > 0:
- excluded_count = len(outlier_data)
- logger.info(f"Excluded {excluded_count} data points with outlier ROI values (>200% or <-200%)")
-
- # Group outliers by agent for detailed logging
- outlier_agents = outlier_data.groupby('agent_name')
- for agent_name, agent_outliers in outlier_agents:
- logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
- for idx, row in agent_outliers.iterrows():
- logger.info(f" - ROI: {row['roi']}, timestamp: {row['timestamp']}")
-
- # Use the filtered data for all subsequent operations
- df = df_filtered
-
- # Group by timestamp and calculate mean ROI
- avg_roi_data = df.groupby('timestamp')['roi'].mean().reset_index()
-
- # Sort by timestamp
- avg_roi_data = avg_roi_data.sort_values('timestamp')
-
- # Log the average ROI data
- logger.info(f"Calculated average ROI data with {len(avg_roi_data)} points")
- for idx, row in avg_roi_data.iterrows():
- logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_roi={row['roi']}")
-
- # Calculate moving average based on a time window (3 days)
- # Sort data by timestamp
- df_sorted = df.sort_values('timestamp')
-
- # Create a new dataframe for the moving average
- avg_roi_data_with_ma = avg_roi_data.copy()
- avg_roi_data_with_ma['moving_avg'] = None # Initialize the moving average column
-
- # Define the time window for the moving average (3 days)
- time_window = pd.Timedelta(days=3)
- logger.info(f"Calculating moving average with time window of {time_window}")
-
- # Calculate the moving averages for each timestamp
- for i, row in avg_roi_data_with_ma.iterrows():
- current_time = row['timestamp']
- window_start = current_time - time_window
-
- # Get all data points within the 3-day time window
- window_data = df_sorted[
- (df_sorted['timestamp'] >= window_start) &
- (df_sorted['timestamp'] <= current_time)
- ]
-
- # Calculate the average ROI for the 3-day time window
- if not window_data.empty:
- avg_roi_data_with_ma.at[i, 'moving_avg'] = window_data['roi'].mean()
- logger.debug(f"ROI time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['roi'].mean()}")
- else:
- # If no data points in the window, use the current value
- avg_roi_data_with_ma.at[i, 'moving_avg'] = row['roi']
- logger.debug(f"No data points in time window for {current_time}, using current value {row['roi']}")
-
- logger.info(f"Calculated time-based moving averages with {len(avg_roi_data_with_ma)} points")
-
- # Find the last date where we have valid moving average data
- last_valid_ma_date = avg_roi_data_with_ma[avg_roi_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_roi_data_with_ma['moving_avg'].dropna().empty else None
-
- # If we don't have any valid moving average data, use the max time from the original data
- last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
-
- logger.info(f"Last valid moving average date: {last_valid_ma_date}")
- logger.info(f"Using last valid date for graph: {last_valid_date}")
-
- # Plot individual agent data points with agent names in hover, but limit display for scalability
- if not df.empty:
- # Group by agent to use different colors for each agent
- unique_agents = df['agent_name'].unique()
- colors = px.colors.qualitative.Plotly[:len(unique_agents)]
-
- # Create a color map for agents
- color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
-
- # Calculate the total number of data points per agent to determine which are most active
- agent_counts = df['agent_name'].value_counts()
-
- # Determine how many agents to show individually (limit to top 5 most active)
- MAX_VISIBLE_AGENTS = 5
- top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
-
- logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
-
- # Add data points for each agent, but only make top agents visible by default
- for agent_name in unique_agents:
- agent_data = df[df['agent_name'] == agent_name]
-
- # Explicitly convert to Python lists
- x_values = agent_data['timestamp'].tolist()
- y_values = agent_data['roi'].tolist()
-
- # Change default visibility to False to hide all agent data points
- is_visible = False
-
- # Add data points as markers for ROI
- fig.add_trace(
- go.Scatter(
- x=x_values,
- y=y_values,
- mode='markers', # Only markers for original data
- marker=dict(
- color=color_map[agent_name],
- symbol='circle',
- size=10,
- line=dict(width=1, color='black')
- ),
- name=f'Agent: {agent_name} (ROI)',
-                    hovertemplate='Time: %{x}<br>ROI: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
- visible=is_visible # All agents hidden by default
- )
- )
- logger.info(f"Added ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
-
- # Add ROI moving average as a smooth line
- x_values_ma = avg_roi_data_with_ma['timestamp'].tolist()
- y_values_ma = avg_roi_data_with_ma['moving_avg'].tolist()
-
- # Create hover template for the ROI moving average line
- hover_data_roi = []
- for idx, row in avg_roi_data_with_ma.iterrows():
- timestamp = row['timestamp']
- # Format timestamp to show only up to seconds (not milliseconds)
- formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-
- # Calculate number of active agents in the last 24 hours
- time_24h_ago = timestamp - pd.Timedelta(hours=24)
- active_agents = len(df[(df['timestamp'] >= time_24h_ago) &
- (df['timestamp'] <= timestamp)]['agent_id'].unique())
-
- hover_data_roi.append(
-                f"Time: {formatted_timestamp}<br>Avg ROI (3d window): {row['moving_avg']:.2f}%<br>Active agents (24h): {active_agents}"
- )
-
- fig.add_trace(
- go.Scatter(
- x=x_values_ma,
- y=y_values_ma,
- mode='lines', # Only lines for moving average
- line=dict(color='blue', width=2), # Thinner line
- name='Average ROI (3d window)',
- hovertext=hover_data_roi,
- hoverinfo='text',
- visible=True # Visible by default
- )
- )
- logger.info(f"Added 3-day moving average ROI trace with {len(x_values_ma)} points")
-
- # Update layout with average runtime information in the title
- fig.update_layout(
- title=dict(
- text=f"Modius Agents ROI (over avg. {avg_runtime:.1f} days runtime)",
- font=dict(
- family="Arial, sans-serif",
- size=22,
- color="black",
- weight="bold"
- )
- ),
- xaxis_title=None, # Remove x-axis title to use annotation instead
- yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
- template="plotly_white",
- height=600, # Reduced height for better fit on smaller screens
- autosize=True, # Enable auto-sizing for responsiveness
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="right",
- x=1,
- groupclick="toggleitem"
- ),
- margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
- hovermode="closest"
- )
-
- # Add single annotation for y-axis
- fig.add_annotation(
- x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
- y=0, # Center of the y-axis
- xref="paper",
- yref="y",
- text="ROI [%]",
- showarrow=False,
- font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- textangle=-90, # Rotate text to be vertical
- align="center"
- )
-
- # Update layout for legend
- fig.update_layout(
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="right",
- x=1,
- groupclick="toggleitem",
- font=dict(
- family="Arial, sans-serif",
- size=14, # Adjusted font size
- color="black",
- weight="bold"
- )
- )
- )
-
- # Update y-axis with fixed range of -100% to +100% for ROI
- fig.update_yaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- # Use fixed range instead of autoscaling
- autorange=False, # Disable autoscaling
- range=[-100, 100], # Set fixed range from -100% to +100%
- tickformat=".2f", # Format tick labels with 2 decimal places
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- title=None # Remove the built-in axis title since we're using annotations
- )
-
- # Update x-axis with better formatting and fixed range
- fig.update_xaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- # Set fixed range with start date and ending at the last valid date
- autorange=False, # Disable autoscaling
- range=[x_start_date, last_valid_date], # Set fixed range from start date to last valid date
- tickformat="%b %d", # Simplified date format without time
- tickangle=-30, # Angle the labels for better readability
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- title=None # Remove built-in title to use annotation instead
- )
-
- try:
- # Save the figure
- graph_file = "modius_roi_graph.html"
- fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
-
- # Also save as image for compatibility
- img_file = "modius_roi_graph.png"
- try:
- fig.write_image(img_file)
- logger.info(f"ROI graph saved to {graph_file} and {img_file}")
- except Exception as e:
- logger.error(f"Error saving ROI image: {e}")
- logger.info(f"ROI graph saved to {graph_file} only")
-
- # Return the figure object for direct use in Gradio
- return fig
- except Exception as e:
- # If the complex graph approach fails, create a simpler one
- logger.error(f"Error creating advanced ROI graph: {e}")
- logger.info("Falling back to Simpler ROI graph")
-
- # Create a simpler graph as fallback
- simple_fig = go.Figure()
-
- # Add zero line
- simple_fig.add_shape(
- type="line",
- line=dict(dash="solid", width=1.5, color="black"),
- y0=0, y1=0,
- x0=min_time, x1=max_time
- )
-
- # Simply plot the average ROI data with moving average
- if not avg_roi_data.empty:
- # Add moving average as a line
- simple_fig.add_trace(
- go.Scatter(
- x=avg_roi_data_with_ma['timestamp'],
- y=avg_roi_data_with_ma['moving_avg'],
- mode='lines',
- name='Average ROI (3d window)',
- line=dict(width=2, color='blue') # Thinner line
- )
- )
-
- # Simplified layout with adjusted y-axis range
- simple_fig.update_layout(
- title=dict(
- text="Modius Agents ROI",
- font=dict(
- family="Arial, sans-serif",
- size=22,
- color="black",
- weight="bold"
- )
- ),
- xaxis_title=None,
- yaxis_title=None,
- template="plotly_white",
- height=600,
- autosize=True,
- margin=dict(r=30, l=120, t=40, b=50)
- )
-
- # Update y-axis with fixed range of -100% to +100% for ROI
- simple_fig.update_yaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- autorange=False,
- range=[-100, 100],
- tickformat=".2f",
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
- title=None # Remove the built-in axis title since we're using annotations
- )
-
- # Update x-axis with better formatting and fixed range
- simple_fig.update_xaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- autorange=False,
- range=[x_start_date, max_time],
- tickformat="%b %d",
- tickangle=-30,
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
- )
-
- # Save the figure
- graph_file = "modius_roi_graph.html"
- simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
-
- # Return the simple figure
- return simple_fig
-
-def save_roi_to_csv(df):
- """Save the ROI data DataFrame to a CSV file and return the file path"""
- if df.empty:
- logger.error("No ROI data to save to CSV")
- return None
-
- # Define the CSV file path
- csv_file = "modius_roi_values.csv"
-
- # Save to CSV
- df.to_csv(csv_file, index=False)
- logger.info(f"ROI data saved to {csv_file}")
-
- return csv_file
-
-def create_time_series_graph_per_agent(df):
- """Create a time series graph for each agent using Plotly"""
- # Get unique agents
- unique_agents = df['agent_id'].unique()
-
- if len(unique_agents) == 0:
- logger.error("No agent data to plot")
- fig = go.Figure()
- fig.add_annotation(
- text="No agent data available",
- x=0.5, y=0.5,
- showarrow=False, font=dict(size=20)
- )
- return fig
-
- # Create a subplot figure for each agent
- fig = make_subplots(rows=len(unique_agents), cols=1,
- subplot_titles=[f"Agent: {df[df['agent_id'] == agent_id]['agent_name'].iloc[0]}"
- for agent_id in unique_agents],
- vertical_spacing=0.1)
-
- # Plot data for each agent
- for i, agent_id in enumerate(unique_agents):
- agent_data = df[df['agent_id'] == agent_id].copy()
- agent_name = agent_data['agent_name'].iloc[0]
- row = i + 1
-
- # Add zero line to separate APR and Performance
- fig.add_shape(
- type="line", line=dict(dash="solid", width=1.5, color="black"),
- y0=0, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
- row=row, col=1
- )
-
- # Add background colors
- fig.add_shape(
- type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0),
- y0=0, y1=1000, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
- row=row, col=1, layer="below"
- )
- fig.add_shape(
- type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0),
- y0=-1000, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
- row=row, col=1, layer="below"
- )
-
- # Create separate dataframes for different data types
- apr_data = agent_data[agent_data['metric_type'] == 'APR']
- perf_data = agent_data[agent_data['metric_type'] == 'Performance']
-
- # Sort all data by timestamp for the line plots
- combined_agent_data = agent_data.sort_values('timestamp')
-
- # Add main line connecting all points
- fig.add_trace(
- go.Scatter(
- x=combined_agent_data['timestamp'],
- y=combined_agent_data['apr'],
- mode='lines',
- line=dict(color='purple', width=2),
- name=f'{agent_name}',
- legendgroup=agent_name,
- showlegend=(i == 0), # Only show in legend once
- hovertemplate='Time: %{x}<br>Value: %{y:.2f}'
- ),
- row=row, col=1
- )
-
- # Add scatter points for APR values
- if not apr_data.empty:
- fig.add_trace(
- go.Scatter(
- x=apr_data['timestamp'],
- y=apr_data['apr'],
- mode='markers',
- marker=dict(color='blue', size=10, symbol='circle'),
- name='APR',
- legendgroup='APR',
- showlegend=(i == 0),
- hovertemplate='Time: %{x}<br>APR: %{y:.2f}'
- ),
- row=row, col=1
- )
-
- # Add scatter points for Performance values
- if not perf_data.empty:
- fig.add_trace(
- go.Scatter(
- x=perf_data['timestamp'],
- y=perf_data['apr'],
- mode='markers',
- marker=dict(color='red', size=10, symbol='square'),
- name='Performance',
- legendgroup='Performance',
- showlegend=(i == 0),
- hovertemplate='Time: %{x}<br>Performance: %{y:.2f}'
- ),
- row=row, col=1
- )
-
- # Update axes
- fig.update_xaxes(title_text="Time", row=row, col=1)
- fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)')
-
- # Update layout
- fig.update_layout(
- height=400 * len(unique_agents),
- width=1000,
- title_text="APR and Performance Values per Agent",
- template="plotly_white",
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="right",
- x=1
- ),
- margin=dict(r=20, l=20, t=30, b=20),
- hovermode="closest"
- )
-
- # Save the figure (still useful for reference)
- graph_file = "modius_apr_per_agent_graph.html"
- fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
-
- # Also save as image for compatibility
- img_file = "modius_apr_per_agent_graph.png"
- fig.write_image(img_file)
-
- logger.info(f"Per-agent graph saved to {graph_file} and {img_file}")
-
- # Return the figure object for direct use in Gradio
- return fig
-
-def write_debug_info(df, fig):
- """Minimal debug info function"""
- try:
- # Just log minimal information
- logger.debug(f"Graph created with {len(df)} data points and {len(fig.data)} traces")
- return True
- except Exception as e:
- logger.error(f"Error writing debug info: {e}")
- return False
-
-def create_combined_time_series_graph(df):
- """Create a time series graph showing average APR values across all agents"""
- if len(df) == 0:
- logger.error("No data to plot combined graph")
- fig = go.Figure()
- fig.add_annotation(
- text="No data available",
- x=0.5, y=0.5,
- showarrow=False, font=dict(size=20)
- )
- return fig
-
- # IMPORTANT: Force data types to ensure consistency
- df['apr'] = df['apr'].astype(float) # Ensure APR is float
- df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
-
- # Get min and max time for shapes
- min_time = df['timestamp'].min()
- max_time = df['timestamp'].max()
-
- # Use April 17th, 2025 as the fixed start date for APR graph
- x_start_date = datetime(2025, 4, 17)
-
- # CRITICAL: Log the exact dataframe we're using for plotting to help debug
- logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}")
- logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}")
- logger.info("Graph data - all positive APR values only")
- logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}")
-
- # Export full dataframe to CSV for debugging
- debug_csv = "debug_graph_data.csv"
- df.to_csv(debug_csv)
- logger.info(f"Exported graph data to {debug_csv} for debugging")
-
- # Write detailed data report
- with open("debug_graph_data_report.txt", "w") as f:
- f.write("==== GRAPH DATA REPORT ====\n\n")
- f.write(f"Total data points: {len(df)}\n")
- f.write(f"Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}\n\n")
-
- # Output per-agent details
- unique_agents = df['agent_id'].unique()
- f.write(f"Number of agents: {len(unique_agents)}\n\n")
-
- for agent_id in unique_agents:
- agent_data = df[df['agent_id'] == agent_id]
- agent_name = agent_data['agent_name'].iloc[0]
-
- f.write(f"== Agent: {agent_name} (ID: {agent_id}) ==\n")
- f.write(f" Total data points: {len(agent_data)}\n")
-
- apr_data = agent_data[agent_data['metric_type'] == 'APR']
-
- f.write(f" APR data points: {len(apr_data)}\n")
-
- if not apr_data.empty:
- f.write(f" APR values: {apr_data['apr'].tolist()}\n")
- f.write(f" APR timestamps: {[ts.strftime('%Y-%m-%d %H:%M:%S') if ts is not None else 'None' for ts in apr_data['timestamp']]}\n")
-
- f.write("\n")
-
- logger.info("Generated detailed graph data report")
-
- # ENSURE THERE ARE NO CONFLICTING AXES OR TRACES
- # Create Plotly figure in a clean state
- fig = go.Figure()
-
- # Enable autoscaling instead of fixed ranges
- logger.info("Using autoscaling for axes ranges")
-
- # Add background shapes for APR and Performance regions
- min_time = df['timestamp'].min()
- max_time = df['timestamp'].max()
-
- # Add shape for positive APR region (above zero)
- fig.add_shape(
- type="rect",
- fillcolor="rgba(230, 243, 255, 0.3)",
- line=dict(width=0),
- y0=0, y1=100, # Use a fixed positive value
- x0=min_time, x1=max_time,
- layer="below"
- )
-
- # Add shape for negative APR region (below zero)
- fig.add_shape(
- type="rect",
- fillcolor="rgba(255, 230, 230, 0.3)",
- line=dict(width=0),
- y0=-100, y1=0, # Use a fixed negative value
- x0=min_time, x1=max_time,
- layer="below"
- )
-
- # Add zero line
- fig.add_shape(
- type="line",
- line=dict(dash="solid", width=1.5, color="black"),
- y0=0, y1=0,
- x0=min_time, x1=max_time
- )
-
- # MODIFIED: Calculate average APR values across all agents for each timestamp
- # Filter for APR data only
- apr_data = df[df['metric_type'] == 'APR'].copy()
-
- # Filter out outliers (APR values above 200 or below -200)
- outlier_data = apr_data[(apr_data['apr'] > 200) | (apr_data['apr'] < -200)].copy()
- apr_data_filtered = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)].copy()
-
- # Log the outliers for better debugging
- if len(outlier_data) > 0:
- excluded_count = len(outlier_data)
- logger.info(f"Excluded {excluded_count} data points with outlier APR values (>200 or <-200)")
-
- # Group outliers by agent for detailed logging
- outlier_agents = outlier_data.groupby('agent_name')
- for agent_name, agent_outliers in outlier_agents:
- logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
- for idx, row in agent_outliers.iterrows():
- logger.info(f" - APR: {row['apr']}, timestamp: {row['timestamp']}")
-
- # Use the filtered data for all subsequent operations
- apr_data = apr_data_filtered
-
- # Group by timestamp and calculate mean APR
- avg_apr_data = apr_data.groupby('timestamp')['apr'].mean().reset_index()
-
- # Sort by timestamp
- avg_apr_data = avg_apr_data.sort_values('timestamp')
-
- # Log the average APR data
- logger.info(f"Calculated average APR data with {len(avg_apr_data)} points")
- for idx, row in avg_apr_data.iterrows():
- logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_apr={row['apr']}")
-
- # Calculate moving average based on a time window (3 days)
- # Sort data by timestamp
- apr_data_sorted = apr_data.sort_values('timestamp')
-
- # Create a new dataframe for the moving average
- avg_apr_data_with_ma = avg_apr_data.copy()
- avg_apr_data_with_ma['moving_avg'] = None # Initialize the moving average column
-
- # Define the time window for the moving average (3 days)
- time_window = pd.Timedelta(days=3)
- logger.info(f"Calculating moving average with time window of {time_window}")
-
- # Calculate moving averages: one for APR and one for adjusted APR
- avg_apr_data_with_ma['moving_avg'] = None # 3-day window for APR
- avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR
-
- # Keep track of the last valid adjusted_moving_avg value to handle gaps
- last_valid_adjusted_moving_avg = None
-
- # Calculate the moving averages for each timestamp
- for i, row in avg_apr_data_with_ma.iterrows():
- current_time = row['timestamp']
- window_start = current_time - time_window
-
- # Get all data points within the 3-day time window
- window_data = apr_data_sorted[
- (apr_data_sorted['timestamp'] >= window_start) &
- (apr_data_sorted['timestamp'] <= current_time)
- ]
-
- # Calculate the average APR for the 3-day time window
- if not window_data.empty:
- avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
- logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}")
-
- # Calculate adjusted APR moving average if data exists
- has_adjusted_apr = 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any()
- if has_adjusted_apr:
- adjusted_avg = window_data['adjusted_apr'].dropna().mean()
- avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = adjusted_avg
- last_valid_adjusted_moving_avg = adjusted_avg
- logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={adjusted_avg}")
- else:
- # If we don't have adjusted_apr data in this window but had some previously,
- # use the last valid value to maintain continuity in the graph
- if last_valid_adjusted_moving_avg is not None:
- avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
- logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
- else:
- # If no data points in the window, use the current value
- avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
- logger.debug(f"No data points in time window for {current_time}, using current value {row['apr']}")
-
- logger.info(f"Calculated time-based moving averages with {len(avg_apr_data_with_ma)} points")
-
- # Find the last date where we have valid moving average data
- last_valid_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_apr_data_with_ma['moving_avg'].dropna().empty else None
-
- # Find the last date where we have valid adjusted moving average data
- last_valid_adj_ma_date = None
- if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
- last_valid_adj_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['adjusted_moving_avg'].notna()]['timestamp'].max()
-
- # Determine the last valid date for either moving average
- last_valid_date = last_valid_ma_date
- if last_valid_adj_ma_date is not None:
- last_valid_date = max(last_valid_date, last_valid_adj_ma_date) if last_valid_date is not None else last_valid_adj_ma_date
-
- # If we don't have any valid moving average data, use the max time from the original data
- if last_valid_date is None:
- last_valid_date = df['timestamp'].max()
-
- logger.info(f"Last valid moving average date: {last_valid_ma_date}")
- logger.info(f"Last valid adjusted moving average date: {last_valid_adj_ma_date}")
- logger.info(f"Using last valid date for graph: {last_valid_date}")
-
- # Plot individual agent data points with agent names in hover, but limit display for scalability
- if not apr_data.empty:
- # Group by agent to use different colors for each agent
- unique_agents = apr_data['agent_name'].unique()
- colors = px.colors.qualitative.Plotly[:len(unique_agents)]
-
- # Create a color map for agents
- color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
-
- # Calculate the total number of data points per agent to determine which are most active
- agent_counts = apr_data['agent_name'].value_counts()
-
- # Determine how many agents to show individually (limit to top 5 most active)
- MAX_VISIBLE_AGENTS = 5
- top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
-
- logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
-
- # Add data points for each agent, but only make top agents visible by default
- for agent_name in unique_agents:
- agent_data = apr_data[apr_data['agent_name'] == agent_name]
-
- # Explicitly convert to Python lists
- x_values = agent_data['timestamp'].tolist()
- y_values = agent_data['apr'].tolist()
-
- # Change default visibility to False to hide all agent data points
- is_visible = False
-
- # Add data points as markers for APR
- fig.add_trace(
- go.Scatter(
- x=x_values,
- y=y_values,
- mode='markers', # Only markers for original data
- marker=dict(
- color=color_map[agent_name],
- symbol='circle',
- size=10,
- line=dict(width=1, color='black')
- ),
- name=f'Agent: {agent_name} (APR)',
- hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '',
- visible=is_visible # All agents hidden by default
- )
- )
- logger.info(f"Added APR data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
-
- # Add data points for adjusted APR if it exists
- if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any():
- x_values_adj = agent_data['timestamp'].tolist()
- y_values_adj = agent_data['adjusted_apr'].tolist()
-
- fig.add_trace(
- go.Scatter(
- x=x_values_adj,
- y=y_values_adj,
- mode='markers', # Only markers for original data
- marker=dict(
- color=color_map[agent_name],
- symbol='diamond', # Different symbol for adjusted APR
- size=10,
- line=dict(width=1, color='black')
- ),
- name=f'Agent: {agent_name} (Adjusted APR)',
- hovertemplate='Time: %{x}<br>Adjusted APR: %{y:.2f}<br>Agent: ' + agent_name + '',
- visible=is_visible # All agents hidden by default
- )
- )
- logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
-
- # Add APR moving average as a smooth line
- x_values_ma = avg_apr_data_with_ma['timestamp'].tolist()
- y_values_ma = avg_apr_data_with_ma['moving_avg'].tolist()
-
- # Create hover template for the APR moving average line
- hover_data_apr = []
- for idx, row in avg_apr_data_with_ma.iterrows():
- timestamp = row['timestamp']
- # Format timestamp to show only up to seconds (not milliseconds)
- formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-
- # Calculate number of active agents in the last 24 hours
- # Use ROI data after April 25th, 2025, and APR data before that date
- time_24h_ago = timestamp - pd.Timedelta(hours=24)
- april_25_2025 = datetime(2025, 4, 25)
-
- if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
- # After April 25th, 2025: Use ROI data
- roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
- (global_roi_df['timestamp'] <= timestamp)]
- active_agents = len(roi_window_data['agent_id'].unique())
- logger.debug(f"Using ROI data for active agent count at {timestamp} (after Apr 25): {active_agents} agents")
- else:
- # Before April 25th, 2025 or if ROI data is not available: Use APR data
- active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
- (apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
- logger.debug(f"Using APR data for active agent count at {timestamp} (before Apr 25): {active_agents} agents")
-
- hover_data_apr.append(
- f"Time: {formatted_timestamp}
Avg APR (3d window): {row['moving_avg']:.2f}
Active agents (24h): {active_agents}"
- )
-
- fig.add_trace(
- go.Scatter(
- x=x_values_ma,
- y=y_values_ma,
- mode='lines', # Only lines for moving average
- line=dict(color='red', width=2), # Thinner line
- name='Average APR (3d window)',
- hovertext=hover_data_apr,
- hoverinfo='text',
- visible=True # Visible by default
- )
- )
- logger.info(f"Added 3-day moving average APR trace with {len(x_values_ma)} points")
-
- # Add adjusted APR moving average line if it exists
- if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
- # Create a copy of the dataframe with forward-filled adjusted_moving_avg values
- # to ensure the line continues even when we have missing data
- filled_avg_apr_data = avg_apr_data_with_ma.copy()
- filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill')
-
- # Use the filled dataframe for the adjusted APR line
- x_values_adj = filled_avg_apr_data['timestamp'].tolist()
- y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()
-
- # Create hover template for the adjusted APR moving average line
- hover_data_adj = []
- for idx, row in filled_avg_apr_data.iterrows():
- timestamp = row['timestamp']
- # Format timestamp to show only up to seconds (not milliseconds)
- formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-
- # Calculate number of active agents in the last 24 hours
- # Use ROI data after April 25th, 2025, and APR data before that date
- time_24h_ago = timestamp - pd.Timedelta(hours=24)
- april_25_2025 = datetime(2025, 4, 25)
-
- if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
- # After April 25th, 2025: Use ROI data
- roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
- (global_roi_df['timestamp'] <= timestamp)]
- active_agents = len(roi_window_data['agent_id'].unique())
- logger.debug(f"Using ROI data for adjusted APR active agent count at {timestamp} (after Apr 25)")
- else:
- # Before April 25th, 2025 or if ROI data is not available: Use APR data
- active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
- (apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
- logger.debug(f"Using APR data for adjusted APR active agent count at {timestamp} (before Apr 25)")
-
- if pd.notna(row['adjusted_moving_avg']):
- hover_data_adj.append(
- f"Time: {formatted_timestamp}
Avg ETH Adjusted APR (3d window): {row['adjusted_moving_avg']:.2f}
Active agents (24h): {active_agents}"
- )
- else:
- hover_data_adj.append(
- f"Time: {formatted_timestamp}
Avg ETH Adjusted APR (3d window): N/A
Active agents (24h): {active_agents}"
- )
-
- fig.add_trace(
- go.Scatter(
- x=x_values_adj,
- y=y_values_adj_ma,
- mode='lines', # Only lines for moving average
- line=dict(color='green', width=4), # Thicker solid line for adjusted APR
- name='Average ETH Adjusted APR (3d window)',
- hovertext=hover_data_adj,
- hoverinfo='text',
- visible=True # Visible by default
- )
- )
- logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_adj)} points (with forward-filling for missing values)")
- else:
- logger.warning("No adjusted APR moving average data available to plot")
-
- # Removed cumulative APR as requested
- logger.info("Cumulative APR graph line has been removed as requested")
-
- # Update layout - use simple boolean values everywhere
- # Make chart responsive instead of fixed width
- fig.update_layout(
- title=dict(
- text="Modius Agents",
- font=dict(
- family="Arial, sans-serif",
- size=22,
- color="black",
- weight="bold"
- )
- ),
- xaxis_title=None, # Remove x-axis title to use annotation instead
- yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
- template="plotly_white",
- height=600, # Reduced height for better fit on smaller screens
- # Removed fixed width to enable responsiveness
- autosize=True, # Enable auto-sizing for responsiveness
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="right",
- x=1,
- groupclick="toggleitem"
- ),
- margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
- hovermode="closest"
- )
-
- # Add annotations for y-axis regions
- fig.add_annotation(
- x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
- y=-25, # Middle of the negative region
- xref="paper",
- yref="y",
- text="Percent drawdown [%]",
- showarrow=False,
- font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- textangle=-90, # Rotate text to be vertical
- align="center"
- )
-
- fig.add_annotation(
- x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
- y=50, # Middle of the positive region
- xref="paper",
- yref="y",
- text="Agent APR [%]",
- showarrow=False,
- font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- textangle=-90, # Rotate text to be vertical
- align="center"
- )
-
- # Remove x-axis title annotation
- # fig.add_annotation(
- # x=0.5, # Center of the x-axis
- # y=-0.15, # Below the x-axis
- # xref="paper",
- # yref="paper",
- # text="Date",
- # showarrow=False,
- # font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- # align="center"
- # )
-
- # Update layout for legend
- fig.update_layout(
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="right",
- x=1,
- groupclick="toggleitem",
- font=dict(
- family="Arial, sans-serif",
- size=14, # Adjusted font size
- color="black",
- weight="bold"
- )
- )
- )
-
- # Update y-axis with fixed range of -50 to +100 for psychological effect
- fig.update_yaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- # Use fixed range instead of autoscaling
- autorange=False, # Disable autoscaling
- range=[-50, 100], # Set fixed range from -50 to +100
- tickformat=".2f", # Format tick labels with 2 decimal places
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- title=None # Remove the built-in axis title since we're using annotations
- )
-
- # Update x-axis with better formatting and fixed range
- fig.update_xaxes(
- showgrid=True,
- gridwidth=1,
- gridcolor='rgba(0,0,0,0.1)',
- # Set fixed range with April 17 as start date and ending at the last valid date
- autorange=False, # Disable autoscaling
- range=[x_start_date, last_valid_date], # Set fixed range from April 17 to last valid date
- tickformat="%b %d", # Simplified date format without time
- tickangle=-30, # Angle the labels for better readability
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- title=None # Remove built-in title to use annotation instead
- )
-
- # SIMPLIFIED APPROACH: Do a direct plot without markers for comparison
- # This creates a simple, reliable fallback plot if the advanced one fails
- try:
- # Write detailed debug information before saving the figure
- write_debug_info(df, fig)
-
- # Save the figure (still useful for reference)
- graph_file = "modius_apr_combined_graph.html"
- fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
-
- # Also save as image for compatibility
- img_file = "modius_apr_combined_graph.png"
- try:
- fig.write_image(img_file)
- logger.info(f"Combined graph saved to {graph_file} and {img_file}")
- except Exception as e:
- logger.error(f"Error saving image: {e}")
- logger.info(f"Combined graph saved to {graph_file} only")
-
- # Return the figure object for direct use in Gradio
- return fig
- except Exception as e:
- # If the complex graph approach fails, create a simpler one
- logger.error(f"Error creating advanced graph: {e}")
- logger.info("Falling back to Simpler graph")
-
- # Create a simpler graph as fallback
- simple_fig = go.Figure()
-
- # Add zero line
- simple_fig.add_shape(
- type="line",
- line=dict(dash="solid", width=1.5, color="black"),
- y0=0, y1=0,
- x0=min_time, x1=max_time
- )
-
- # Define colors for the fallback graph
- fallback_colors = px.colors.qualitative.Plotly
-
- # Simply plot the average APR data with moving average
- if not avg_apr_data.empty:
- # Sort by timestamp
- avg_apr_data = avg_apr_data.sort_values('timestamp')
-
- # Calculate both moving averages for the fallback graph
- avg_apr_data_with_ma = avg_apr_data.copy()
- avg_apr_data_with_ma['moving_avg'] = None # 6-hour window
- avg_apr_data_with_ma['infinite_avg'] = None # Infinite window
-
- # Define the time window (6 hours)
- time_window = pd.Timedelta(hours=6)
-
- # Calculate the moving averages for each timestamp
- for i, row in avg_apr_data_with_ma.iterrows():
- current_time = row['timestamp']
- window_start = current_time - time_window
-
- # Get all data points within the 6-hour time window
- window_data = apr_data[
- (apr_data['timestamp'] >= window_start) &
- (apr_data['timestamp'] <= current_time)
- ]
-
- # Get all data points up to the current timestamp (infinite window)
- infinite_window_data = apr_data[
- apr_data['timestamp'] <= current_time
- ]
-
- # Calculate the average APR for the 6-hour time window
- if not window_data.empty:
- avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
- else:
- # If no data points in the window, use the current value
- avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
-
- # Calculate the average APR for the infinite window
- if not infinite_window_data.empty:
- avg_apr_data_with_ma.at[i, 'infinite_avg'] = infinite_window_data['apr'].mean()
- else:
- avg_apr_data_with_ma.at[i, 'infinite_avg'] = row['apr']
-
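# The 'infinite_avg' column computed above is simply a cumulative (expanding) mean of all
# points seen so far. A minimal sketch of the same quantity with pandas, assuming a frame
# shaped like apr_data above:
import pandas as pd

def cumulative_mean(frame: pd.DataFrame) -> pd.Series:
    """Expanding mean of 'apr' up to and including each timestamp."""
    return (
        frame.sort_values("timestamp")
             .set_index("timestamp")["apr"]
             .expanding()
             .mean()
    )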
- # Add data points for each agent, but only make top agents visible by default
- unique_agents = apr_data['agent_name'].unique()
- colors = px.colors.qualitative.Plotly[:len(unique_agents)]
- color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
-
- # Calculate the total number of data points per agent
- agent_counts = apr_data['agent_name'].value_counts()
-
- # Determine how many agents to show individually (limit to top 5 most active)
- MAX_VISIBLE_AGENTS = 5
- top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
-
- for agent_name in unique_agents:
- agent_data = apr_data[apr_data['agent_name'] == agent_name]
-
- # Determine if this agent should be visible by default
- is_visible = agent_name in top_agents
-
- # Add data points as markers
- simple_fig.add_trace(
- go.Scatter(
- x=agent_data['timestamp'],
- y=agent_data['apr'],
- mode='markers',
- name=f'Agent: {agent_name}',
- marker=dict(
- size=10,
- color=color_map[agent_name]
- ),
- hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '',
- visible=is_visible # Only top agents visible by default
- )
- )
-
- # Add 6-hour moving average as a line
- simple_fig.add_trace(
- go.Scatter(
- x=avg_apr_data_with_ma['timestamp'],
- y=avg_apr_data_with_ma['moving_avg'],
- mode='lines',
- name='Average APR (6h window)',
- line=dict(width=2, color='red') # Thinner line
- )
- )
-
- # Add infinite window moving average as another line
- simple_fig.add_trace(
- go.Scatter(
- x=avg_apr_data_with_ma['timestamp'],
- y=avg_apr_data_with_ma['infinite_avg'],
- mode='lines',
- name='Cumulative Average APR (all data)',
- line=dict(width=4, color='green') # Thicker solid line
- )
- )
-
- # Simplified layout with adjusted y-axis range and increased size
- simple_fig.update_layout(
- title=dict(
- text="Modius Agents",
- font=dict(
- family="Arial, sans-serif",
- size=22,
- color="black",
- weight="bold"
- )
- ),
- xaxis_title=None, # Remove x-axis title to use annotation instead
- yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
- yaxis=dict(
- # No fixed range - let Plotly autoscale
- autorange=True, # Explicitly enable autoscaling
- tickformat=".2f", # Format tick labels with 2 decimal places
- tickfont=dict(size=12) # Larger font for tick labels
- ),
- height=600, # Reduced height for better fit
- # Removed fixed width to enable responsiveness
- autosize=True, # Enable auto-sizing for responsiveness
- template="plotly_white", # Use a cleaner template
- margin=dict(r=30, l=120, t=40, b=50) # Increased bottom margin for x-axis title
- )
-
- # Add annotations for y-axis regions in the fallback graph
- simple_fig.add_annotation(
- x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
- y=-25, # Middle of the negative region
- xref="paper",
- yref="y",
- text="Percent drawdown [%]",
- showarrow=False,
- font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- textangle=-90, # Rotate text to be vertical
- align="center"
- )
-
- simple_fig.add_annotation(
- x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
- y=50, # Middle of the positive region
- xref="paper",
- yref="y",
- text="Agent APR [%]",
- showarrow=False,
- font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- textangle=-90, # Rotate text to be vertical
- align="center"
- )
-
- # Remove x-axis title annotation
- # simple_fig.add_annotation(
- # x=0.5, # Center of the x-axis
- # y=-0.15, # Below the x-axis
- # xref="paper",
- # yref="paper",
- # text="Date",
- # showarrow=False,
- # font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- # align="center"
- # )
-
- # Update legend font for fallback graph
- simple_fig.update_layout(
- legend=dict(
- font=dict(
- family="Arial, sans-serif",
- size=14, # Adjusted font size
- color="black",
- weight="bold"
- )
- )
- )
-
- # Apply fixed range to the x-axis for the fallback graph
- simple_fig.update_xaxes(
- autorange=False, # Disable autoscaling
- range=[x_start_date, max_time], # Set fixed range from April 17
- tickformat="%b %d", # Simplified date format without time
- tickangle=-30,
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
- title=None # Remove built-in title to use annotation instead
- )
-
- # Update y-axis tick font for fallback graph
- simple_fig.update_yaxes(
- tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") # Adjusted font size
- )
-
- # Add a note about hidden agents if there are more than MAX_VISIBLE_AGENTS
- if len(unique_agents) > MAX_VISIBLE_AGENTS:
- simple_fig.add_annotation(
- text=f"Note: Only showing top {MAX_VISIBLE_AGENTS} agents by default. Toggle others in legend.",
- xref="paper", yref="paper",
- x=0.5, y=1.05,
- showarrow=False,
- font=dict(size=12, color="gray"),
- align="center"
- )
-
- # Return the simple figure
- return simple_fig
-
-def save_to_csv(df):
- """Save the APR data DataFrame to a CSV file and return the file path"""
- if df.empty:
- logger.error("No APR data to save to CSV")
- return None
-
- # Define the CSV file path
- csv_file = "modius_apr_values.csv"
-
- # Save to CSV
- df.to_csv(csv_file, index=False)
- logger.info(f"APR data saved to {csv_file}")
-
- # Also generate a statistics CSV file
- stats_df = generate_statistics_from_data(df)
- stats_csv = "modius_apr_statistics.csv"
- stats_df.to_csv(stats_csv, index=False)
- logger.info(f"Statistics saved to {stats_csv}")
-
- # Log detailed statistics about adjusted APR
- if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any():
- adjusted_stats = stats_df[stats_df['avg_adjusted_apr'].notna()]
- logger.info(f"Agents with adjusted APR data: {len(adjusted_stats)} out of {len(stats_df)}")
-
- for _, row in adjusted_stats.iterrows():
- if row['agent_id'] != 'ALL': # Skip the overall stats row
- logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}")
-
- # Log overall adjusted APR stats
- overall_row = stats_df[stats_df['agent_id'] == 'ALL']
- if not overall_row.empty and pd.notna(overall_row['avg_adjusted_apr'].iloc[0]):
- logger.info(f"Overall adjusted APR stats: avg={overall_row['avg_adjusted_apr'].iloc[0]:.2f}, min={overall_row['min_adjusted_apr'].iloc[0]:.2f}, max={overall_row['max_adjusted_apr'].iloc[0]:.2f}")
-
- return csv_file
-
-def generate_statistics_from_data(df):
- """Generate statistics from the APR data"""
- if df.empty:
- return pd.DataFrame()
-
- # Get unique agents
- unique_agents = df['agent_id'].unique()
- stats_list = []
-
- # Generate per-agent statistics
- for agent_id in unique_agents:
- agent_data = df[df['agent_id'] == agent_id]
- agent_name = agent_data['agent_name'].iloc[0]
-
- # APR statistics
- apr_data = agent_data[agent_data['metric_type'] == 'APR']
- real_apr = apr_data[apr_data['is_dummy'] == False]
-
- # Performance statistics
- perf_data = agent_data[agent_data['metric_type'] == 'Performance']
- real_perf = perf_data[perf_data['is_dummy'] == False]
-
- # Check if adjusted_apr exists and has non-null values
- has_adjusted_apr = 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any()
-
- stats = {
- 'agent_id': agent_id,
- 'agent_name': agent_name,
- 'total_points': len(agent_data),
- 'apr_points': len(apr_data),
- 'performance_points': len(perf_data),
- 'real_apr_points': len(real_apr),
- 'real_performance_points': len(real_perf),
- 'avg_apr': apr_data['apr'].mean() if not apr_data.empty else None,
- 'avg_performance': perf_data['apr'].mean() if not perf_data.empty else None,
- 'max_apr': apr_data['apr'].max() if not apr_data.empty else None,
- 'min_apr': apr_data['apr'].min() if not apr_data.empty else None,
- 'avg_adjusted_apr': apr_data['adjusted_apr'].mean() if has_adjusted_apr else None,
- 'max_adjusted_apr': apr_data['adjusted_apr'].max() if has_adjusted_apr else None,
- 'min_adjusted_apr': apr_data['adjusted_apr'].min() if has_adjusted_apr else None,
- 'latest_timestamp': agent_data['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not agent_data.empty else None
- }
- stats_list.append(stats)
-
- # Generate overall statistics
- apr_only = df[df['metric_type'] == 'APR']
- perf_only = df[df['metric_type'] == 'Performance']
-
- # Check if adjusted_apr exists and has non-null values for overall stats
- has_adjusted_apr_overall = 'adjusted_apr' in apr_only.columns and apr_only['adjusted_apr'].notna().any()
-
- overall_stats = {
- 'agent_id': 'ALL',
- 'agent_name': 'All Agents',
- 'total_points': len(df),
- 'apr_points': len(apr_only),
- 'performance_points': len(perf_only),
- 'real_apr_points': len(apr_only[apr_only['is_dummy'] == False]),
- 'real_performance_points': len(perf_only[perf_only['is_dummy'] == False]),
- 'avg_apr': apr_only['apr'].mean() if not apr_only.empty else None,
- 'avg_performance': perf_only['apr'].mean() if not perf_only.empty else None,
- 'max_apr': apr_only['apr'].max() if not apr_only.empty else None,
- 'min_apr': apr_only['apr'].min() if not apr_only.empty else None,
- 'avg_adjusted_apr': apr_only['adjusted_apr'].mean() if has_adjusted_apr_overall else None,
- 'max_adjusted_apr': apr_only['adjusted_apr'].max() if has_adjusted_apr_overall else None,
- 'min_adjusted_apr': apr_only['adjusted_apr'].min() if has_adjusted_apr_overall else None,
- 'latest_timestamp': df['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not df.empty else None
- }
- stats_list.append(overall_stats)
-
- return pd.DataFrame(stats_list)
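# Typical use of the statistics helper above: build the per-agent plus overall table and pull
# out the aggregate row. A minimal sketch, assuming a frame shaped like the APR DataFrame used
# elsewhere in this module:
stats_df = generate_statistics_from_data(global_df)
overall = stats_df[stats_df["agent_id"] == "ALL"]
print(overall[["avg_apr", "min_apr", "max_apr"]])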
-
-# Create dummy functions for the commented out imports
-def create_transcation_visualizations():
- """Dummy implementation that returns a placeholder graph"""
- fig = go.Figure()
- fig.add_annotation(
- text="Blockchain data loading disabled - placeholder visualization",
- x=0.5, y=0.5, xref="paper", yref="paper",
- showarrow=False, font=dict(size=20)
- )
- return fig
-
-def create_active_agents_visualizations():
- """Dummy implementation that returns a placeholder graph"""
- fig = go.Figure()
- fig.add_annotation(
- text="Blockchain data loading disabled - placeholder visualization",
- x=0.5, y=0.5, xref="paper", yref="paper",
- showarrow=False, font=dict(size=20)
- )
- return fig
-
-# Comment out the blockchain connection code
"""
-# Load environment variables from .env file
-# RPC URLs
-OPTIMISM_RPC_URL = os.getenv('OPTIMISM_RPC_URL')
-MODE_RPC_URL = os.getenv('MODE_RPC_URL')
-
-# Initialize Web3 instances
-web3_instances = {
- 'optimism': Web3(Web3.HTTPProvider(OPTIMISM_RPC_URL)),
- 'mode': Web3(Web3.HTTPProvider(MODE_RPC_URL))
-}
-
-# Contract addresses for service registries
-contract_addresses = {
- 'optimism': '0x3d77596beb0f130a4415df3D2D8232B3d3D31e44',
- 'mode': '0x3C1fF68f5aa342D296d4DEe4Bb1cACCA912D95fE'
-}
-
-# Load the ABI from the provided JSON file
-with open('./contracts/service_registry_abi.json', 'r') as abi_file:
- contract_abi = json.load(abi_file)
+Optimized Modius Agent Performance Dashboard - Main Application Entry Point
-# Create the contract instances
-service_registries = {
- chain_name: web3.eth.contract(address=contract_addresses[chain_name], abi=contract_abi)
- for chain_name, web3 in web3_instances.items()
-}
-
-# Check if connections are successful
-for chain_name, web3_instance in web3_instances.items():
- if not web3_instance.is_connected():
- raise Exception(f"Failed to connect to the {chain_name.capitalize()} network.")
- else:
- print(f"Successfully connected to the {chain_name.capitalize()} network.")
+This is the refactored version of the original app.py with improved modularity,
+maintainability, and performance.
"""
+import os
+import sys
-# Dummy blockchain functions to replace the commented ones
-def get_transfers(integrator: str, wallet: str) -> str:
- """Dummy function that returns an empty result"""
- return {"transfers": []}
-
-def fetch_and_aggregate_transactions():
- """Dummy function that returns empty data"""
- return [], {}
-
-# Function to parse the transaction data and prepare it for visualization
-def process_transactions_and_agents(data):
- """Dummy function that returns empty dataframes"""
- df_transactions = pd.DataFrame()
- df_agents = pd.DataFrame(columns=['date', 'agent_count'])
- df_agents_weekly = pd.DataFrame()
- return df_transactions, df_agents, df_agents_weekly
-
-# Function to create visualizations based on the metrics
-def create_visualizations():
- """
- # Commenting out the original visualization code temporarily for debugging
- transactions_data = fetch_and_aggregate_transactions()
- df_transactions, df_agents, df_agents_weekly = process_transactions_and_agents(transactions_data)
-
- # Fetch daily value locked data
- df_tvl = pd.read_csv('daily_value_locked.csv')
-
- # Calculate total value locked per chain per day
- df_tvl["total_value_locked_usd"] = df_tvl["amount0_usd"] + df_tvl["amount1_usd"]
- df_tvl_daily = df_tvl.groupby(["date", "chain_name"])["total_value_locked_usd"].sum().reset_index()
- df_tvl_daily['date'] = pd.to_datetime(df_tvl_daily['date'])
-
- # Filter out dates with zero total value locked
- df_tvl_daily = df_tvl_daily[df_tvl_daily["total_value_locked_usd"] > 0]
-
- chain_name_map = {
- "mode": "Mode",
- "base": "Base",
- "ethereum": "Ethereum",
- "optimism": "Optimism"
- }
- df_tvl_daily["chain_name"] = df_tvl_daily["chain_name"].map(chain_name_map)
-
- # Plot total value locked
- fig_tvl = px.bar(
- df_tvl_daily,
- x="date",
- y="total_value_locked_usd",
- color="chain_name",
- opacity=0.7,
- title="Total Volume Invested in Pools in Different Chains Daily",
- labels={"date": "Date","chain_name": "Transaction Chain", "total_value_locked_usd": "Total Volume Invested (USD)"},
- barmode='stack',
- color_discrete_map={
- "Mode": "orange",
- "Base": "purple",
- "Ethereum": "darkgreen",
- "Optimism": "blue"
- }
- )
- fig_tvl.update_layout(
- xaxis_title="Date",
-
- yaxis=dict(tickmode='linear', tick0=0, dtick=4),
- xaxis=dict(
- tickmode='array',
- tickvals=df_tvl_daily['date'],
- ticktext=df_tvl_daily['date'].dt.strftime('%b %d'),
- tickangle=-45,
- ),
- bargap=0.6, # Increase gap between bar groups (0-1)
- bargroupgap=0.1, # Decrease gap between bars in a group (0-1)
- height=600,
- width=1200, # Specify width to prevent bars from being too wide
- showlegend=True,
- template='plotly_white'
- )
- fig_tvl.update_xaxes(tickformat="%b %d")
-
- chain_name_map = {
- 10: "Optimism",
- 8453: "Base",
- 1: "Ethereum",
- 34443: "Mode"
- }
-
- df_transactions["sending_chain"] = df_transactions["sending_chain"].map(chain_name_map)
- df_transactions["receiving_chain"] = df_transactions["receiving_chain"].map(chain_name_map)
-
- df_transactions["sending_chain"] = df_transactions["sending_chain"].astype(str)
- df_transactions["receiving_chain"] = df_transactions["receiving_chain"].astype(str)
- df_transactions['date'] = pd.to_datetime(df_transactions['date'])
- df_transactions["is_swap"] = df_transactions.apply(lambda x: x["sending_chain"] == x["receiving_chain"], axis=1)
-
- swaps_per_chain = df_transactions[df_transactions["is_swap"]].groupby(["date", "sending_chain"]).size().reset_index(name="swap_count")
- fig_swaps_chain = px.bar(
- swaps_per_chain,
- x="date",
- y="swap_count",
- color="sending_chain",
- title="Chain Daily Activity: Swaps",
- labels={"sending_chain": "Transaction Chain", "swap_count": "Daily Swap Nr"},
- barmode="stack",
- opacity=0.7,
- color_discrete_map={
- "Optimism": "blue",
- "Ethereum": "darkgreen",
- "Base": "purple",
- "Mode": "orange"
- }
- )
- fig_swaps_chain.update_layout(
- xaxis_title="Date",
- yaxis_title="Daily Swap Count",
- yaxis=dict(tickmode='linear', tick0=0, dtick=1),
- xaxis=dict(
- tickmode='array',
- tickvals=[d for d in swaps_per_chain['date']],
- ticktext=[d.strftime('%m-%d') for d in swaps_per_chain['date']],
- tickangle=-45,
- ),
- bargap=0.6,
- bargroupgap=0.1,
- height=600,
- width=1200,
- margin=dict(l=50, r=50, t=50, b=50),
- showlegend=True,
- legend=dict(
- yanchor="top",
- y=0.99,
- xanchor="right",
- x=0.99
- ),
- template='plotly_white'
- )
- fig_swaps_chain.update_xaxes(tickformat="%m-%d")
-
- df_transactions["is_bridge"] = df_transactions.apply(lambda x: x["sending_chain"] != x["receiving_chain"], axis=1)
-
- bridges_per_chain = df_transactions[df_transactions["is_bridge"]].groupby(["date", "sending_chain"]).size().reset_index(name="bridge_count")
- fig_bridges_chain = px.bar(
- bridges_per_chain,
- x="date",
- y="bridge_count",
- color="sending_chain",
- title="Chain Daily Activity: Bridges",
- labels={"sending_chain": "Transaction Chain", "bridge_count": "Daily Bridge Nr"},
- barmode="stack",
- opacity=0.7,
- color_discrete_map={
- "Optimism": "blue",
- "Ethereum": "darkgreen",
- "Base": "purple",
- "Mode": "orange"
- }
- )
- fig_bridges_chain.update_layout(
- xaxis_title="Date",
- yaxis_title="Daily Bridge Count",
- yaxis=dict(tickmode='linear', tick0=0, dtick=1),
- xaxis=dict(
- tickmode='array',
- tickvals=[d for d in bridges_per_chain['date']],
- ticktext=[d.strftime('%m-%d') for d in bridges_per_chain['date']],
- tickangle=-45,
- ),
- bargap=0.6,
- bargroupgap=0.1,
- height=600,
- width=1200,
- margin=dict(l=50, r=50, t=50, b=50),
- showlegend=True,
- legend=dict(
- yanchor="top",
- y=0.99,
- xanchor="right",
- x=0.99
- ),
- template='plotly_white'
- )
- fig_bridges_chain.update_xaxes(tickformat="%m-%d")
- df_agents['date'] = pd.to_datetime(df_agents['date'])
+# Add the modius_performance package to the path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
- daily_agents_df = df_agents.groupby('date').agg({'agent_count': 'sum'}).reset_index()
- daily_agents_df.rename(columns={'agent_count': 'daily_agent_count'}, inplace=True)
- # Sort by date to ensure proper running total calculation
- daily_agents_df = daily_agents_df.sort_values('date')
-
- # Create week column
- daily_agents_df['week'] = daily_agents_df['date'].dt.to_period('W').apply(lambda r: r.start_time)
-
- # Calculate running total within each week
- daily_agents_df['running_weekly_total'] = daily_agents_df.groupby('week')['daily_agent_count'].cumsum()
-
- # Create final merged dataframe
- weekly_merged_df = daily_agents_df.copy()
- adjustment_date = pd.to_datetime('2024-11-15')
- weekly_merged_df.loc[weekly_merged_df['date'] == adjustment_date, 'daily_agent_count'] -= 1
- weekly_merged_df.loc[weekly_merged_df['date'] == adjustment_date, 'running_weekly_total'] -= 1
- fig_agents_registered = go.Figure(data=[
- go.Bar(
- name='Daily nr of Registered Agents',
- x=weekly_merged_df['date'].dt.strftime("%b %d"),
- y=weekly_merged_df['daily_agent_count'],
- opacity=0.7,
- marker_color='blue'
- ),
- go.Bar(
- name='Weekly Nr of Registered Agents',
- x=weekly_merged_df['date'].dt.strftime("%b %d"),
- y=weekly_merged_df['running_weekly_total'],
- opacity=0.7,
- marker_color='purple'
- )
- ])
+from modius_performance.utils.logging_config import setup_logging
+from modius_performance.ui.dashboard import create_dashboard
- fig_agents_registered.update_layout(
- xaxis_title='Date',
- yaxis_title='Number of Agents',
- title="Nr of Agents Registered",
- barmode='group',
- yaxis=dict(tickmode='linear', tick0=0, dtick=1),
- xaxis=dict(
- categoryorder='array',
- categoryarray=weekly_merged_df['date'].dt.strftime("%b %d"),
- tickangle=-45
- ),
- bargap=0.3,
- height=600,
- width=1200,
- showlegend=True,
- legend=dict(
- yanchor="top",
- xanchor="right",
- ),
- template='plotly_white',
- )
- return fig_swaps_chain, fig_bridges_chain, fig_agents_registered,fig_tvl
- """
- # Placeholder figures for testing
- fig_swaps_chain = go.Figure()
- fig_swaps_chain.add_annotation(
- text="Blockchain data loading disabled - placeholder visualization",
- x=0.5, y=0.5, xref="paper", yref="paper",
- showarrow=False, font=dict(size=20)
- )
-
- fig_bridges_chain = go.Figure()
- fig_bridges_chain.add_annotation(
- text="Blockchain data loading disabled - placeholder visualization",
- x=0.5, y=0.5, xref="paper", yref="paper",
- showarrow=False, font=dict(size=20)
- )
+def main():
+ """Main application entry point."""
+ # Set up logging
+ setup_logging()
- fig_agents_registered = go.Figure()
- fig_agents_registered.add_annotation(
- text="Blockchain data loading disabled - placeholder visualization",
- x=0.5, y=0.5, xref="paper", yref="paper",
- showarrow=False, font=dict(size=20)
- )
+ # Create and launch the dashboard
+ dashboard = create_dashboard()
- fig_tvl = go.Figure()
- fig_tvl.add_annotation(
- text="Blockchain data loading disabled - placeholder visualization",
- x=0.5, y=0.5, xref="paper", yref="paper",
- showarrow=False, font=dict(size=20)
+ # Launch the application with automatic port selection
+ dashboard.launch(
+ server_name="0.0.0.0",
+ server_port=None, # Let Gradio find an available port
+ share=False,
+ debug=False,
+ show_error=True
)
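# Typical invocation of the new entry point; a minimal usage sketch (assumption: the standard
# __main__ guard is not shown in this hunk):
if __name__ == "__main__":
    main()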
-
- return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl
-# Modify dashboard function to make the plot container responsive
-def dashboard():
- with gr.Blocks() as demo:
- gr.Markdown("# Average Modius Agent Performance")
-
- # Create tabs for APR, ROI, Volume, and APR vs Agent Hash metrics
- with gr.Tabs():
- # APR Metrics tab
- with gr.Tab("APR Metrics"):
- with gr.Column():
- refresh_apr_btn = gr.Button("Refresh APR Data")
-
- # Create container for plotly figure with responsive sizing
- with gr.Column():
- combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot")
-
- # Create compact toggle controls at the bottom of the graph
- with gr.Row(visible=True):
- gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title")
-
- with gr.Row():
- with gr.Column():
- with gr.Row(elem_id="apr_toggle_container"):
- with gr.Column(scale=1, min_width=150):
- apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle")
-
- with gr.Column(scale=1, min_width=150):
- adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle")
-
- # Add a text area for status messages
- apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
-
- # ROI Metrics tab
- with gr.Tab("ROI Metrics"):
- with gr.Column():
- refresh_roi_btn = gr.Button("Refresh ROI Data")
-
- # Create container for plotly figure with responsive sizing
- with gr.Column():
- combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot")
-
- # Create compact toggle controls at the bottom of the graph
- with gr.Row(visible=True):
- gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title")
-
- with gr.Row():
- with gr.Column():
- with gr.Row(elem_id="roi_toggle_container"):
- with gr.Column(scale=1, min_width=150):
- roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle")
-
- # Add a text area for status messages
- roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
-
- # Volume Metrics tab
- with gr.Tab("Volume Metrics"):
- with gr.Column():
- refresh_volume_btn = gr.Button("Refresh Volume Data")
-
- # Create container for plotly figure with responsive sizing
- with gr.Column():
- combined_volume_graph = gr.Plot(label="Volume for All Agents", elem_id="responsive_volume_plot")
-
- # Create compact toggle controls at the bottom of the graph
- with gr.Row(visible=True):
- gr.Markdown("##### Toggle Graph Lines", elem_id="volume_toggle_title")
-
- with gr.Row():
- with gr.Column():
- with gr.Row(elem_id="volume_toggle_container"):
- with gr.Column(scale=1, min_width=150):
- volume_toggle = gr.Checkbox(label="Volume Average", value=True, elem_id="volume_toggle")
-
- # Add a text area for status messages
- volume_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
-
- # Performance Graph tab
- with gr.Tab("Performance Graph"):
- with gr.Column():
- refresh_apr_hash_btn = gr.Button("Refresh APR vs Agent Hash Data")
-
- # Create container for plotly figure with responsive sizing
- with gr.Column():
- apr_vs_agent_hash_graph = gr.Plot(label="APR vs Agent Hash", elem_id="responsive_apr_hash_plot")
-
- # Add a text area for status messages
- apr_hash_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
-
- # Add custom CSS for making the plots responsive
- gr.HTML("""
-
- """)
-
- # Function to update the APR graph
- def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True):
- # Generate visualization and get figure object directly
- try:
- combined_fig, _ = generate_apr_visualizations()
-
- # Update visibility of traces based on toggle values
- for i, trace in enumerate(combined_fig.data):
- # Check if this is a moving average trace
- if trace.name == 'Average APR (3d window)':
- trace.visible = show_apr_ma
- elif trace.name == 'Average ETH Adjusted APR (3d window)':
- trace.visible = show_adjusted_apr_ma
-
- return combined_fig
- except Exception as e:
- logger.exception("Error generating APR visualization")
- # Create error figure
- error_fig = go.Figure()
- error_fig.add_annotation(
- text=f"Error: {str(e)}",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15, color="red")
- )
- return error_fig
-
- # Function to update the ROI graph
- def update_roi_graph(show_roi_ma=True):
- # Generate visualization and get figure object directly
- try:
- combined_fig, _ = generate_roi_visualizations()
-
- # Update visibility of traces based on toggle values
- for i, trace in enumerate(combined_fig.data):
- # Check if this is a moving average trace
- if trace.name == 'Average ROI (3d window)':
- trace.visible = show_roi_ma
-
- return combined_fig
- except Exception as e:
- logger.exception("Error generating ROI visualization")
- # Create error figure
- error_fig = go.Figure()
- error_fig.add_annotation(
- text=f"Error: {str(e)}",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15, color="red")
- )
- return error_fig
-
- # Function to update the Volume graph
- def update_volume_graph(show_volume_ma=True):
- # Generate visualization and get figure object directly
- try:
- combined_fig, _ = generate_volume_visualizations()
-
- # Update visibility of traces based on toggle values
- for i, trace in enumerate(combined_fig.data):
- # Check if this is a moving average trace
- if trace.name == 'Average Volume (3d window)':
- trace.visible = show_volume_ma
-
- return combined_fig
- except Exception as e:
- logger.exception("Error generating Volume visualization")
- # Create error figure
- error_fig = go.Figure()
- error_fig.add_annotation(
- text=f"Error: {str(e)}",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15, color="red")
- )
- return error_fig
-
- # Initialize the APR graph on load with a placeholder
- apr_placeholder_fig = go.Figure()
- apr_placeholder_fig.add_annotation(
- text="Click 'Refresh APR Data' to load APR graph",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15)
- )
- combined_apr_graph.value = apr_placeholder_fig
-
- # Initialize the ROI graph on load with a placeholder
- roi_placeholder_fig = go.Figure()
- roi_placeholder_fig.add_annotation(
- text="Click 'Refresh ROI Data' to load ROI graph",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15)
- )
- combined_roi_graph.value = roi_placeholder_fig
-
- # Initialize the Volume graph on load with a placeholder
- volume_placeholder_fig = go.Figure()
- volume_placeholder_fig.add_annotation(
- text="Click 'Refresh Volume Data' to load Volume graph",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15)
- )
- combined_volume_graph.value = volume_placeholder_fig
-
- # Initialize the APR vs Agent Hash graph on load with a placeholder
- apr_hash_placeholder_fig = go.Figure()
- apr_hash_placeholder_fig.add_annotation(
- text="Click 'Refresh APR vs Agent Hash Data' to load APR vs Agent Hash graph",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15)
- )
- apr_vs_agent_hash_graph.value = apr_hash_placeholder_fig
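-        # Assigning to a component's .value here only seeds the UI with a
-        # placeholder figure; the actual data is fetched when the corresponding
-        # "Refresh ..." button is clicked.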
-
- # Function to update the APR graph based on toggle states
- def update_apr_graph_with_toggles(apr_visible, adjusted_apr_visible):
- return update_apr_graph(apr_visible, adjusted_apr_visible)
-
- # Function to update the ROI graph based on toggle states
- def update_roi_graph_with_toggles(roi_visible):
- return update_roi_graph(roi_visible)
-
- # Function to refresh APR data
- def refresh_apr_data():
- """Refresh APR data from the database and update the visualization"""
- try:
- # Fetch new APR data
- logger.info("Manually refreshing APR data...")
- fetch_apr_data_from_db()
-
- # Verify data was fetched successfully
- if global_df is None or len(global_df) == 0:
- logger.error("Failed to fetch APR data")
- return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
-
- # Log info about fetched data with focus on adjusted_apr
- may_10_2025 = datetime(2025, 5, 10)
- if 'timestamp' in global_df and 'adjusted_apr' in global_df:
- after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
- with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
-
- logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
- logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
-
- # Generate new visualization
- logger.info("Generating new APR visualization...")
- new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
- return new_graph, "APR data refreshed successfully"
- except Exception as e:
- logger.error(f"Error refreshing APR data: {e}")
- return combined_apr_graph.value, f"Error: {str(e)}"
-
- # Function to refresh ROI data
- def refresh_roi_data():
- """Refresh ROI data from the database and update the visualization"""
- try:
- # Fetch new ROI data
- logger.info("Manually refreshing ROI data...")
- fetch_apr_data_from_db() # This also fetches ROI data
-
- # Verify data was fetched successfully
- if global_roi_df is None or len(global_roi_df) == 0:
- logger.error("Failed to fetch ROI data")
- return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details."
-
- # Generate new visualization
- logger.info("Generating new ROI visualization...")
- new_graph = update_roi_graph(roi_toggle.value)
- return new_graph, "ROI data refreshed successfully"
- except Exception as e:
- logger.error(f"Error refreshing ROI data: {e}")
- return combined_roi_graph.value, f"Error: {str(e)}"
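-        # Each refresh handler returns a (figure, status message) pair, matching
-        # the two outputs wired to the corresponding button's click event below.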
-
- # Set up the button click event for APR refresh
- refresh_apr_btn.click(
- fn=refresh_apr_data,
- inputs=[],
- outputs=[combined_apr_graph, apr_status_text]
- )
-
- # Set up the button click event for ROI refresh
- refresh_roi_btn.click(
- fn=refresh_roi_data,
- inputs=[],
- outputs=[combined_roi_graph, roi_status_text]
- )
-
- # Set up the toggle switch events for APR
- apr_toggle.change(
- fn=update_apr_graph_with_toggles,
- inputs=[apr_toggle, adjusted_apr_toggle],
- outputs=[combined_apr_graph]
- )
-
- adjusted_apr_toggle.change(
- fn=update_apr_graph_with_toggles,
- inputs=[apr_toggle, adjusted_apr_toggle],
- outputs=[combined_apr_graph]
- )
-
- # Set up the toggle switch events for ROI
- roi_toggle.change(
- fn=update_roi_graph_with_toggles,
- inputs=[roi_toggle],
- outputs=[combined_roi_graph]
- )
-
- # Function to refresh volume data
- def refresh_volume_data():
- """Refresh volume data from the database and update the visualization"""
- try:
- # Fetch new volume data
- logger.info("Manually refreshing volume data...")
- fetch_apr_data_from_db() # This also fetches volume data
-
- # Verify data was fetched successfully
- if global_df is None or len(global_df) == 0:
- logger.error("Failed to fetch volume data")
- return combined_volume_graph.value, "Error: Failed to fetch volume data. Check the logs for details."
-
- # Generate new visualization
- logger.info("Generating new volume visualization...")
- new_graph = update_volume_graph(volume_toggle.value)
- return new_graph, "Volume data refreshed successfully"
- except Exception as e:
- logger.error(f"Error refreshing volume data: {e}")
- return combined_volume_graph.value, f"Error: {str(e)}"
-
- # Set up the button click event for volume refresh
- refresh_volume_btn.click(
- fn=refresh_volume_data,
- inputs=[],
- outputs=[combined_volume_graph, volume_status_text]
- )
-
- # Set up the toggle switch events for volume
- volume_toggle.change(
- fn=update_volume_graph,
- inputs=[volume_toggle],
- outputs=[combined_volume_graph]
- )
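-        # The toggle .change events pass the current checkbox state as input, so
-        # the figure is rebuilt with only the selected moving-average traces visible.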
-
- # Function to update the APR vs Agent Hash graph
- def update_apr_vs_agent_hash_graph():
- """Update the APR vs Agent Hash graph"""
- try:
- # Generate visualization and get figure object directly
- fig, _ = generate_apr_vs_agent_hash_visualizations(global_df)
- return fig
- except Exception as e:
- logger.exception("Error generating APR vs Agent Hash visualization")
- # Create error figure
- error_fig = go.Figure()
- error_fig.add_annotation(
- text=f"Error: {str(e)}",
- x=0.5, y=0.5,
- showarrow=False,
- font=dict(size=15, color="red")
- )
- return error_fig
-
- # Function to refresh APR vs Agent Hash data
- def refresh_apr_vs_agent_hash_data():
- """Refresh APR vs Agent Hash data from the database and update the visualization"""
- try:
- # Fetch new APR data if not already fetched
- logger.info("Manually refreshing APR vs Agent Hash data...")
- if global_df is None or global_df.empty:
- fetch_apr_data_from_db()
-
- # Verify data was fetched successfully
- if global_df is None or len(global_df) == 0:
- logger.error("Failed to fetch APR data for APR vs Agent Hash visualization")
- return apr_vs_agent_hash_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
-
- # Check if agent_hash column exists
- if 'agent_hash' not in global_df.columns:
- logger.error("agent_hash column not found in DataFrame")
- return apr_vs_agent_hash_graph.value, "Error: agent_hash column not found in data. Check the logs for details."
-
- # Generate new visualization
- logger.info("Generating new APR vs Agent Hash visualization...")
- new_graph = update_apr_vs_agent_hash_graph()
- return new_graph, "APR vs Agent Hash data refreshed successfully"
- except Exception as e:
- logger.error(f"Error refreshing APR vs Agent Hash data: {e}")
- return apr_vs_agent_hash_graph.value, f"Error: {str(e)}"
-
- # Set up the button click event for APR vs Agent Hash refresh
- refresh_apr_hash_btn.click(
- fn=refresh_apr_vs_agent_hash_data,
- inputs=[],
- outputs=[apr_vs_agent_hash_graph, apr_hash_status_text]
- )
-
- return demo
-# Launch the dashboard
if __name__ == "__main__":
- dashboard().launch()
-
-def generate_adjusted_apr_report():
- """
- Generate a detailed report about adjusted_apr data availability and save it to a file.
-    Returns the path to the generated report file, or None if no adjusted_apr data is available.
- """
- global global_df
-
- if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns:
- logger.warning("No adjusted_apr data available for report generation")
- return None
-
- # Create a report file
- report_path = "adjusted_apr_report.txt"
-
- with open(report_path, "w") as f:
- f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n")
-
- # Summary statistics
- total_records = len(global_df)
- records_with_adjusted = global_df['adjusted_apr'].notna().sum()
- pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
-
- f.write(f"Total APR records: {total_records}\n")
- f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")
-
- # First and last data points
- if records_with_adjusted > 0:
- has_adjusted = global_df[global_df['adjusted_apr'].notna()]
- first_date = has_adjusted['timestamp'].min()
- last_date = has_adjusted['timestamp'].max()
- f.write(f"First adjusted_apr record: {first_date}\n")
- f.write(f"Last adjusted_apr record: {last_date}\n")
- f.write(f"Date range: {(last_date - first_date).days} days\n\n")
-
- # Agent statistics
- f.write("===== AGENT STATISTICS =====\n\n")
-
- # Group by agent
- agent_stats = []
-
- for agent_id in global_df['agent_id'].unique():
- agent_data = global_df[global_df['agent_id'] == agent_id]
- agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
-
- total_agent_records = len(agent_data)
- agent_with_adjusted = agent_data['adjusted_apr'].notna().sum()
- coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
-
- agent_stats.append({
- 'agent_id': agent_id,
- 'agent_name': agent_name,
- 'total_records': total_agent_records,
- 'with_adjusted': agent_with_adjusted,
- 'coverage_pct': coverage_pct
- })
-
- # Sort by coverage percentage (descending)
- agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True)
-
- # Write agent statistics
- for agent in agent_stats:
- f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
- f.write(f" Records: {agent['total_records']}\n")
- f.write(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")
-
- # If agent has adjusted data, show date range
- agent_data = global_df[global_df['agent_id'] == agent['agent_id']]
- agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
-
- if not agent_adjusted.empty:
- first = agent_adjusted['timestamp'].min()
- last = agent_adjusted['timestamp'].max()
- f.write(f" First adjusted_apr: {first}\n")
- f.write(f" Last adjusted_apr: {last}\n")
-
- f.write("\n")
-
- # Check for May 10th cutoff issue
- f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
- may_10_2025 = datetime(2025, 5, 10)
-
- before_cutoff = global_df[global_df['timestamp'] < may_10_2025]
- after_cutoff = global_df[global_df['timestamp'] >= may_10_2025]
-
- # Calculate coverage before and after
- before_total = len(before_cutoff)
- before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
- before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0
-
- after_total = len(after_cutoff)
- after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
- after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0
-
-        f.write("Before May 10th, 2025:\n")
- f.write(f" Records: {before_total}\n")
- f.write(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")
-
-        f.write("After May 10th, 2025:\n")
- f.write(f" Records: {after_total}\n")
- f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n")
-
- # Check for agents that had data before but not after
- if before_total > 0 and after_total > 0:
- agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
- agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
-
- missing_after = agents_before - agents_after
- new_after = agents_after - agents_before
-
- if missing_after:
- f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n")
-
- # For each missing agent, show the last date with adjusted_apr
- for agent_id in missing_after:
- agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) &
- (before_cutoff['adjusted_apr'].notna())]
- if not agent_data.empty:
- last_date = agent_data['timestamp'].max()
- agent_name = agent_data['agent_name'].iloc[0]
- f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n")
-
- if new_after:
- f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n")
-
- logger.info(f"Adjusted APR report generated: {report_path}")
- return report_path
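-# Minimal usage sketch (hypothetical; assumes fetch_apr_data_from_db() has
-# already populated global_df with an 'adjusted_apr' column):
-#
-#     fetch_apr_data_from_db()
-#     report_path = generate_adjusted_apr_report()
-#     if report_path:
-#         print(f"Adjusted APR report written to {report_path}")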
+ main()