import requests
import pandas as pd
import gradio as gr
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import json
# Commenting out blockchain-related imports that cause loading issues
# from web3 import Web3
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import random
import logging
from typing import List, Dict, Any, Optional
# Comment out the import for now and replace with dummy functions
# from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations
# APR visualization functions integrated directly
# Set up logging with appropriate verbosity
logging.basicConfig(
level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity
format="%(asctime)s - %(levelname)s - %(message)s",
handlers=[
logging.FileHandler("app_debug.log"), # Log to file for persistence
logging.StreamHandler() # Also log to console
]
)
logger = logging.getLogger(__name__)
# Reduce third-party library logging
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("matplotlib").setLevel(logging.WARNING)
# Log the startup information
logger.info("============= APPLICATION STARTING =============")
logger.info(f"Running from directory: {os.getcwd()}")
# Global variables to store the data for reuse
global_df = None
global_roi_df = None
# Configuration
API_BASE_URL = "https://afmdb.autonolas.tech"
logger.info(f"Using API endpoint: {API_BASE_URL}")
def get_agent_type_by_name(type_name: str) -> Optional[Dict[str, Any]]:
"""Get agent type by name"""
url = f"{API_BASE_URL}/api/agent-types/name/{type_name}"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"Agent type '{type_name}' not found")
return None
response.raise_for_status()
result = response.json()
logger.debug(f"Agent type response: {result}")
return result
except Exception as e:
logger.error(f"Error in get_agent_type_by_name: {e}")
return None
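# Contract: returns the parsed JSON dict on success, or None on 404/any error.
# Illustrative use (mirrors fetch_apr_data_from_db below):
#   modius_type = get_agent_type_by_name("Modius")
#   type_id = modius_type["type_id"] if modius_type else None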
def get_attribute_definition_by_name(attr_name: str) -> Optional[Dict[str, Any]]:
"""Get attribute definition by name"""
url = f"{API_BASE_URL}/api/attributes/name/{attr_name}"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"Attribute definition '{attr_name}' not found")
return None
response.raise_for_status()
result = response.json()
logger.debug(f"Attribute definition response: {result}")
return result
except Exception as e:
logger.error(f"Error in get_attribute_definition_by_name: {e}")
return None
def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]:
"""Get all agents of a specific type"""
url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"No agents found for type ID {type_id}")
return []
response.raise_for_status()
result = response.json()
logger.debug(f"Agents count: {len(result)}")
logger.debug(f"First few agents: {result[:2] if result else []}")
return result
except Exception as e:
logger.error(f"Error in get_agents_by_type: {e}")
return []
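# Contract: returns [] (never None) on errors, so callers can iterate or
# truth-test the result directly, as fetch_apr_data_from_db does.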
def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]:
"""Get all attribute values for a specific attribute definition across all agents of a given list"""
all_attributes = []
logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}")
# For each agent, get their attributes and filter for the one we want
for agent in agents:
agent_id = agent["agent_id"]
# Call the /api/agents/{agent_id}/attributes/ endpoint
url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/"
logger.debug(f"Calling API for agent {agent_id}: {url}")
try:
response = requests.get(url, params={"limit": 1000})
if response.status_code == 404:
logger.error(f"No attributes found for agent ID {agent_id}")
continue
response.raise_for_status()
agent_attrs = response.json()
logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes")
# Filter for the specific attribute definition ID
filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id]
logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes")
if filtered_attrs:
logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}")
all_attributes.extend(filtered_attrs)
except requests.exceptions.RequestException as e:
logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}")
logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}")
return all_attributes
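# Note: this is an N+1 request pattern (one call per agent), and limit=1000
# presumably caps the rows returned per agent, so agents with more attribute
# rows than that would be silently truncated.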
def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str:
"""Get agent name from agent ID"""
for agent in agents:
if agent["agent_id"] == agent_id:
return agent["agent_name"]
return "Unknown"
def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]:
"""Extract APR value, adjusted APR value, ROI value, and timestamp from JSON value"""
try:
agent_id = attr.get("agent_id", "unknown")
logger.debug(f"Extracting APR value for agent {agent_id}")
# The APR value is stored in the json_value field
if attr["json_value"] is None:
logger.debug(f"Agent {agent_id}: json_value is None")
return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False}
# If json_value is a string, parse it
if isinstance(attr["json_value"], str):
logger.debug(f"Agent {agent_id}: json_value is string, parsing")
json_data = json.loads(attr["json_value"])
else:
json_data = attr["json_value"]
apr = json_data.get("apr")
adjusted_apr = json_data.get("adjusted_apr") # Extract adjusted_apr if present
timestamp = json_data.get("timestamp")
# Extract ROI (f_i_ratio) from calculation_metrics if it exists
roi = None
if "calculation_metrics" in json_data and json_data["calculation_metrics"] is not None:
roi = json_data["calculation_metrics"].get("f_i_ratio")
logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, timestamp: {timestamp}")
# Convert timestamp to datetime if it exists
timestamp_dt = None
if timestamp:
timestamp_dt = datetime.fromtimestamp(timestamp)
result = {
"apr": apr,
"adjusted_apr": adjusted_apr,
"roi": roi,
"timestamp": timestamp_dt,
"agent_id": agent_id,
"is_dummy": False
}
logger.debug(f"Agent {agent_id}: Extracted result: {result}")
return result
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}")
logger.error(f"Problematic json_value: {attr.get('json_value')}")
return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False}
def fetch_apr_data_from_db():
"""
Fetch APR data from database using the API.
"""
global global_df
global global_roi_df
logger.info("==== Starting APR data fetch ====")
try:
# Step 1: Find the Modius agent type
logger.info("Finding Modius agent type")
modius_type = get_agent_type_by_name("Modius")
if not modius_type:
logger.error("Modius agent type not found, using placeholder data")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
type_id = modius_type["type_id"]
logger.info(f"Found Modius agent type with ID: {type_id}")
# Step 2: Find the APR attribute definition
logger.info("Finding APR attribute definition")
apr_attr_def = get_attribute_definition_by_name("APR")
if not apr_attr_def:
logger.error("APR attribute definition not found, using placeholder data")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
attr_def_id = apr_attr_def["attr_def_id"]
logger.info(f"Found APR attribute definition with ID: {attr_def_id}")
# Step 3: Get all agents of type Modius
logger.info(f"Getting all agents of type Modius (type_id: {type_id})")
modius_agents = get_agents_by_type(type_id)
if not modius_agents:
logger.error("No agents of type 'Modius' found")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
logger.info(f"Found {len(modius_agents)} Modius agents")
logger.debug(f"Modius agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in modius_agents]}")
# Step 4: Fetch all APR values for Modius agents
logger.info(f"Fetching APR values for all Modius agents (attr_def_id: {attr_def_id})")
apr_attributes = get_attribute_values_by_type_and_attr(modius_agents, attr_def_id)
if not apr_attributes:
logger.error("No APR values found for 'Modius' agents")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
logger.info(f"Found {len(apr_attributes)} APR attributes total")
# Step 5: Extract APR and ROI data
logger.info("Extracting APR and ROI data from attributes")
apr_data_list = []
roi_data_list = []
for attr in apr_attributes:
data = extract_apr_value(attr)
if data["timestamp"] is not None:
# Get agent name
agent_name = get_agent_name(attr["agent_id"], modius_agents)
# Add agent name to the data
data["agent_name"] = agent_name
# Add is_dummy flag (all real data)
data["is_dummy"] = False
# Process APR data
if data["apr"] is not None:
# Include all APR values (including negative ones) EXCEPT zero and -100
if data["apr"] != 0 and data["apr"] != -100:
apr_entry = data.copy()
apr_entry["metric_type"] = "APR"
logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}")
# Add to the APR data list
apr_data_list.append(apr_entry)
else:
# Log that we're skipping zero or -100 values
logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)")
# Process ROI data
if data["roi"] is not None:
# Include all ROI values except extreme outliers
if data["roi"] > -10 and data["roi"] < 10: # Filter extreme outliers
roi_entry = {
"roi": data["roi"],
"timestamp": data["timestamp"],
"agent_id": data["agent_id"],
"agent_name": agent_name,
"is_dummy": False,
"metric_type": "ROI"
}
logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}")
# Add to the ROI data list
roi_data_list.append(roi_entry)
else:
# Log that we're skipping extreme outlier values
logger.debug(f"Skipping ROI value for agent {agent_name} ({attr['agent_id']}): {data['roi']} (extreme outlier)")
logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points")
# Added debug for adjusted APR data after May 10th
may_10_2025 = datetime(2025, 5, 10)
after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025]
with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None]
logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
# Log detailed information about when data began
first_adjusted = None
if with_adjusted_after_may_10:
first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp'])
logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})")
# Check all data for first adjusted_apr
all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None]
if all_with_adjusted:
first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp'])
logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})")
last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp'])
logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})")
# Calculate overall coverage
adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100
logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)")
# Log per-agent adjusted APR statistics
agent_stats = {}
for record in apr_data_list:
agent_id = record['agent_id']
has_adjusted = record['adjusted_apr'] is not None
if agent_id not in agent_stats:
agent_stats[agent_id] = {'total': 0, 'adjusted': 0}
agent_stats[agent_id]['total'] += 1
if has_adjusted:
agent_stats[agent_id]['adjusted'] += 1
# Log stats for agents with meaningful data
for agent_id, stats in agent_stats.items():
if stats['total'] > 0:
coverage = (stats['adjusted'] / stats['total']) * 100
if coverage > 0: # Only log agents that have at least some adjusted data
logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)")
# Check for gaps in adjusted APR data, remembering which agents have one
agents_with_gap_ids = set()
for agent_id in agent_stats:
# Get all records for this agent
agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id]
# Sort by timestamp
agent_records.sort(key=lambda x: x['timestamp'])
# Find where adjusted APR starts and if there are gaps
has_adjusted = False
gap_count = 0
streak_length = 0
for record in agent_records:
if record['adjusted_apr'] is not None:
if not has_adjusted:
has_adjusted = True
logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}")
streak_length += 1
elif has_adjusted:
# We had adjusted data but now it's missing
gap_count += 1
if streak_length > 0:
logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records")
streak_length = 0
if gap_count > 0:
logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data")
elif has_adjusted:
logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps")
# Provide summary statistics
agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0)
agents_with_gaps = len(agents_with_gap_ids)
logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data")
if agents_with_gaps > 0:
logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data")
logger.warning("These gaps may cause discontinuities in the adjusted APR graph")
else:
logger.info("No gaps detected in adjusted APR data - graph should be continuous")
if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0:
logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data")
# Log agent IDs with missing adjusted_apr after May 10th
agents_after_may_10 = set(d['agent_id'] for d in after_may_10)
logger.info(f"Agents with data after May 10th: {agents_after_may_10}")
# Check these same agents before May 10th
before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025]
agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None}
# Agents that had adjusted_apr before but not after
missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10)
if missing_adjusted:
logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}")
# Find the last valid adjusted_apr date for these agents
for agent_id in missing_adjusted:
agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None]
if agent_data:
last_entry = max(agent_data, key=lambda d: d['timestamp'])
logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}")
# Look at the first entry after the cutoff without adjusted_apr
agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id]
if agent_after:
first_after = min(agent_after, key=lambda d: d['timestamp'])
logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr")
# If the agent data has the 'adjusted_apr_key' field, log that info
if 'adjusted_apr_key' in first_after:
logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}")
# Add debug logic to check for any adjusted_apr after May 10th and which agents have it
elif len(with_adjusted_after_may_10) > 0:
logger.info("Found adjusted_apr values after May 10th, 2025")
# Group by agent and log
agent_counts = {}
for item in with_adjusted_after_may_10:
agent_id = item['agent_id']
if agent_id in agent_counts:
agent_counts[agent_id] += 1
else:
agent_counts[agent_id] = 1
logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}")
# Log adjusted_apr keys used
keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item}
if keys_used:
logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}")
# Convert to DataFrames
if not apr_data_list:
logger.error("No valid APR data extracted")
global_df = pd.DataFrame([])
else:
# Convert list of dictionaries to DataFrame for APR
global_df = pd.DataFrame(apr_data_list)
if not roi_data_list:
logger.error("No valid ROI data extracted")
global_roi_df = pd.DataFrame([])
else:
# Convert list of dictionaries to DataFrame for ROI
global_roi_df = pd.DataFrame(roi_data_list)
# Log the resulting dataframe
logger.info(f"Created DataFrame with {len(global_df)} rows")
logger.info(f"DataFrame columns: {global_df.columns.tolist()}")
logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}")
# Log adjusted APR statistics if available
if 'adjusted_apr' in global_df.columns and global_df['adjusted_apr'].notna().any():
logger.info(f"Adjusted APR statistics: min={global_df['adjusted_apr'].min()}, max={global_df['adjusted_apr'].max()}, mean={global_df['adjusted_apr'].mean()}")
logger.info(f"Number of records with adjusted_apr: {global_df['adjusted_apr'].notna().sum()} out of {len(global_df)}")
# Log the difference between APR and adjusted APR
valid_rows = global_df[global_df['adjusted_apr'].notna()]
if not valid_rows.empty:
avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean()
max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max()
min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min()
logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}")
# All values are APR type (excluding zero and -100 values)
logger.info("All values are APR type (excluding zero and -100 values)")
logger.info(f"Agents count: {global_df['agent_name'].value_counts().to_dict()}")
# Log the entire dataframe for debugging
logger.debug("Final DataFrame contents:")
for idx, row in global_df.iterrows():
logger.debug(f"Row {idx}: {row.to_dict()}")
# Add this at the end, right before returning
logger.info("Analyzing adjusted_apr data availability...")
log_adjusted_apr_availability(global_df)
return global_df, global_roi_df
except requests.exceptions.RequestException as e:
logger.error(f"API request error: {e}")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
except Exception as e:
logger.error(f"Error fetching APR data: {e}")
logger.exception("Exception traceback:")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
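# Typical use:
#   df_apr, df_roi = fetch_apr_data_from_db()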
def log_adjusted_apr_availability(df):
"""
Analyzes and logs detailed information about adjusted_apr data availability.
Args:
df: DataFrame containing the APR data with adjusted_apr column
"""
if df.empty or 'adjusted_apr' not in df.columns:
logger.warning("No adjusted_apr data available for analysis")
return
# Get only rows with valid adjusted_apr values
has_adjusted = df[df['adjusted_apr'].notna()]
if has_adjusted.empty:
logger.warning("No valid adjusted_apr values found in the dataset")
return
# 1. When did adjusted_apr data start?
first_adjusted = has_adjusted['timestamp'].min()
last_adjusted = has_adjusted['timestamp'].max()
logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}")
logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}")
logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days")
# Calculate coverage percentage
total_records = len(df)
records_with_adjusted = len(has_adjusted)
coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)")
# 2. How many agents are providing adjusted_apr?
agents_with_adjusted = has_adjusted['agent_id'].unique()
logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr")
logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}")
# 3. May 10th cutoff analysis
may_10_2025 = datetime(2025, 5, 10)
before_cutoff = df[df['timestamp'] < may_10_2025]
after_cutoff = df[df['timestamp'] >= may_10_2025]
if not before_cutoff.empty and not after_cutoff.empty:
before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
before_pct = (before_with_adjusted / len(before_cutoff)) * 100
after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
after_pct = (after_with_adjusted / len(after_cutoff)) * 100
logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)")
logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)")
# Check which agents had data before and after
agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
missing_after = agents_before - agents_after
if missing_after:
logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}")
new_after = agents_after - agents_before
if new_after:
logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}")
# 4. Find date ranges for missing adjusted_apr
# Group by agent to analyze per-agent data availability
logger.info("=== DETAILED AGENT ANALYSIS ===")
for agent_id in df['agent_id'].unique():
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
# Get the valid adjusted_apr values for this agent
agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
if agent_adjusted.empty:
logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available")
continue
# Get the date range for this agent's data
agent_start = agent_data['timestamp'].min()
agent_end = agent_data['timestamp'].max()
# Get the date range for adjusted_apr data
adjusted_start = agent_adjusted['timestamp'].min()
adjusted_end = agent_adjusted['timestamp'].max()
total_agent_records = len(agent_data)
agent_with_adjusted = len(agent_adjusted)
coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)")
logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}")
logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}")
# Calculate if this agent had data before/after May 10th
if not before_cutoff.empty and not after_cutoff.empty:
agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id]
agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id]
has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any()
has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any()
if has_before and not has_after:
last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max()
logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}")
elif not has_before and has_after:
first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min()
logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}")
# Check for gaps in adjusted_apr (periods of 24+ hours without data)
if len(agent_adjusted) < 2:
continue
# Sort by timestamp
sorted_data = agent_adjusted.sort_values('timestamp')
# Calculate time differences between consecutive data points
time_diffs = sorted_data['timestamp'].diff()
# Find gaps larger than 24 hours
gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)]
if not gaps.empty:
logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data")
# Log the gaps
for i, row in gaps.iterrows():
# Find the previous timestamp before the gap
prev_idx = sorted_data.index.get_loc(i) - 1
prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None
if prev_time:
gap_start = prev_time
gap_end = row['timestamp']
gap_duration = gap_end - gap_start
logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
def generate_apr_visualizations():
"""Generate APR visualizations with real data only (no dummy data)"""
global global_df
# Fetch data from database
df, _ = fetch_apr_data_from_db()
# If we got no data at all, return placeholder figures
if df.empty:
logger.info("No APR data available. Using fallback visualization.")
# Create empty visualizations with a message using Plotly
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text="No APR data available",
font=dict(size=20),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
# Save as static file for reference
fig.write_html("modius_apr_combined_graph.html")
fig.write_image("modius_apr_combined_graph.png")
csv_file = None
return fig, csv_file
# No longer generating dummy data
# Set global_df for access by other functions
global_df = df
# Save to CSV before creating visualizations
csv_file = save_to_csv(df)
# Only create combined time series graph
combined_fig = create_combined_time_series_graph(df)
return combined_fig, csv_file
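# Typical use from the UI layer:
#   fig, csv_path = generate_apr_visualizations()
# fig is a plotly Figure (suitable for a Gradio plot component); csv_path is
# None when no APR data was available.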
def generate_roi_visualizations():
"""Generate ROI visualizations with real data only (no dummy data)"""
global global_roi_df
# Fetch data from database if not already fetched
if global_roi_df is None or global_roi_df.empty:
_, df_roi = fetch_apr_data_from_db()
else:
df_roi = global_roi_df
# If we got no data at all, return placeholder figures
if df_roi.empty:
logger.info("No ROI data available. Using fallback visualization.")
# Create empty visualizations with a message using Plotly
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text="No ROI data available",
font=dict(size=20),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
# Save as static file for reference
fig.write_html("modius_roi_graph.html")
fig.write_image("modius_roi_graph.png")
csv_file = None
return fig, csv_file
# Set global_roi_df for access by other functions
global_roi_df = df_roi
# Save to CSV before creating visualizations
csv_file = save_roi_to_csv(df_roi)
# Create combined time series graph for ROI
combined_fig = create_combined_roi_time_series_graph(df_roi)
return combined_fig, csv_file
def create_combined_roi_time_series_graph(df):
"""Create a time series graph showing average ROI values across all agents"""
if len(df) == 0:
logger.error("No data to plot combined ROI graph")
fig = go.Figure()
fig.add_annotation(
text="No ROI data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# IMPORTANT: Force data types to ensure consistency, and work on a copy so
# repeated calls don't re-scale the cached global dataframe
df = df.copy()
df['roi'] = df['roi'].astype(float) # Ensure ROI is float
# Convert ROI values to percentages (multiply by 100)
df['roi'] = df['roi'] * 100
df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
# Get min and max time for shapes
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Use the actual start date from the data instead of a fixed date
x_start_date = min_time
# CRITICAL: Log the exact dataframe we're using for plotting to help debug
logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}")
logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}")
logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}")
# Export full dataframe to CSV for debugging
debug_csv = "debug_roi_data.csv"
df.to_csv(debug_csv)
logger.info(f"Exported ROI graph data to {debug_csv} for debugging")
# Create Plotly figure in a clean state
fig = go.Figure()
# Add background shapes for positive and negative regions
# Add shape for positive ROI region (above zero)
fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=100, # Use a fixed positive value (percentage)
x0=min_time, x1=max_time,
layer="below"
)
# Add shape for negative ROI region (below zero)
fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=-100, y1=0, # Use a fixed negative value (percentage)
x0=min_time, x1=max_time,
layer="below"
)
# Add zero line
fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Filter out outliers (ROI values above 200% or below -200%)
outlier_data = df[(df['roi'] > 200) | (df['roi'] < -200)].copy()
df_filtered = df[(df['roi'] <= 200) & (df['roi'] >= -200)].copy()
# Log the outliers for better debugging
if len(outlier_data) > 0:
excluded_count = len(outlier_data)
logger.info(f"Excluded {excluded_count} data points with outlier ROI values (>200% or <-200%)")
# Group outliers by agent for detailed logging
outlier_agents = outlier_data.groupby('agent_name')
for agent_name, agent_outliers in outlier_agents:
logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
for idx, row in agent_outliers.iterrows():
logger.info(f" - ROI: {row['roi']}, timestamp: {row['timestamp']}")
# Use the filtered data for all subsequent operations
df = df_filtered
# Group by timestamp and calculate mean ROI
avg_roi_data = df.groupby('timestamp')['roi'].mean().reset_index()
# Sort by timestamp
avg_roi_data = avg_roi_data.sort_values('timestamp')
# Log the average ROI data
logger.info(f"Calculated average ROI data with {len(avg_roi_data)} points")
for idx, row in avg_roi_data.iterrows():
logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_roi={row['roi']}")
# Calculate moving average based on a time window (3 days)
# Sort data by timestamp
df_sorted = df.sort_values('timestamp')
# Create a new dataframe for the moving average
avg_roi_data_with_ma = avg_roi_data.copy()
avg_roi_data_with_ma['moving_avg'] = None # Initialize the moving average column
# Define the time window for the moving average (3 days)
time_window = pd.Timedelta(days=3)
logger.info(f"Calculating moving average with time window of {time_window}")
# Calculate the moving averages for each timestamp
for i, row in avg_roi_data_with_ma.iterrows():
current_time = row['timestamp']
window_start = current_time - time_window
# Get all data points within the 3-day time window
window_data = df_sorted[
(df_sorted['timestamp'] >= window_start) &
(df_sorted['timestamp'] <= current_time)
]
# Calculate the average ROI for the 3-day time window
if not window_data.empty:
avg_roi_data_with_ma.at[i, 'moving_avg'] = window_data['roi'].mean()
logger.debug(f"ROI time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['roi'].mean()}")
else:
# If no data points in the window, use the current value
avg_roi_data_with_ma.at[i, 'moving_avg'] = row['roi']
logger.debug(f"No data points in time window for {current_time}, using current value {row['roi']}")
logger.info(f"Calculated time-based moving averages with {len(avg_roi_data_with_ma)} points")
# Find the last date where we have valid moving average data
last_valid_ma_date = avg_roi_data_with_ma[avg_roi_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_roi_data_with_ma['moving_avg'].dropna().empty else None
# If we don't have any valid moving average data, use the max time from the original data
last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
logger.info(f"Using last valid date for graph: {last_valid_date}")
# Plot individual agent data points with agent names in hover, but limit display for scalability
if not df.empty:
# Group by agent to use different colors for each agent
unique_agents = df['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
# Create a color map for agents
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent to determine which are most active
agent_counts = df['agent_name'].value_counts()
# Determine how many agents to show individually (limit to top 5 most active)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
# Add data points for each agent, but only make top agents visible by default
for agent_name in unique_agents:
agent_data = df[df['agent_name'] == agent_name]
# Explicitly convert to Python lists
x_values = agent_data['timestamp'].tolist()
y_values = agent_data['roi'].tolist()
# Change default visibility to False to hide all agent data points
is_visible = False
# Add data points as markers for ROI
fig.add_trace(
go.Scatter(
x=x_values,
y=y_values,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='circle',
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (ROI)',
hovertemplate='Time: %{x}<br>ROI: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
# Add ROI moving average as a smooth line
x_values_ma = avg_roi_data_with_ma['timestamp'].tolist()
y_values_ma = avg_roi_data_with_ma['moving_avg'].tolist()
# Create hover template for the ROI moving average line
hover_data_roi = []
for idx, row in avg_roi_data_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only up to seconds (not milliseconds)
formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
# Calculate number of active agents in the last 24 hours
time_24h_ago = timestamp - pd.Timedelta(hours=24)
active_agents = len(df[(df['timestamp'] >= time_24h_ago) &
(df['timestamp'] <= timestamp)]['agent_id'].unique())
hover_data_roi.append(
f"Time: {formatted_timestamp}<br>Avg ROI (3d window): {row['moving_avg']:.2f}%<br>Active agents (24h): {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_ma,
y=y_values_ma,
mode='lines', # Only lines for moving average
line=dict(color='blue', width=2), # Thinner line
name='Average ROI (3d window)',
hovertext=hover_data_roi,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 3-day moving average ROI trace with {len(x_values_ma)} points")
# Update layout
fig.update_layout(
title=dict(
text="Modius Agents ROI",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
template="plotly_white",
height=600, # Reduced height for better fit on smaller screens
autosize=True, # Enable auto-sizing for responsiveness
legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
    groupclick="toggleitem",
    font=dict(
        family="Arial, sans-serif",
        size=14,
        color="black",
        weight="bold"
    )
),
margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
hovermode="closest"
)
# Add single annotation for y-axis
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=0, # Center of the y-axis
xref="paper",
yref="y",
text="ROI [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# Update y-axis with fixed range of -100% to +100% for ROI
fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Use fixed range instead of autoscaling
autorange=False, # Disable autoscaling
range=[-100, 100], # Set fixed range from -100% to +100%
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and fixed range
fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Set fixed range with start date and ending at the last valid date
autorange=False, # Disable autoscaling
range=[x_start_date, last_valid_date], # Set fixed range from start date to last valid date
tickformat="%b %d", # Simplified date format without time
tickangle=-30, # Angle the labels for better readability
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
try:
# Save the figure
graph_file = "modius_roi_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility
img_file = "modius_roi_graph.png"
try:
fig.write_image(img_file)
logger.info(f"ROI graph saved to {graph_file} and {img_file}")
except Exception as e:
logger.error(f"Error saving ROI image: {e}")
logger.info(f"ROI graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
except Exception as e:
# If the complex graph approach fails, create a simpler one
logger.error(f"Error creating advanced ROI graph: {e}")
logger.info("Falling back to Simpler ROI graph")
# Create a simpler graph as fallback
simple_fig = go.Figure()
# Add zero line
simple_fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Simply plot the average ROI data with moving average
if not avg_roi_data.empty:
# Add moving average as a line
simple_fig.add_trace(
go.Scatter(
x=avg_roi_data_with_ma['timestamp'],
y=avg_roi_data_with_ma['moving_avg'],
mode='lines',
name='Average ROI (3d window)',
line=dict(width=2, color='blue') # Thinner line
)
)
# Simplified layout with adjusted y-axis range
simple_fig.update_layout(
title=dict(
text="Modius Agents ROI",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None,
yaxis_title=None,
template="plotly_white",
height=600,
autosize=True,
margin=dict(r=30, l=120, t=40, b=50)
)
# Update y-axis with fixed range of -100% to +100% for ROI
simple_fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
autorange=False,
range=[-100, 100],
tickformat=".2f",
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and fixed range
simple_fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
autorange=False,
range=[x_start_date, max_time],
tickformat="%b %d",
tickangle=-30,
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
)
# Save the figure
graph_file = "modius_roi_graph.html"
simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Return the simple figure
return simple_fig
def save_roi_to_csv(df):
"""Save the ROI data DataFrame to a CSV file and return the file path"""
if df.empty:
logger.error("No ROI data to save to CSV")
return None
# Define the CSV file path
csv_file = "modius_roi_values.csv"
# Save to CSV
df.to_csv(csv_file, index=False)
logger.info(f"ROI data saved to {csv_file}")
return csv_file
def create_time_series_graph_per_agent(df):
"""Create a time series graph for each agent using Plotly"""
# Get unique agents
unique_agents = df['agent_id'].unique()
if len(unique_agents) == 0:
logger.error("No agent data to plot")
fig = go.Figure()
fig.add_annotation(
text="No agent data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# Create a subplot figure for each agent
fig = make_subplots(rows=len(unique_agents), cols=1,
subplot_titles=[f"Agent: {df[df['agent_id'] == agent_id]['agent_name'].iloc[0]}"
for agent_id in unique_agents],
vertical_spacing=0.1)
# Plot data for each agent
for i, agent_id in enumerate(unique_agents):
agent_data = df[df['agent_id'] == agent_id].copy()
agent_name = agent_data['agent_name'].iloc[0]
row = i + 1
# Add zero line to separate APR and Performance
fig.add_shape(
type="line", line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1
)
# Add background colors
fig.add_shape(
type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0),
y0=0, y1=1000, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1, layer="below"
)
fig.add_shape(
type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0),
y0=-1000, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1, layer="below"
)
# Create separate dataframes for different data types
apr_data = agent_data[agent_data['metric_type'] == 'APR']
perf_data = agent_data[agent_data['metric_type'] == 'Performance']
# Sort all data by timestamp for the line plots
combined_agent_data = agent_data.sort_values('timestamp')
# Add main line connecting all points
fig.add_trace(
go.Scatter(
x=combined_agent_data['timestamp'],
y=combined_agent_data['apr'],
mode='lines',
line=dict(color='purple', width=2),
name=f'{agent_name}',
legendgroup=agent_name,
showlegend=(i == 0), # Only show in legend once
hovertemplate='Time: %{x}<br>Value: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Add scatter points for APR values
if not apr_data.empty:
fig.add_trace(
go.Scatter(
x=apr_data['timestamp'],
y=apr_data['apr'],
mode='markers',
marker=dict(color='blue', size=10, symbol='circle'),
name='APR',
legendgroup='APR',
showlegend=(i == 0),
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Add scatter points for Performance values
if not perf_data.empty:
fig.add_trace(
go.Scatter(
x=perf_data['timestamp'],
y=perf_data['apr'],
mode='markers',
marker=dict(color='red', size=10, symbol='square'),
name='Performance',
legendgroup='Performance',
showlegend=(i == 0),
hovertemplate='Time: %{x}<br>Performance: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Update axes
fig.update_xaxes(title_text="Time", row=row, col=1)
fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)')
# Update layout
fig.update_layout(
height=400 * len(unique_agents),
width=1000,
title_text="APR and Performance Values per Agent",
template="plotly_white",
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1
),
margin=dict(r=20, l=20, t=30, b=20),
hovermode="closest"
)
# Save the figure (still useful for reference)
graph_file = "modius_apr_per_agent_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility (static export may be unavailable)
img_file = "modius_apr_per_agent_graph.png"
try:
    fig.write_image(img_file)
    logger.info(f"Per-agent graph saved to {graph_file} and {img_file}")
except Exception as e:
    logger.error(f"Error saving per-agent image: {e}")
    logger.info(f"Per-agent graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
def write_debug_info(df, fig):
"""Minimal debug info function"""
try:
# Just log minimal information
logger.debug(f"Graph created with {len(df)} data points and {len(fig.data)} traces")
return True
except Exception as e:
logger.error(f"Error writing debug info: {e}")
return False
def create_combined_time_series_graph(df):
"""Create a time series graph showing average APR values across all agents"""
if len(df) == 0:
logger.error("No data to plot combined graph")
fig = go.Figure()
fig.add_annotation(
text="No data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# IMPORTANT: Force data types to ensure consistency
df['apr'] = df['apr'].astype(float) # Ensure APR is float
df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
# Get min and max time for shapes
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Use April 17th, 2025 as the fixed start date for APR graph
x_start_date = datetime(2025, 4, 17)
# CRITICAL: Log the exact dataframe we're using for plotting to help debug
logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}")
logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}")
logger.info("Graph data - all positive APR values only")
logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}")
# Export full dataframe to CSV for debugging
debug_csv = "debug_graph_data.csv"
df.to_csv(debug_csv)
logger.info(f"Exported graph data to {debug_csv} for debugging")
# Write detailed data report
with open("debug_graph_data_report.txt", "w") as f:
f.write("==== GRAPH DATA REPORT ====\n\n")
f.write(f"Total data points: {len(df)}\n")
f.write(f"Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}\n\n")
# Output per-agent details
unique_agents = df['agent_id'].unique()
f.write(f"Number of agents: {len(unique_agents)}\n\n")
for agent_id in unique_agents:
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
f.write(f"== Agent: {agent_name} (ID: {agent_id}) ==\n")
f.write(f" Total data points: {len(agent_data)}\n")
apr_data = agent_data[agent_data['metric_type'] == 'APR']
f.write(f" APR data points: {len(apr_data)}\n")
if not apr_data.empty:
f.write(f" APR values: {apr_data['apr'].tolist()}\n")
f.write(f" APR timestamps: {[ts.strftime('%Y-%m-%d %H:%M:%S') if ts is not None else 'None' for ts in apr_data['timestamp']]}\n")
f.write("\n")
logger.info("Generated detailed graph data report")
# ENSURE THERE ARE NO CONFLICTING AXES OR TRACES
# Create Plotly figure in a clean state
fig = go.Figure()
# Enable autoscaling instead of fixed ranges
logger.info("Using autoscaling for axes ranges")
# Add background shapes for APR and Performance regions
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Add shape for positive APR region (above zero)
fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=100, # Use a fixed positive value
x0=min_time, x1=max_time,
layer="below"
)
# Add shape for negative APR region (below zero)
fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=-100, y1=0, # Use a fixed negative value
x0=min_time, x1=max_time,
layer="below"
)
# Add zero line
fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# MODIFIED: Calculate average APR values across all agents for each timestamp
# Filter for APR data only
apr_data = df[df['metric_type'] == 'APR'].copy()
# Filter out outliers (APR values above 200 or below -200)
outlier_data = apr_data[(apr_data['apr'] > 200) | (apr_data['apr'] < -200)].copy()
apr_data_filtered = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)].copy()
# Log the outliers for better debugging
if len(outlier_data) > 0:
excluded_count = len(outlier_data)
logger.info(f"Excluded {excluded_count} data points with outlier APR values (>200 or <-200)")
# Group outliers by agent for detailed logging
outlier_agents = outlier_data.groupby('agent_name')
for agent_name, agent_outliers in outlier_agents:
logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
for idx, row in agent_outliers.iterrows():
logger.info(f" - APR: {row['apr']}, timestamp: {row['timestamp']}")
# Use the filtered data for all subsequent operations
apr_data = apr_data_filtered
# Group by timestamp and calculate mean APR
avg_apr_data = apr_data.groupby('timestamp')['apr'].mean().reset_index()
# Sort by timestamp
avg_apr_data = avg_apr_data.sort_values('timestamp')
# Log the average APR data
logger.info(f"Calculated average APR data with {len(avg_apr_data)} points")
for idx, row in avg_apr_data.iterrows():
logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_apr={row['apr']}")
# Calculate moving average based on a time window (3 days)
# Sort data by timestamp
apr_data_sorted = apr_data.sort_values('timestamp')
# Create a new dataframe for the moving average
avg_apr_data_with_ma = avg_apr_data.copy()
avg_apr_data_with_ma['moving_avg'] = None # Initialize the moving average column
# Define the time window for the moving average (3 days)
time_window = pd.Timedelta(days=3)
logger.info(f"Calculating moving average with time window of {time_window}")
# Track a second moving average for adjusted APR alongside the APR one initialized above
avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR
# Keep track of the last valid adjusted_moving_avg value to handle gaps
last_valid_adjusted_moving_avg = None
# Calculate the moving averages for each timestamp
for i, row in avg_apr_data_with_ma.iterrows():
current_time = row['timestamp']
window_start = current_time - time_window
# Get all data points within the 3-day time window
window_data = apr_data_sorted[
(apr_data_sorted['timestamp'] >= window_start) &
(apr_data_sorted['timestamp'] <= current_time)
]
# Calculate the average APR for the 3-day time window
if not window_data.empty:
avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}")
# Calculate adjusted APR moving average if data exists
has_adjusted_apr = 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any()
if has_adjusted_apr:
adjusted_avg = window_data['adjusted_apr'].dropna().mean()
avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = adjusted_avg
last_valid_adjusted_moving_avg = adjusted_avg
logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={adjusted_avg}")
else:
# If we don't have adjusted_apr data in this window but had some previously,
# use the last valid value to maintain continuity in the graph
if last_valid_adjusted_moving_avg is not None:
avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
else:
# If no data points in the window, use the current value
avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
logger.debug(f"No data points in time window for {current_time}, using current value {row['apr']}")
logger.info(f"Calculated time-based moving averages with {len(avg_apr_data_with_ma)} points")
# Find the last date where we have valid moving average data
last_valid_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_apr_data_with_ma['moving_avg'].dropna().empty else None
# Find the last date where we have valid adjusted moving average data
last_valid_adj_ma_date = None
if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
last_valid_adj_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['adjusted_moving_avg'].notna()]['timestamp'].max()
# Determine the last valid date for either moving average
last_valid_date = last_valid_ma_date
if last_valid_adj_ma_date is not None:
last_valid_date = max(last_valid_date, last_valid_adj_ma_date) if last_valid_date is not None else last_valid_adj_ma_date
# If we don't have any valid moving average data, use the max time from the original data
if last_valid_date is None:
last_valid_date = df['timestamp'].max()
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
logger.info(f"Last valid adjusted moving average date: {last_valid_adj_ma_date}")
logger.info(f"Using last valid date for graph: {last_valid_date}")
# Plot individual agent data points with agent names in hover, but limit display for scalability
if not apr_data.empty:
# Group by agent to use different colors for each agent
unique_agents = apr_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
# Create a color map for agents
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent to determine which are most active
agent_counts = apr_data['agent_name'].value_counts()
# Determine how many agents to show individually (limit to top 5 most active)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
# Add data points for each agent, but only make top agents visible by default
for agent_name in unique_agents:
agent_data = apr_data[apr_data['agent_name'] == agent_name]
# Explicitly convert to Python lists
x_values = agent_data['timestamp'].tolist()
y_values = agent_data['apr'].tolist()
# Change default visibility to False to hide all agent data points
is_visible = False
# Add data points as markers for APR
fig.add_trace(
go.Scatter(
x=x_values,
y=y_values,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='circle',
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (APR)',
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added APR data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
# Add data points for adjusted APR if it exists
if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any():
x_values_adj = agent_data['timestamp'].tolist()
y_values_adj = agent_data['adjusted_apr'].tolist()
fig.add_trace(
go.Scatter(
x=x_values_adj,
y=y_values_adj,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='diamond', # Different symbol for adjusted APR
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (Adjusted APR)',
hovertemplate='Time: %{x}<br>Adjusted APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
# Add APR moving average as a smooth line
x_values_ma = avg_apr_data_with_ma['timestamp'].tolist()
y_values_ma = avg_apr_data_with_ma['moving_avg'].tolist()
# Create hover template for the APR moving average line
hover_data_apr = []
for idx, row in avg_apr_data_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only up to seconds (not milliseconds)
formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
# Calculate number of active agents in the last 24 hours
time_24h_ago = timestamp - pd.Timedelta(hours=24)
active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
(apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
hover_data_apr.append(
f"Time: {formatted_timestamp}<br>Avg APR (3d window): {row['moving_avg']:.2f}<br>Active agents (24h): {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_ma,
y=y_values_ma,
mode='lines', # Only lines for moving average
line=dict(color='red', width=2), # Thinner line
name='Average APR (3d window)',
hovertext=hover_data_apr,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 3-day moving average APR trace with {len(x_values_ma)} points")
# Add adjusted APR moving average line if it exists
if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
# Create a copy of the dataframe with forward-filled adjusted_moving_avg values
# to ensure the line continues even when we have missing data
filled_avg_apr_data = avg_apr_data_with_ma.copy()
filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].ffill()
# Use the filled dataframe for the adjusted APR line
x_values_adj = filled_avg_apr_data['timestamp'].tolist()
y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()
# Create hover template for the adjusted APR moving average line
hover_data_adj = []
for idx, row in filled_avg_apr_data.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only up to seconds (not milliseconds)
formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
# Calculate number of active agents in the last 24 hours
time_24h_ago = timestamp - pd.Timedelta(hours=24)
active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
(apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
if pd.notna(row['adjusted_moving_avg']):
hover_data_adj.append(
f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): {row['adjusted_moving_avg']:.2f}<br>Active agents (24h): {active_agents}"
)
else:
hover_data_adj.append(
f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): N/A<br>Active agents (24h): {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_adj,
y=y_values_adj_ma,
mode='lines', # Only lines for moving average
line=dict(color='green', width=4), # Thicker solid line for adjusted APR
name='Average ETH Adjusted APR (3d window)',
hovertext=hover_data_adj,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_adj)} points (with forward-filling for missing values)")
else:
logger.warning("No adjusted APR moving average data available to plot")
# Removed cumulative APR as requested
logger.info("Cumulative APR graph line has been removed as requested")
# Update layout - use simple boolean values everywhere
# Make chart responsive instead of fixed width
fig.update_layout(
title=dict(
text="Modius Agents",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
template="plotly_white",
height=600, # Reduced height for better fit on smaller screens
# Removed fixed width to enable responsiveness
autosize=True, # Enable auto-sizing for responsiveness
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem"
),
margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
hovermode="closest"
)
# Add annotations for y-axis regions
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=-25, # Middle of the negative region
xref="paper",
yref="y",
text="Percent drawdown [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=50, # Middle of the positive region
xref="paper",
yref="y",
text="Agent APR [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# Remove x-axis title annotation
# fig.add_annotation(
# x=0.5, # Center of the x-axis
# y=-0.15, # Below the x-axis
# xref="paper",
# yref="paper",
# text="Date",
# showarrow=False,
# font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
# align="center"
# )
# Update layout for legend
fig.update_layout(
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem",
font=dict(
family="Arial, sans-serif",
size=14, # Adjusted font size
color="black",
weight="bold"
)
)
)
# Update y-axis with fixed range of -50 to +100 for psychological effect
fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Use fixed range instead of autoscaling
autorange=False, # Disable autoscaling
range=[-50, 100], # Set fixed range from -50 to +100
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and fixed range
fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Set fixed range with April 17 as start date and ending at the last valid date
autorange=False, # Disable autoscaling
range=[x_start_date, last_valid_date], # Set fixed range from April 17 to last valid date
tickformat="%b %d", # Simplified date format without time
tickangle=-30, # Angle the labels for better readability
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
# FALLBACK HANDLING: try to finalize and save the advanced figure; if
# anything fails, fall back to a simpler, more reliable plot below
try:
# Write detailed debug information before saving the figure
write_debug_info(df, fig)
# Save the figure (still useful for reference)
graph_file = "modius_apr_combined_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility
img_file = "modius_apr_combined_graph.png"
try:
fig.write_image(img_file)
logger.info(f"Combined graph saved to {graph_file} and {img_file}")
except Exception as e:
logger.error(f"Error saving image: {e}")
logger.info(f"Combined graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
except Exception as e:
# If the complex graph approach fails, create a simpler one
logger.error(f"Error creating advanced graph: {e}")
logger.info("Falling back to Simpler graph")
# Create a simpler graph as fallback
simple_fig = go.Figure()
# Add zero line
simple_fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Define colors for the fallback graph
fallback_colors = px.colors.qualitative.Plotly
# Simply plot the average APR data with moving average
if not avg_apr_data.empty:
# Sort by timestamp
avg_apr_data = avg_apr_data.sort_values('timestamp')
# Calculate both moving averages for the fallback graph
avg_apr_data_with_ma = avg_apr_data.copy()
avg_apr_data_with_ma['moving_avg'] = None # 6-hour window
avg_apr_data_with_ma['infinite_avg'] = None # Infinite window
# Define the time window (6 hours)
time_window = pd.Timedelta(hours=6)
# Calculate the moving averages for each timestamp
for i, row in avg_apr_data_with_ma.iterrows():
current_time = row['timestamp']
window_start = current_time - time_window
# Get all data points within the 6-hour time window
window_data = apr_data[
(apr_data['timestamp'] >= window_start) &
(apr_data['timestamp'] <= current_time)
]
# Get all data points up to the current timestamp (infinite window)
infinite_window_data = apr_data[
apr_data['timestamp'] <= current_time
]
# Calculate the average APR for the 6-hour time window
if not window_data.empty:
avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
else:
# If no data points in the window, use the current value
avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
# Calculate the average APR for the infinite window
if not infinite_window_data.empty:
avg_apr_data_with_ma.at[i, 'infinite_avg'] = infinite_window_data['apr'].mean()
else:
avg_apr_data_with_ma.at[i, 'infinite_avg'] = row['apr']
# Add data points for each agent, but only make top agents visible by default
unique_agents = apr_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent
agent_counts = apr_data['agent_name'].value_counts()
# Determine how many agents to show individually (limit to top 5 most active)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
for agent_name in unique_agents:
agent_data = apr_data[apr_data['agent_name'] == agent_name]
# Determine if this agent should be visible by default
is_visible = agent_name in top_agents
# Add data points as markers
simple_fig.add_trace(
go.Scatter(
x=agent_data['timestamp'],
y=agent_data['apr'],
mode='markers',
name=f'Agent: {agent_name}',
marker=dict(
size=10,
color=color_map[agent_name]
),
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # Only top agents visible by default
)
)
# Add 6-hour moving average as a line
simple_fig.add_trace(
go.Scatter(
x=avg_apr_data_with_ma['timestamp'],
y=avg_apr_data_with_ma['moving_avg'],
mode='lines',
name='Average APR (6h window)',
line=dict(width=2, color='red') # Thinner line
)
)
# Add infinite window moving average as another line
simple_fig.add_trace(
go.Scatter(
x=avg_apr_data_with_ma['timestamp'],
y=avg_apr_data_with_ma['infinite_avg'],
mode='lines',
name='Cumulative Average APR (all data)',
line=dict(width=4, color='green') # Thicker solid line
)
)
# Simplified layout with adjusted y-axis range and increased size
simple_fig.update_layout(
title=dict(
text="Modius Agents",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
yaxis=dict(
# No fixed range - let Plotly autoscale
autorange=True, # Explicitly enable autoscaling
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=12) # Larger font for tick labels
),
height=600, # Reduced height for better fit
# Removed fixed width to enable responsiveness
autosize=True, # Enable auto-sizing for responsiveness
template="plotly_white", # Use a cleaner template
margin=dict(r=30, l=120, t=40, b=50) # Increased bottom margin for x-axis title
)
# Add annotations for y-axis regions in the fallback graph
simple_fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=-25, # Middle of the negative region
xref="paper",
yref="y",
text="Percent drawdown [%]",
showarrow=False,
font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
simple_fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=50, # Middle of the positive region
xref="paper",
yref="y",
text="Agent APR [%]",
showarrow=False,
font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# Remove x-axis title annotation
# simple_fig.add_annotation(
# x=0.5, # Center of the x-axis
# y=-0.15, # Below the x-axis
# xref="paper",
# yref="paper",
# text="Date",
# showarrow=False,
# font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
# align="center"
# )
# Update legend font for fallback graph
simple_fig.update_layout(
legend=dict(
font=dict(
family="Arial, sans-serif",
size=14, # Adjusted font size
color="black",
weight="bold"
)
)
)
# Apply fixed range to the x-axis for the fallback graph
simple_fig.update_xaxes(
autorange=False, # Disable autoscaling
range=[x_start_date, max_time], # Set fixed range from April 17
tickformat="%b %d", # Simplified date format without time
tickangle=-30,
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
# Update y-axis tick font for fallback graph
simple_fig.update_yaxes(
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") # Adjusted font size
)
# Add a note about hidden agents if there are more than MAX_VISIBLE_AGENTS
if len(unique_agents) > MAX_VISIBLE_AGENTS:
simple_fig.add_annotation(
text=f"Note: Only showing top {MAX_VISIBLE_AGENTS} agents by default. Toggle others in legend.",
xref="paper", yref="paper",
x=0.5, y=1.05,
showarrow=False,
font=dict(size=12, color="gray"),
align="center"
)
# Return the simple figure
return simple_fig
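# Illustrative sketch (not wired into the graphs above): both the 3-day
# average used in the main graph and the 6-hour/infinite averages used in
# the fallback follow the same trailing-window pattern, shown here as a
# generic helper. Assumes a DataFrame with a datetime 'timestamp' column
# and a numeric value column; window size and column name are parameters
# instead of the hard-coded values above.
def compute_time_window_average(df, value_col='apr', window=pd.Timedelta(days=3)):
    """Return, for each row, the mean of value_col over [timestamp - window, timestamp]."""
    averages = []
    for _, row in df.iterrows():
        window_start = row['timestamp'] - window
        in_window = df[(df['timestamp'] >= window_start) & (df['timestamp'] <= row['timestamp'])]
        # Fall back to the row's own value when the window is empty,
        # mirroring the fallback-graph loop above
        averages.append(in_window[value_col].mean() if not in_window.empty else row[value_col])
    return pd.Series(averages, index=df.index)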
def save_to_csv(df):
"""Save the APR data DataFrame to a CSV file and return the file path"""
if df.empty:
logger.error("No APR data to save to CSV")
return None
# Define the CSV file path
csv_file = "modius_apr_values.csv"
# Save to CSV
df.to_csv(csv_file, index=False)
logger.info(f"APR data saved to {csv_file}")
# Also generate a statistics CSV file
stats_df = generate_statistics_from_data(df)
stats_csv = "modius_apr_statistics.csv"
stats_df.to_csv(stats_csv, index=False)
logger.info(f"Statistics saved to {stats_csv}")
# Log detailed statistics about adjusted APR
if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any():
adjusted_stats = stats_df[stats_df['avg_adjusted_apr'].notna()]
logger.info(f"Agents with adjusted APR data: {len(adjusted_stats)} out of {len(stats_df)}")
for _, row in adjusted_stats.iterrows():
if row['agent_id'] != 'ALL': # Skip the overall stats row
logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}")
# Log overall adjusted APR stats
overall_row = stats_df[stats_df['agent_id'] == 'ALL']
if not overall_row.empty and pd.notna(overall_row['avg_adjusted_apr'].iloc[0]):
logger.info(f"Overall adjusted APR stats: avg={overall_row['avg_adjusted_apr'].iloc[0]:.2f}, min={overall_row['min_adjusted_apr'].iloc[0]:.2f}, max={overall_row['max_adjusted_apr'].iloc[0]:.2f}")
return csv_file
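# Usage sketch (illustrative): save_to_csv operates on the in-memory APR
# frame populated elsewhere by fetch_apr_data_from_db(), guarded the same
# way the refresh handlers in dashboard() guard it:
# if global_df is not None and not global_df.empty:
#     csv_path = save_to_csv(global_df)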
def generate_statistics_from_data(df):
"""Generate statistics from the APR data"""
if df.empty:
return pd.DataFrame()
# Get unique agents
unique_agents = df['agent_id'].unique()
stats_list = []
# Generate per-agent statistics
for agent_id in unique_agents:
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
# APR statistics
apr_data = agent_data[agent_data['metric_type'] == 'APR']
real_apr = apr_data[apr_data['is_dummy'] == False]
# Performance statistics
perf_data = agent_data[agent_data['metric_type'] == 'Performance']
real_perf = perf_data[perf_data['is_dummy'] == False]
# Check if adjusted_apr exists and has non-null values
has_adjusted_apr = 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any()
stats = {
'agent_id': agent_id,
'agent_name': agent_name,
'total_points': len(agent_data),
'apr_points': len(apr_data),
'performance_points': len(perf_data),
'real_apr_points': len(real_apr),
'real_performance_points': len(real_perf),
'avg_apr': apr_data['apr'].mean() if not apr_data.empty else None,
'avg_performance': perf_data['apr'].mean() if not perf_data.empty else None,
'max_apr': apr_data['apr'].max() if not apr_data.empty else None,
'min_apr': apr_data['apr'].min() if not apr_data.empty else None,
'avg_adjusted_apr': apr_data['adjusted_apr'].mean() if has_adjusted_apr else None,
'max_adjusted_apr': apr_data['adjusted_apr'].max() if has_adjusted_apr else None,
'min_adjusted_apr': apr_data['adjusted_apr'].min() if has_adjusted_apr else None,
'latest_timestamp': agent_data['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not agent_data.empty else None
}
stats_list.append(stats)
# Generate overall statistics
apr_only = df[df['metric_type'] == 'APR']
perf_only = df[df['metric_type'] == 'Performance']
# Check if adjusted_apr exists and has non-null values for overall stats
has_adjusted_apr_overall = 'adjusted_apr' in apr_only.columns and apr_only['adjusted_apr'].notna().any()
overall_stats = {
'agent_id': 'ALL',
'agent_name': 'All Agents',
'total_points': len(df),
'apr_points': len(apr_only),
'performance_points': len(perf_only),
'real_apr_points': len(apr_only[apr_only['is_dummy'] == False]),
'real_performance_points': len(perf_only[perf_only['is_dummy'] == False]),
'avg_apr': apr_only['apr'].mean() if not apr_only.empty else None,
'avg_performance': perf_only['apr'].mean() if not perf_only.empty else None,
'max_apr': apr_only['apr'].max() if not apr_only.empty else None,
'min_apr': apr_only['apr'].min() if not apr_only.empty else None,
'avg_adjusted_apr': apr_only['adjusted_apr'].mean() if has_adjusted_apr_overall else None,
'max_adjusted_apr': apr_only['adjusted_apr'].max() if has_adjusted_apr_overall else None,
'min_adjusted_apr': apr_only['adjusted_apr'].min() if has_adjusted_apr_overall else None,
'latest_timestamp': df['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not df.empty else None
}
stats_list.append(overall_stats)
return pd.DataFrame(stats_list)
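# Usage sketch (illustrative, not called by the dashboard): print a quick
# per-agent summary to the console. Column names match the dicts built above.
def print_statistics_summary(df):
    stats = generate_statistics_from_data(df)
    if stats.empty:
        print("No APR data available")
        return
    cols = ['agent_name', 'apr_points', 'avg_apr', 'min_apr', 'max_apr']
    print(stats[cols].to_string(index=False))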
# Create dummy functions for the commented-out imports
def create_transcation_visualizations():
"""Dummy implementation that returns a placeholder graph"""
fig = go.Figure()
fig.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig
def create_active_agents_visualizations():
"""Dummy implementation that returns a placeholder graph"""
fig = go.Figure()
fig.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig
# Comment out the blockchain connection code
"""
# Load environment variables from .env file
# RPC URLs
OPTIMISM_RPC_URL = os.getenv('OPTIMISM_RPC_URL')
MODE_RPC_URL = os.getenv('MODE_RPC_URL')
# Initialize Web3 instances
web3_instances = {
'optimism': Web3(Web3.HTTPProvider(OPTIMISM_RPC_URL)),
'mode': Web3(Web3.HTTPProvider(MODE_RPC_URL))
}
# Contract addresses for service registries
contract_addresses = {
'optimism': '0x3d77596beb0f130a4415df3D2D8232B3d3D31e44',
'mode': '0x3C1fF68f5aa342D296d4DEe4Bb1cACCA912D95fE'
}
# Load the ABI from the provided JSON file
with open('./contracts/service_registry_abi.json', 'r') as abi_file:
contract_abi = json.load(abi_file)
# Create the contract instances
service_registries = {
chain_name: web3.eth.contract(address=contract_addresses[chain_name], abi=contract_abi)
for chain_name, web3 in web3_instances.items()
}
# Check if connections are successful
for chain_name, web3_instance in web3_instances.items():
if not web3_instance.is_connected():
raise Exception(f"Failed to connect to the {chain_name.capitalize()} network.")
else:
print(f"Successfully connected to the {chain_name.capitalize()} network.")
"""
# Dummy blockchain functions to replace the commented-out ones
def get_transfers(integrator: str, wallet: str) -> Dict[str, Any]:
"""Dummy function that returns an empty transfers result"""
return {"transfers": []}
def fetch_and_aggregate_transactions():
"""Dummy function that returns empty data"""
return [], {}
# Function to parse the transaction data and prepare it for visualization
def process_transactions_and_agents(data):
"""Dummy function that returns empty dataframes"""
df_transactions = pd.DataFrame()
df_agents = pd.DataFrame(columns=['date', 'agent_count'])
df_agents_weekly = pd.DataFrame()
return df_transactions, df_agents, df_agents_weekly
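# Illustrative only: a minimal synthetic frame matching the ['date',
# 'agent_count'] schema returned above, useful for exercising the
# (currently disabled) agent-registration plotting path without
# blockchain access.
def make_synthetic_agent_frame():
    dates = pd.date_range("2024-11-01", periods=7, freq="D")
    return pd.DataFrame({'date': dates, 'agent_count': [1, 2, 0, 3, 1, 2, 4]})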
# Function to create visualizations based on the metrics
def create_visualizations():
"""
# Commenting out the original visualization code temporarily for debugging
transactions_data = fetch_and_aggregate_transactions()
df_transactions, df_agents, df_agents_weekly = process_transactions_and_agents(transactions_data)
# Fetch daily value locked data
df_tvl = pd.read_csv('daily_value_locked.csv')
# Calculate total value locked per chain per day
df_tvl["total_value_locked_usd"] = df_tvl["amount0_usd"] + df_tvl["amount1_usd"]
df_tvl_daily = df_tvl.groupby(["date", "chain_name"])["total_value_locked_usd"].sum().reset_index()
df_tvl_daily['date'] = pd.to_datetime(df_tvl_daily['date'])
# Filter out dates with zero total value locked
df_tvl_daily = df_tvl_daily[df_tvl_daily["total_value_locked_usd"] > 0]
chain_name_map = {
"mode": "Mode",
"base": "Base",
"ethereum": "Ethereum",
"optimism": "Optimism"
}
df_tvl_daily["chain_name"] = df_tvl_daily["chain_name"].map(chain_name_map)
# Plot total value locked
fig_tvl = px.bar(
df_tvl_daily,
x="date",
y="total_value_locked_usd",
color="chain_name",
opacity=0.7,
title="Total Volume Invested in Pools in Different Chains Daily",
labels={"date": "Date","chain_name": "Transaction Chain", "total_value_locked_usd": "Total Volume Invested (USD)"},
barmode='stack',
color_discrete_map={
"Mode": "orange",
"Base": "purple",
"Ethereum": "darkgreen",
"Optimism": "blue"
}
)
fig_tvl.update_layout(
xaxis_title="Date",
yaxis=dict(tickmode='linear', tick0=0, dtick=4),
xaxis=dict(
tickmode='array',
tickvals=df_tvl_daily['date'],
ticktext=df_tvl_daily['date'].dt.strftime('%b %d'),
tickangle=-45,
),
bargap=0.6, # Increase gap between bar groups (0-1)
bargroupgap=0.1, # Decrease gap between bars in a group (0-1)
height=600,
width=1200, # Specify width to prevent bars from being too wide
showlegend=True,
template='plotly_white'
)
fig_tvl.update_xaxes(tickformat="%b %d")
chain_name_map = {
10: "Optimism",
8453: "Base",
1: "Ethereum",
34443: "Mode"
}
df_transactions["sending_chain"] = df_transactions["sending_chain"].map(chain_name_map)
df_transactions["receiving_chain"] = df_transactions["receiving_chain"].map(chain_name_map)
df_transactions["sending_chain"] = df_transactions["sending_chain"].astype(str)
df_transactions["receiving_chain"] = df_transactions["receiving_chain"].astype(str)
df_transactions['date'] = pd.to_datetime(df_transactions['date'])
df_transactions["is_swap"] = df_transactions.apply(lambda x: x["sending_chain"] == x["receiving_chain"], axis=1)
swaps_per_chain = df_transactions[df_transactions["is_swap"]].groupby(["date", "sending_chain"]).size().reset_index(name="swap_count")
fig_swaps_chain = px.bar(
swaps_per_chain,
x="date",
y="swap_count",
color="sending_chain",
title="Chain Daily Activity: Swaps",
labels={"sending_chain": "Transaction Chain", "swap_count": "Daily Swap Nr"},
barmode="stack",
opacity=0.7,
color_discrete_map={
"Optimism": "blue",
"Ethereum": "darkgreen",
"Base": "purple",
"Mode": "orange"
}
)
fig_swaps_chain.update_layout(
xaxis_title="Date",
yaxis_title="Daily Swap Count",
yaxis=dict(tickmode='linear', tick0=0, dtick=1),
xaxis=dict(
tickmode='array',
tickvals=[d for d in swaps_per_chain['date']],
ticktext=[d.strftime('%m-%d') for d in swaps_per_chain['date']],
tickangle=-45,
),
bargap=0.6,
bargroupgap=0.1,
height=600,
width=1200,
margin=dict(l=50, r=50, t=50, b=50),
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="right",
x=0.99
),
template='plotly_white'
)
fig_swaps_chain.update_xaxes(tickformat="%m-%d")
df_transactions["is_bridge"] = df_transactions.apply(lambda x: x["sending_chain"] != x["receiving_chain"], axis=1)
bridges_per_chain = df_transactions[df_transactions["is_bridge"]].groupby(["date", "sending_chain"]).size().reset_index(name="bridge_count")
fig_bridges_chain = px.bar(
bridges_per_chain,
x="date",
y="bridge_count",
color="sending_chain",
title="Chain Daily Activity: Bridges",
labels={"sending_chain": "Transaction Chain", "bridge_count": "Daily Bridge Nr"},
barmode="stack",
opacity=0.7,
color_discrete_map={
"Optimism": "blue",
"Ethereum": "darkgreen",
"Base": "purple",
"Mode": "orange"
}
)
fig_bridges_chain.update_layout(
xaxis_title="Date",
yaxis_title="Daily Bridge Count",
yaxis=dict(tickmode='linear', tick0=0, dtick=1),
xaxis=dict(
tickmode='array',
tickvals=[d for d in bridges_per_chain['date']],
ticktext=[d.strftime('%m-%d') for d in bridges_per_chain['date']],
tickangle=-45,
),
bargap=0.6,
bargroupgap=0.1,
height=600,
width=1200,
margin=dict(l=50, r=50, t=50, b=50),
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="right",
x=0.99
),
template='plotly_white'
)
fig_bridges_chain.update_xaxes(tickformat="%m-%d")
df_agents['date'] = pd.to_datetime(df_agents['date'])
daily_agents_df = df_agents.groupby('date').agg({'agent_count': 'sum'}).reset_index()
daily_agents_df.rename(columns={'agent_count': 'daily_agent_count'}, inplace=True)
# Sort by date to ensure proper running total calculation
daily_agents_df = daily_agents_df.sort_values('date')
# Create week column
daily_agents_df['week'] = daily_agents_df['date'].dt.to_period('W').apply(lambda r: r.start_time)
# Calculate running total within each week
daily_agents_df['running_weekly_total'] = daily_agents_df.groupby('week')['daily_agent_count'].cumsum()
# Create final merged dataframe
weekly_merged_df = daily_agents_df.copy()
adjustment_date = pd.to_datetime('2024-11-15')
weekly_merged_df.loc[weekly_merged_df['date'] == adjustment_date, 'daily_agent_count'] -= 1
weekly_merged_df.loc[weekly_merged_df['date'] == adjustment_date, 'running_weekly_total'] -= 1
fig_agents_registered = go.Figure(data=[
go.Bar(
name='Daily nr of Registered Agents',
x=weekly_merged_df['date'].dt.strftime("%b %d"),
y=weekly_merged_df['daily_agent_count'],
opacity=0.7,
marker_color='blue'
),
go.Bar(
name='Weekly Nr of Registered Agents',
x=weekly_merged_df['date'].dt.strftime("%b %d"),
y=weekly_merged_df['running_weekly_total'],
opacity=0.7,
marker_color='purple'
)
])
fig_agents_registered.update_layout(
xaxis_title='Date',
yaxis_title='Number of Agents',
title="Nr of Agents Registered",
barmode='group',
yaxis=dict(tickmode='linear', tick0=0, dtick=1),
xaxis=dict(
categoryorder='array',
categoryarray=weekly_merged_df['date'].dt.strftime("%b %d"),
tickangle=-45
),
bargap=0.3,
height=600,
width=1200,
showlegend=True,
legend=dict(
yanchor="top",
xanchor="right",
),
template='plotly_white',
)
return fig_swaps_chain, fig_bridges_chain, fig_agents_registered,fig_tvl
"""
# Placeholder figures for testing
fig_swaps_chain = go.Figure()
fig_swaps_chain.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_bridges_chain = go.Figure()
fig_bridges_chain.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_agents_registered = go.Figure()
fig_agents_registered.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_tvl = go.Figure()
fig_tvl.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl
# Modify dashboard function to make the plot container responsive
def dashboard():
with gr.Blocks() as demo:
gr.Markdown("# Average Modius Agent Performance")
# Create tabs for APR and ROI metrics
with gr.Tabs():
# APR Metrics tab
with gr.Tab("APR Metrics"):
with gr.Column():
refresh_apr_btn = gr.Button("Refresh APR Data")
# Create container for plotly figure with responsive sizing
with gr.Column():
combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot")
# Create compact toggle controls at the bottom of the graph
with gr.Row(visible=True):
gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title")
with gr.Row():
with gr.Column():
with gr.Row(elem_id="apr_toggle_container"):
with gr.Column(scale=1, min_width=150):
apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle")
with gr.Column(scale=1, min_width=150):
adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle")
# Add a text area for status messages
apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
# ROI Metrics tab
with gr.Tab("ROI Metrics"):
with gr.Column():
refresh_roi_btn = gr.Button("Refresh ROI Data")
# Create container for plotly figure with responsive sizing
with gr.Column():
combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot")
# Create compact toggle controls at the bottom of the graph
with gr.Row(visible=True):
gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title")
with gr.Row():
with gr.Column():
with gr.Row(elem_id="roi_toggle_container"):
with gr.Column(scale=1, min_width=150):
roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle")
# Add a text area for status messages
roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
# Add custom CSS for making the plots responsive
gr.HTML("""
<style>
/* Make plots responsive */
#responsive_apr_plot, #responsive_roi_plot {
width: 100% !important;
max-width: 100% !important;
}
#responsive_apr_plot > div, #responsive_roi_plot > div {
width: 100% !important;
height: auto !important;
min-height: 500px !important;
}
/* Toggle checkbox styling */
#apr_toggle .gr-checkbox {
accent-color: #e74c3c !important;
}
#adjusted_apr_toggle .gr-checkbox {
accent-color: #2ecc71 !important;
}
#roi_toggle .gr-checkbox {
accent-color: #3498db !important;
}
/* Make the toggle section more compact */
#apr_toggle_title, #roi_toggle_title {
margin-bottom: 0;
margin-top: 10px;
}
#apr_toggle_container, #roi_toggle_container {
margin-top: 5px;
}
/* Style the checkbox labels */
.gr-form.gr-box {
border: none !important;
background: transparent !important;
}
/* Make checkboxes and labels appear on the same line */
.gr-checkbox-container {
display: flex !important;
align-items: center !important;
}
/* Add colored indicators */
#apr_toggle .gr-checkbox-label::before {
content: "●";
color: #e74c3c;
margin-right: 5px;
}
#adjusted_apr_toggle .gr-checkbox-label::before {
content: "●";
color: #2ecc71;
margin-right: 5px;
}
#roi_toggle .gr-checkbox-label::before {
content: "●";
color: #3498db;
margin-right: 5px;
}
</style>
""")
# Function to update the APR graph
def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True):
# Generate visualization and get figure object directly
try:
combined_fig, _ = generate_apr_visualizations()
# Update visibility of traces based on toggle values
for i, trace in enumerate(combined_fig.data):
# Check if this is a moving average trace
if trace.name == 'Average APR (3d window)':
trace.visible = show_apr_ma
elif trace.name == 'Average ETH Adjusted APR (3d window)':
trace.visible = show_adjusted_apr_ma
return combined_fig
except Exception as e:
logger.exception("Error generating APR visualization")
# Create error figure
error_fig = go.Figure()
error_fig.add_annotation(
text=f"Error: {str(e)}",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15, color="red")
)
return error_fig
# Function to update the ROI graph
def update_roi_graph(show_roi_ma=True):
# Generate visualization and get figure object directly
try:
combined_fig, _ = generate_roi_visualizations()
# Update visibility of traces based on toggle values
for i, trace in enumerate(combined_fig.data):
# Check if this is a moving average trace
if trace.name == 'Average ROI (3d window)':
trace.visible = show_roi_ma
return combined_fig
except Exception as e:
logger.exception("Error generating ROI visualization")
# Create error figure
error_fig = go.Figure()
error_fig.add_annotation(
text=f"Error: {str(e)}",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15, color="red")
)
return error_fig
# Initialize the APR graph on load with a placeholder
apr_placeholder_fig = go.Figure()
apr_placeholder_fig.add_annotation(
text="Click 'Refresh APR Data' to load APR graph",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15)
)
combined_apr_graph.value = apr_placeholder_fig
# Initialize the ROI graph on load with a placeholder
roi_placeholder_fig = go.Figure()
roi_placeholder_fig.add_annotation(
text="Click 'Refresh ROI Data' to load ROI graph",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15)
)
combined_roi_graph.value = roi_placeholder_fig
# Function to update the APR graph based on toggle states
def update_apr_graph_with_toggles(apr_visible, adjusted_apr_visible):
return update_apr_graph(apr_visible, adjusted_apr_visible)
# Function to update the ROI graph based on toggle states
def update_roi_graph_with_toggles(roi_visible):
return update_roi_graph(roi_visible)
# Function to refresh APR data
def refresh_apr_data():
"""Refresh APR data from the database and update the visualization"""
try:
# Fetch new APR data
logger.info("Manually refreshing APR data...")
fetch_apr_data_from_db()
# Verify data was fetched successfully
if global_df is None or len(global_df) == 0:
logger.error("Failed to fetch APR data")
return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
# Log info about fetched data with focus on adjusted_apr
may_10_2025 = datetime(2025, 5, 10)
if 'timestamp' in global_df and 'adjusted_apr' in global_df:
after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
# Generate new visualization
logger.info("Generating new APR visualization...")
new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
return new_graph, "APR data refreshed successfully"
except Exception as e:
logger.error(f"Error refreshing APR data: {e}")
return combined_apr_graph.value, f"Error: {str(e)}"
# Function to refresh ROI data
def refresh_roi_data():
"""Refresh ROI data from the database and update the visualization"""
try:
# Fetch new ROI data
logger.info("Manually refreshing ROI data...")
fetch_apr_data_from_db() # This also fetches ROI data
# Verify data was fetched successfully
if global_roi_df is None or len(global_roi_df) == 0:
logger.error("Failed to fetch ROI data")
return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details."
# Generate new visualization
logger.info("Generating new ROI visualization...")
new_graph = update_roi_graph(roi_toggle.value)
return new_graph, "ROI data refreshed successfully"
except Exception as e:
logger.error(f"Error refreshing ROI data: {e}")
return combined_roi_graph.value, f"Error: {str(e)}"
# Set up the button click event for APR refresh
refresh_apr_btn.click(
fn=refresh_apr_data,
inputs=[],
outputs=[combined_apr_graph, apr_status_text]
)
# Set up the button click event for ROI refresh
refresh_roi_btn.click(
fn=refresh_roi_data,
inputs=[],
outputs=[combined_roi_graph, roi_status_text]
)
# Set up the toggle switch events for APR
apr_toggle.change(
fn=update_apr_graph_with_toggles,
inputs=[apr_toggle, adjusted_apr_toggle],
outputs=[combined_apr_graph]
)
adjusted_apr_toggle.change(
fn=update_apr_graph_with_toggles,
inputs=[apr_toggle, adjusted_apr_toggle],
outputs=[combined_apr_graph]
)
# Set up the toggle switch events for ROI
roi_toggle.change(
fn=update_roi_graph_with_toggles,
inputs=[roi_toggle],
outputs=[combined_roi_graph]
)
return demo
def generate_adjusted_apr_report():
"""
Generate a detailed report about adjusted_apr data availability and save it to a file.
Returns the path to the generated report file.
"""
global global_df
if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns:
logger.warning("No adjusted_apr data available for report generation")
return None
# Create a report file
report_path = "adjusted_apr_report.txt"
with open(report_path, "w") as f:
f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n")
# Summary statistics
total_records = len(global_df)
records_with_adjusted = global_df['adjusted_apr'].notna().sum()
pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
f.write(f"Total APR records: {total_records}\n")
f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")
# First and last data points
if records_with_adjusted > 0:
has_adjusted = global_df[global_df['adjusted_apr'].notna()]
first_date = has_adjusted['timestamp'].min()
last_date = has_adjusted['timestamp'].max()
f.write(f"First adjusted_apr record: {first_date}\n")
f.write(f"Last adjusted_apr record: {last_date}\n")
f.write(f"Date range: {(last_date - first_date).days} days\n\n")
# Agent statistics
f.write("===== AGENT STATISTICS =====\n\n")
# Group by agent
agent_stats = []
for agent_id in global_df['agent_id'].unique():
agent_data = global_df[global_df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
total_agent_records = len(agent_data)
agent_with_adjusted = agent_data['adjusted_apr'].notna().sum()
coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
agent_stats.append({
'agent_id': agent_id,
'agent_name': agent_name,
'total_records': total_agent_records,
'with_adjusted': agent_with_adjusted,
'coverage_pct': coverage_pct
})
# Sort by coverage percentage (descending)
agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True)
# Write agent statistics
for agent in agent_stats:
f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
f.write(f" Records: {agent['total_records']}\n")
f.write(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")
# If agent has adjusted data, show date range
agent_data = global_df[global_df['agent_id'] == agent['agent_id']]
agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
if not agent_adjusted.empty:
first = agent_adjusted['timestamp'].min()
last = agent_adjusted['timestamp'].max()
f.write(f" First adjusted_apr: {first}\n")
f.write(f" Last adjusted_apr: {last}\n")
f.write("\n")
# Check for May 10th cutoff issue
f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
may_10_2025 = datetime(2025, 5, 10)
before_cutoff = global_df[global_df['timestamp'] < may_10_2025]
after_cutoff = global_df[global_df['timestamp'] >= may_10_2025]
# Calculate coverage before and after
before_total = len(before_cutoff)
before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0
after_total = len(after_cutoff)
after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0
f.write(f"Before May 10th, 2025:\n")
f.write(f" Records: {before_total}\n")
f.write(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")
f.write(f"After May 10th, 2025:\n")
f.write(f" Records: {after_total}\n")
f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n")
# Check for agents that had data before but not after
if before_total > 0 and after_total > 0:
agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
missing_after = agents_before - agents_after
new_after = agents_after - agents_before
if missing_after:
f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n")
# For each missing agent, show the last date with adjusted_apr
for agent_id in missing_after:
agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) &
(before_cutoff['adjusted_apr'].notna())]
if not agent_data.empty:
last_date = agent_data['timestamp'].max()
agent_name = agent_data['agent_name'].iloc[0]
f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n")
if new_after:
f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n")
logger.info(f"Adjusted APR report generated: {report_path}")
return report_path
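# Launch the dashboard. Kept at the end of the module so that every function
# above, including generate_adjusted_apr_report, is defined before the
# blocking launch() call runs.
if __name__ == "__main__":
    dashboard().launch()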