import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import logging
import json
import os
# Module-level logger named after this module; no handlers or levels are
# configured in this file.
logger = logging.getLogger(__name__)
# Data points whose APR lies outside [-limit, +limit] (percent) are treated as
# outliers: they are logged and left out of the plot.
_APR_OUTLIER_LIMIT = 200

# Known release suffixes of the agent hash, in display order, with their labels.
_KNOWN_HASH_VERSIONS = (
    ("tby", "v0.4.1"),
    ("vq", "v0.4.2"),
)

# Box fill color per version label; hashes of unknown versions use the default.
_VERSION_COLORS = {
    "v0.4.1": "rgba(31, 119, 180, 0.7)",  # Blue
    "v0.4.2": "rgba(44, 160, 44, 0.7)",  # Green
}
_DEFAULT_BOX_COLOR = "rgba(214, 39, 40, 0.7)"  # Red


def _bold_font(size):
    """Return the bold black Arial font spec used for titles, ticks and labels."""
    return dict(family="Arial, sans-serif", size=size, color="black", weight="bold")


def _centered_message_figure(message):
    """Return an otherwise empty figure showing ``message`` at (0.5, 0.5)."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        x=0.5, y=0.5,
        showarrow=False, font=dict(size=20)
    )
    return fig


def _save_error_figure(error):
    """Return the fallback figure (zero line plus red error note) for ``error``."""
    fig = go.Figure()
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=-0.5, x1=1.5  # Fixed values for error case
    )
    fig.add_annotation(
        text=f"Error creating graph: {str(error)}",
        x=0.5, y=0.5,
        showarrow=False,
        font=dict(size=15, color="red")
    )
    return fig


def _classify_agent_hash(agent_hash):
    """Return ``(sort_key, version_label)`` for one agent hash.

    Hashes ending in a known release suffix sort first, in release order.
    Every other hash sorts after them (ordered by the hash string itself)
    and is labelled with its last six characters.
    """
    for rank, (suffix, label) in enumerate(_KNOWN_HASH_VERSIONS):
        if agent_hash.endswith(suffix):
            return (rank, ""), label
    return (len(_KNOWN_HASH_VERSIONS), agent_hash), f"Hash: {agent_hash[-6:]}"


def _log_apr_outliers(outlier_data):
    """Log the excluded outlier rows, grouped by ``agent_name``."""
    if outlier_data.empty:
        return
    limit = _APR_OUTLIER_LIMIT
    logger.info(
        f"Excluded {len(outlier_data)} data points with outlier APR values "
        f"(>{limit} or <-{limit})"
    )
    for agent_name, agent_outliers in outlier_data.groupby('agent_name'):
        logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
        for _, row in agent_outliers.iterrows():
            logger.info(
                f" - APR: {row['apr']}, timestamp: {row['timestamp']}, "
                f"agent_hash: {row['agent_hash']}"
            )


def _add_background_shapes(fig, x0, x1):
    """Draw the zero line and the positive/negative shaded bands from x0 to x1."""
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=x0, x1=x1,
        layer="below"
    )
    # Fixed +/-100 bands: positive region tinted blue, negative region tinted red.
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=100,
        x0=x0, x1=x1,
        layer="below"
    )
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-100, y1=0,
        x0=x0, x1=x1,
        layer="below"
    )


def _style_hash_graph(fig, x_positions, x_labels):
    """Apply the title, layout, axis styling and the rotated y-axis label."""
    fig.update_layout(
        title=dict(
            text="Performance Graph",
            font=_bold_font(24),
            x=0.5,  # Center the title
            y=0.95
        ),
        xaxis_title=dict(
            text="Agent Version",
            font=_bold_font(18)
        ),
        yaxis_title=None,  # The y-axis label is drawn as an annotation below
        template="plotly_white",
        height=900,
        autosize=True,  # No fixed width so the chart stays responsive
        boxmode='group',
        margin=dict(r=50, l=120, t=100, b=100),
        hovermode="closest",
        plot_bgcolor='rgba(250,250,250,0.9)',
        paper_bgcolor='white',
        font=dict(
            family="Arial, sans-serif",
            size=14,
            color="black"
        ),
        showlegend=False
    )
    # Single rotated annotation acting as the y-axis title; placed left of the
    # axis in paper coordinates so it does not overlap the tick labels.
    fig.add_annotation(
        x=-0.08,
        y=0,
        xref="paper",
        yref="y",
        text="Agent APR [%]",
        showarrow=False,
        font=_bold_font(16),
        textangle=-90,
        align="center"
    )
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        autorange=True,
        tickformat=".2f",
        tickfont=_bold_font(14),
        title=None
    )
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        tickmode='array',
        tickvals=x_positions,
        ticktext=x_labels,
        tickangle=-45,
        tickfont=_bold_font(14)
    )


def create_apr_vs_agent_hash_graph(df):
    """
    Create a box plot showing APR values distribution for each agent hash version.

    Rows are kept when ``metric_type == 'APR'`` and ``agent_hash`` is not null.
    APR values outside [-200, 200] are logged as outliers and excluded; rows
    whose ``apr`` is NaN match neither range test and are dropped silently.
    One box is drawn per distinct hash, known versions first.

    As a side effect the figure is written to
    ``modius_apr_vs_agent_hash_graph.html`` and, best-effort, to
    ``modius_apr_vs_agent_hash_graph.png`` in the current working directory.

    Args:
        df: DataFrame containing the APR data. Reads the columns
            ``agent_hash``, ``metric_type``, ``apr`` and ``agent_name``,
            plus ``timestamp`` when outliers are logged.

    Returns:
        A Plotly figure object. A placeholder figure carrying a message is
        returned when there is nothing to plot, and an error figure when the
        HTML export fails for a reason other than an ``OSError``.
    """
    if len(df) == 0 or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to plot APR vs agent hash graph")
        return _centered_message_figure("No agent hash data available")

    # Treat the other columns needed for filtering like a missing agent_hash
    # instead of failing with a KeyError.
    missing_columns = [name for name in ('metric_type', 'apr') if name not in df.columns]
    if missing_columns:
        logger.error(
            f"Required column(s) {missing_columns} not found to plot APR vs agent hash graph"
        )
        return _centered_message_figure("No valid APR data with agent_hash found")

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())]
    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found")
        return _centered_message_figure("No valid APR data with agent_hash found")

    # Split off outliers; explicit comparisons keep NaN out of both subsets.
    limit = _APR_OUTLIER_LIMIT
    apr_column = apr_data['apr']
    _log_apr_outliers(apr_data[(apr_column > limit) | (apr_column < -limit)])
    apr_data = apr_data[(apr_column <= limit) & (apr_column >= -limit)]
    if apr_data.empty:
        # Every row was an outlier or NaN: an empty styled chart would be misleading.
        logger.error("No APR data left to plot after excluding outliers")
        return _centered_message_figure("No APR data within the plotted range")

    # Classify each hash once; dict order follows first appearance and the
    # sort is stable, so hashes of the same version keep that order.
    classified = {
        agent_hash: _classify_agent_hash(agent_hash)
        for agent_hash in apr_data['agent_hash'].unique()
    }
    sorted_hashes = sorted(classified, key=lambda agent_hash: classified[agent_hash][0])
    num_boxes = len(sorted_hashes)
    x_positions = list(range(num_boxes))
    x_labels = []

    fig = go.Figure()
    _add_background_shapes(fig, x0=-0.5, x1=num_boxes - 0.5)

    # APR values pooled per version label, used for the version comparison.
    values_by_version = {}

    for position, agent_hash in enumerate(sorted_hashes):
        version = classified[agent_hash][1]
        hash_data = apr_data[apr_data['agent_hash'] == agent_hash]
        # Agent name of the first record carrying this hash
        agent_name = hash_data['agent_name'].iloc[0] if not hash_data.empty else "Unknown"
        color = _VERSION_COLORS.get(version, _DEFAULT_BOX_COLOR)

        apr_series = hash_data['apr']
        apr_values = apr_series.tolist()
        median_apr = apr_series.median()
        count = len(apr_values)

        values_by_version.setdefault(version, []).extend(apr_values)
        x_labels.append(version)

        # Plotly hover labels use <br> as the line separator.
        hover_text = "<br>".join([
            f"Version: {version}",
            f"Agent: {agent_name}",
            f"Hash: {agent_hash}",
            f"Median APR: {median_apr:.2f}%",
            f"Mean APR: {apr_series.mean():.2f}%",
            f"Min APR: {apr_series.min():.2f}%",
            f"Max APR: {apr_series.max():.2f}%",
            f"Data points: {count}",
        ])

        fig.add_trace(
            go.Box(
                y=apr_values,
                x=[position] * count,  # Position on x-axis
                name=version,
                boxpoints='outliers',  # Show only outlier points instead of all points
                jitter=0.1,
                pointpos=0,
                marker=dict(
                    color=color,
                    size=6,
                    opacity=0.7,
                    line=dict(width=1, color='black')
                ),
                line=dict(
                    color='black',
                    width=2
                ),
                fillcolor=color,
                hoverinfo='text',
                hovertext=hover_text,
                showlegend=False,
                boxmean=True,  # Show mean as a dashed line
                whiskerwidth=0.8,
                width=0.6
            )
        )
        logger.info(f"Added box plot for agent hash {agent_hash} ({version}) with {count} points")

        # Median value printed slightly above the median line of each box
        fig.add_annotation(
            x=position,
            y=median_apr + 5,
            text=f"{median_apr:.1f}%",
            showarrow=False,
            font=_bold_font(12)
        )

    # Compare pooled medians of the two known versions when both are present
    if "v0.4.1" in values_by_version and "v0.4.2" in values_by_version:
        v041_median = pd.Series(values_by_version["v0.4.1"]).median()
        v042_median = pd.Series(values_by_version["v0.4.2"]).median()
        improvement = v042_median - v041_median
        change_text = "improvement" if improvement > 0 else "decrease"
        fig.add_annotation(
            x=(num_boxes - 1) / 2,  # Center of the x-axis
            y=90,  # Near the top of the shaded positive band
            text=f"Version Comparison: {abs(improvement):.2f}% {change_text} from v0.4.1 to v0.4.2",
            showarrow=False,
            font=_bold_font(16),
            bgcolor="rgba(255, 255, 255, 0.9)",
            bordercolor="black",
            borderwidth=2,
            borderpad=6,
            opacity=0.9
        )

    _style_hash_graph(fig, x_positions, x_labels)

    graph_file = "modius_apr_vs_agent_hash_graph.html"
    img_file = "modius_apr_vs_agent_hash_graph.png"
    try:
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
    except OSError as e:
        # The figure itself is fine; only the file could not be written, so
        # keep the chart instead of replacing it with an error placeholder.
        logger.error(f"Error saving APR vs agent hash graph to {graph_file}: {e}")
        return fig
    except Exception as e:
        # Anything else means the figure could not be rendered to HTML.
        logger.error(f"Error creating APR vs agent hash graph: {e}")
        return _save_error_figure(e)

    # Also save as image for compatibility; this export is best-effort.
    try:
        fig.write_image(img_file)
        logger.info(f"APR vs agent hash graph saved to {graph_file} and {img_file}")
    except Exception as e:
        logger.error(f"Error saving image: {e}")
        logger.info(f"APR vs agent hash graph saved to {graph_file} only")

    # Return the figure object for direct use in Gradio
    return fig
def save_apr_vs_agent_hash_to_csv(df):
    """
    Save the APR vs agent hash data to a CSV file.

    Only rows with ``metric_type == 'APR'`` and a non-null ``agent_hash`` are
    written, without the index, to ``modius_apr_vs_agent_hash.csv`` in the
    current working directory.

    Args:
        df: DataFrame containing the APR data with ``agent_hash`` and
            ``metric_type`` columns

    Returns:
        The path to the saved CSV file, or None if no data was saved (empty
        input, a missing column, no matching rows, or a failed write)
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to save to CSV")
        return None

    # Without metric_type the APR filter below would raise a KeyError.
    if 'metric_type' not in df.columns:
        logger.error("metric_type column not found to save to CSV")
        return None

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())]
    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found to save to CSV")
        return None

    csv_file = "modius_apr_vs_agent_hash.csv"
    try:
        apr_data.to_csv(csv_file, index=False)
    except OSError as e:
        # A failed write means nothing was saved, which the contract reports as None.
        logger.error(f"Error saving APR vs agent hash data to {csv_file}: {e}")
        return None

    logger.info(f"APR vs agent hash data saved to {csv_file}")
    return csv_file
def generate_apr_vs_agent_hash_visualizations(df):
    """
    Build the APR vs agent hash figure and export the underlying data to CSV.

    Args:
        df: DataFrame containing the APR data

    Returns:
        A ``(figure, csv_path)`` tuple. When ``df`` is empty or has no
        ``agent_hash`` column, the figure is a placeholder carrying a message
        and ``csv_path`` is None.
    """
    def _placeholder(message):
        # Blank canvas with hidden axes and the message at (0.5, 0.5).
        blank = go.Figure()
        blank.add_annotation(
            x=0.5, y=0.5,
            text=message,
            font=dict(size=20),
            showarrow=False
        )
        blank.update_layout(
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
        )
        return blank

    # Guard clauses: nothing to export or plot in either case.
    if df.empty:
        logger.info("No APR data available for agent hash visualization.")
        return _placeholder("No APR data available for agent hash visualization"), None

    if 'agent_hash' not in df.columns:
        logger.error("agent_hash column not found in DataFrame")
        return _placeholder("agent_hash column not found in data"), None

    # The CSV export runs first, then the figure is built from the same frame.
    csv_path = save_apr_vs_agent_hash_to_csv(df)
    figure = create_apr_vs_agent_hash_graph(df)
    return figure, csv_path