Spaces:

valory
/

Modius-Agent-Performance

Running

Modius-Agent-Performance / apr_vs_agent_hash.py

gauravlochab

feat: enhance responsiveness and styling of APR vs Agent Hash graph

e165379 9 days ago

16.7 kB

	import pandas as pd
	import plotly.graph_objects as go
	import plotly.express as px
	from datetime import datetime
	import logging
	import json
	import os

	# Module-level logger named after this module. No handlers or levels are
	# configured in the code visible here.
	logger = logging.getLogger(__name__)

	def _apr_hash_message_figure(message):
	    """Return an otherwise empty figure showing ``message`` as centred text."""
	    fig = go.Figure()
	    fig.add_annotation(
	        text=message,
	        x=0.5, y=0.5,
	        showarrow=False, font=dict(size=20)
	    )
	    return fig


	def _apr_hash_version_label(agent_hash):
	    """Return the x-axis label for ``agent_hash``.

	    Hashes ending in "tby" and "vq" are labelled "v0.4.1" and "v0.4.2";
	    any other hash is labelled with its last six characters.
	    """
	    text = str(agent_hash)  # tolerate non-string hash values
	    if text.endswith("tby"):
	        return "v0.4.1"
	    if text.endswith("vq"):
	        return "v0.4.2"
	    return f"Hash: {text[-6:]}"


	def _apr_hash_sort_key(agent_hash):
	    """Return a sort key placing v0.4.1 first, v0.4.2 second, others after.

	    A (rank, text) tuple is used so that an unknown hash can never sort in
	    front of the known versions, whatever characters it starts with.
	    """
	    text = str(agent_hash)
	    if text.endswith("tby"):
	        return (0, text)
	    if text.endswith("vq"):
	        return (1, text)
	    return (2, text)


	def create_apr_vs_agent_hash_graph(df):
	    """
	    Create a box plot showing APR values distribution for each agent hash version.

	    Rows are kept when ``metric_type == 'APR'`` and ``agent_hash`` is not null.
	    APR values outside [-200, 200] are logged and excluded. One box is drawn
	    per distinct agent hash, ordered v0.4.1, v0.4.2, then any other hashes.
	    When both known versions are present, the difference of their median
	    APRs is shown as an annotation.

	    The figure is also written to ``modius_apr_vs_agent_hash_graph.html`` and
	    ``modius_apr_vs_agent_hash_graph.png`` in the current working directory.
	    Export is best-effort: failures are logged and the figure is still
	    returned.

	    Args:
	        df: DataFrame containing the APR data. The columns ``agent_hash``,
	            ``metric_type`` and ``apr`` are required; ``agent_name`` and
	            ``timestamp`` are used for hover text and logging when present.

	    Returns:
	        A Plotly figure object. When there is nothing to plot, the figure
	        contains only an explanatory text annotation.
	    """
	    if df.empty or 'agent_hash' not in df.columns:
	        logger.error("No data or agent_hash column not found to plot APR vs agent hash graph")
	        return _apr_hash_message_figure("No agent hash data available")

	    missing_columns = [col for col in ('metric_type', 'apr') if col not in df.columns]
	    if missing_columns:
	        logger.error(f"Required column(s) {missing_columns} not found to plot APR vs agent hash graph")
	        return _apr_hash_message_figure("No valid APR data with agent_hash found")

	    # Filter for APR data only and ensure agent_hash is not null
	    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())].copy()

	    if apr_data.empty:
	        logger.error("No valid APR data with agent_hash found")
	        return _apr_hash_message_figure("No valid APR data with agent_hash found")

	    # Split off outliers (APR values above 200 or below -200). NaN APR values
	    # satisfy neither comparison, so they are dropped without being counted.
	    outlier_data = apr_data[(apr_data['apr'] > 200) | (apr_data['apr'] < -200)]
	    apr_data = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)].copy()

	    # Log the outliers for better debugging
	    if not outlier_data.empty:
	        logger.info(f"Excluded {len(outlier_data)} data points with outlier APR values (>200 or <-200)")

	        # Per-agent detail is only possible when agent names are available
	        if 'agent_name' in outlier_data.columns:
	            for agent_name, agent_outliers in outlier_data.groupby('agent_name'):
	                logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
	                for _, row in agent_outliers.iterrows():
	                    logger.info(
	                        f" - APR: {row['apr']}, timestamp: {row.get('timestamp')}, "
	                        f"agent_hash: {row['agent_hash']}"
	                    )

	    if apr_data.empty:
	        logger.error("No APR data left to plot after excluding outliers")
	        return _apr_hash_message_figure("No valid APR data with agent_hash found")

	    # One box per hash, ordered v0.4.1, v0.4.2, then everything else
	    sorted_hashes = sorted(apr_data['agent_hash'].unique(), key=_apr_hash_sort_key)

	    # Boxes sit at integer x positions 0..n-1, so the background spans
	    # half a unit beyond the first and last box.
	    x_left = -0.5
	    x_right = len(sorted_hashes) - 0.5

	    # Create Plotly figure
	    fig = go.Figure()

	    # Zero line spanning the entire width
	    fig.add_shape(
	        type="line",
	        line=dict(dash="solid", width=1.5, color="black"),
	        y0=0, y1=0,
	        x0=x_left, x1=x_right,
	        layer="below"
	    )

	    # Background shading for the positive and negative regions
	    # (fixed vertical extent of +/-100)
	    fig.add_shape(
	        type="rect",
	        fillcolor="rgba(230, 243, 255, 0.3)",
	        line=dict(width=0),
	        y0=0, y1=100,
	        x0=x_left, x1=x_right,
	        layer="below"
	    )
	    fig.add_shape(
	        type="rect",
	        fillcolor="rgba(255, 230, 230, 0.3)",
	        line=dict(width=0),
	        y0=-100, y1=0,
	        x0=x_left, x1=x_right,
	        layer="below"
	    )

	    # Colors for the known versions; any other hash uses the default
	    version_colors = {
	        "v0.4.1": "rgba(31, 119, 180, 0.7)",  # Blue
	        "v0.4.2": "rgba(44, 160, 44, 0.7)",  # Green
	    }
	    default_color = "rgba(214, 39, 40, 0.7)"  # Red

	    # APR values pooled per version label, used for the version comparison
	    version_values = {}

	    # X-axis positions and labels
	    x_positions = []
	    x_labels = []

	    # Process each hash to create its box
	    for i, agent_hash in enumerate(sorted_hashes):
	        hash_data = apr_data[apr_data['agent_hash'] == agent_hash]

	        # The first record's agent name is shown for this hash
	        if 'agent_name' in hash_data.columns and not hash_data.empty:
	            agent_name = hash_data['agent_name'].iloc[0]
	        else:
	            agent_name = "Unknown"

	        version = _apr_hash_version_label(agent_hash)
	        color = version_colors.get(version, default_color)

	        # Calculate statistics for this hash
	        apr_values = hash_data['apr'].tolist()
	        median_apr = hash_data['apr'].median()
	        mean_apr = hash_data['apr'].mean()
	        min_apr = hash_data['apr'].min()
	        max_apr = hash_data['apr'].max()
	        count = len(apr_values)

	        version_values.setdefault(version, []).extend(apr_values)

	        # Label with version only (no hash)
	        label = f"{version}"
	        x_positions.append(i)
	        x_labels.append(label)

	        # Hover text with detailed statistics
	        hover_text = (
	            f"Version: {version}<br>"
	            f"Agent: {agent_name}<br>"
	            f"Hash: {agent_hash}<br>"
	            f"Median APR: {median_apr:.2f}%<br>"
	            f"Mean APR: {mean_apr:.2f}%<br>"
	            f"Min APR: {min_apr:.2f}%<br>"
	            f"Max APR: {max_apr:.2f}%<br>"
	            f"Data points: {count}"
	        )

	        # Add box plot for this hash
	        fig.add_trace(
	            go.Box(
	                y=apr_values,
	                x=[i] * len(apr_values),  # Position on x-axis
	                name=label,
	                boxpoints='outliers',  # Show only outlier points instead of all points
	                jitter=0.1,  # Reduced jitter for less horizontal spread
	                pointpos=0,  # Position of points relative to box
	                marker=dict(
	                    color=color,
	                    size=6,  # Smaller point size
	                    opacity=0.7,  # Add transparency
	                    line=dict(width=1, color='black')
	                ),
	                line=dict(
	                    color='black',
	                    width=2  # Thicker line for better visibility
	                ),
	                fillcolor=color,
	                hoverinfo='text',
	                hovertext=hover_text,
	                showlegend=False,
	                boxmean=True,  # Show mean as a dashed line
	                whiskerwidth=0.8,  # Slightly thinner whiskers
	                width=0.6  # Wider boxes
	            )
	        )

	        logger.info(f"Added box plot for agent hash {agent_hash} ({version}) with {count} points")

	        # Median value as text just above the median line
	        fig.add_annotation(
	            x=i,
	            y=median_apr + 5,
	            text=f"{median_apr:.1f}%",
	            showarrow=False,
	            font=dict(
	                family="Arial, sans-serif",
	                size=12,
	                color="black",
	                weight="bold"
	            )
	        )

	    # Compare the two known versions by median APR
	    if "v0.4.1" in version_values and "v0.4.2" in version_values:
	        v041_median = pd.Series(version_values["v0.4.1"]).median()
	        v042_median = pd.Series(version_values["v0.4.2"]).median()

	        # Absolute difference of medians, in APR percentage points
	        improvement = v042_median - v041_median
	        change_text = "improvement" if improvement > 0 else "decrease"

	        fig.add_annotation(
	            x=(len(sorted_hashes) - 1) / 2,  # Center of the x-axis
	            y=90,  # Near the top of the shaded positive region
	            text=f"<b>Version Comparison:</b> {abs(improvement):.2f}% {change_text} from v0.4.1 to v0.4.2",
	            showarrow=False,
	            font=dict(
	                family="Arial, sans-serif",
	                size=16,
	                color="black",
	                weight="bold"
	            ),
	            bgcolor="rgba(255, 255, 255, 0.9)",
	            bordercolor="black",
	            borderwidth=2,
	            borderpad=6,
	            opacity=0.9
	        )

	    # Update layout with improved styling
	    fig.update_layout(
	        title=dict(
	            text="Performance Graph",
	            font=dict(
	                family="Arial, sans-serif",
	                size=24,  # Larger title
	                color="black",
	                weight="bold"
	            ),
	            x=0.5,  # Center the title
	            y=0.95  # Position slightly higher
	        ),
	        xaxis_title=dict(
	            text="Agent Version",
	            font=dict(
	                family="Arial, sans-serif",
	                size=18,  # Larger axis title
	                color="black",
	                weight="bold"
	            )
	        ),
	        yaxis_title=None,  # The y-axis title is drawn as an annotation below
	        template="plotly_white",
	        height=900,
	        # No fixed width, so the figure stays responsive
	        autosize=True,
	        boxmode='group',  # Group boxes together
	        margin=dict(r=50, l=120, t=100, b=100),
	        hovermode="closest",
	        plot_bgcolor='rgba(250,250,250,0.9)',  # Slightly off-white background
	        paper_bgcolor='white',
	        font=dict(
	            family="Arial, sans-serif",
	            size=14,
	            color="black"
	        ),
	        showlegend=False
	    )

	    # Single annotation acting as the y-axis title
	    fig.add_annotation(
	        x=-0.08,  # Left of the plot area to avoid overlapping tick labels
	        y=0,
	        xref="paper",
	        yref="y",
	        text="Agent APR [%]",
	        showarrow=False,
	        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),
	        textangle=-90,  # Rotate text to be vertical
	        align="center"
	    )

	    # Update y-axis with autoscaling
	    fig.update_yaxes(
	        showgrid=True,
	        gridwidth=1,
	        gridcolor='rgba(0,0,0,0.1)',
	        autorange=True,
	        tickformat=".2f",  # Format tick labels with 2 decimal places
	        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
	        title=None  # Built-in axis title is replaced by the annotation above
	    )

	    # Update x-axis with custom labels
	    fig.update_xaxes(
	        showgrid=True,
	        gridwidth=1,
	        gridcolor='rgba(0,0,0,0.1)',
	        tickmode='array',
	        tickvals=x_positions,
	        ticktext=x_labels,
	        tickangle=-45,  # Angle the labels for better readability
	        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
	    )

	    # Export is best-effort: a failed write must not discard the figure that
	    # has already been built, so every export error is logged and swallowed.
	    graph_file = "modius_apr_vs_agent_hash_graph.html"
	    img_file = "modius_apr_vs_agent_hash_graph.png"
	    try:
	        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
	    except Exception as e:
	        logger.error(f"Error saving APR vs agent hash graph to {graph_file}: {e}")
	    else:
	        # Also save as image for compatibility
	        try:
	            fig.write_image(img_file)
	            logger.info(f"APR vs agent hash graph saved to {graph_file} and {img_file}")
	        except Exception as e:
	            logger.error(f"Error saving image: {e}")
	            logger.info(f"APR vs agent hash graph saved to {graph_file} only")

	    # Return the figure object for direct use in Gradio
	    return fig

	def save_apr_vs_agent_hash_to_csv(df):
	    """
	    Save the APR vs agent hash data to a CSV file.

	    Only rows with ``metric_type == 'APR'`` and a non-null ``agent_hash`` are
	    written. The file ``modius_apr_vs_agent_hash.csv`` is created in the
	    current working directory, without the DataFrame index.

	    Args:
	        df: DataFrame containing the APR data with ``agent_hash`` and
	            ``metric_type`` columns

	    Returns:
	        The path to the saved CSV file, or None if no data was saved
	    """
	    if df.empty or 'agent_hash' not in df.columns:
	        logger.error("No data or agent_hash column not found to save to CSV")
	        return None

	    # Without metric_type the APR rows cannot be selected; report that rather
	    # than failing with a KeyError.
	    if 'metric_type' not in df.columns:
	        logger.error("metric_type column not found to save to CSV")
	        return None

	    # Filter for APR data only and ensure agent_hash is not null
	    is_apr = df['metric_type'] == 'APR'
	    has_hash = df['agent_hash'].notna()
	    apr_data = df[is_apr & has_hash]

	    if apr_data.empty:
	        logger.error("No valid APR data with agent_hash found to save to CSV")
	        return None

	    csv_file = "modius_apr_vs_agent_hash.csv"
	    apr_data.to_csv(csv_file, index=False)
	    logger.info(f"APR vs agent hash data saved to {csv_file}")

	    return csv_file

	def generate_apr_vs_agent_hash_visualizations(df):
	    """
	    Build the APR-vs-agent-hash figure and export the underlying data.

	    For an empty DataFrame, or one without an ``agent_hash`` column, a
	    figure carrying only an explanatory message is returned and nothing is
	    exported. Otherwise the CSV export runs first, then the figure is built.

	    Args:
	        df: DataFrame containing the APR data

	    Returns:
	        A tuple ``(figure, csv_path)``: the Plotly figure object and the path
	        of the saved CSV file (None when no CSV was written)
	    """
	    def _message_only_figure(message):
	        # Blank canvas with axes hidden, showing just the message text.
	        placeholder = go.Figure()
	        placeholder.add_annotation(
	            x=0.5, y=0.5,
	            text=message,
	            font=dict(size=20),
	            showarrow=False
	        )
	        placeholder.update_layout(
	            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
	            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
	        )
	        return placeholder

	    # Guard: nothing to visualise at all
	    if df.empty:
	        logger.info("No APR data available for agent hash visualization.")
	        return _message_only_figure("No APR data available for agent hash visualization"), None

	    # Guard: data present but without the column the graph is keyed on
	    if 'agent_hash' not in df.columns:
	        logger.error("agent_hash column not found in DataFrame")
	        return _message_only_figure("agent_hash column not found in data"), None

	    # CSV export happens before the figure is created
	    csv_path = save_apr_vs_agent_hash_to_csv(df)
	    figure = create_apr_vs_agent_hash_graph(df)

	    return figure, csv_path