Spaces:
Running
Running
import pandas as pd | |
import plotly.graph_objects as go | |
import plotly.express as px | |
from datetime import datetime | |
import logging | |
import json | |
import os | |
# Set up logging | |
logger = logging.getLogger(__name__) | |
def create_apr_vs_agent_hash_graph(df):
    """
    Create a box plot showing APR values distribution for each agent hash version.

    Only rows with metric_type == 'APR' and a non-null agent_hash are used.
    APR values above 200 or below -200 are logged and excluded. One box is
    drawn per agent hash; boxes for v0.4.1 hashes come first, then v0.4.2,
    then every other hash in lexicographic order. When both v0.4.1 and v0.4.2
    are present, an annotation with the difference of their medians is added.

    The figure is also written to "modius_apr_vs_agent_hash_graph.html" and,
    when image export works, "modius_apr_vs_agent_hash_graph.png".

    Args:
        df: DataFrame containing the APR data with agent_hash column. The
            columns 'metric_type', 'apr' and 'agent_name' are read as well,
            and 'timestamp' is read when outliers are logged.
    Returns:
        A Plotly figure object. A placeholder figure with a message is
        returned when there is nothing to plot, and a simplified error figure
        is returned when saving the HTML file raises.
    """
    if len(df) == 0 or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to plot APR vs agent hash graph")
        return _apr_hash_message_figure("No agent hash data available")

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())].copy()
    if len(apr_data) == 0:
        logger.error("No valid APR data with agent_hash found")
        return _apr_hash_message_figure("No valid APR data with agent_hash found")

    # Split off outliers (APR values above 200 or below -200); everything
    # below works on the remaining rows only.
    outlier_data = apr_data[(apr_data['apr'] > 200) | (apr_data['apr'] < -200)].copy()
    apr_data = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)].copy()

    # Log the outliers for better debugging
    if len(outlier_data) > 0:
        logger.info(f"Excluded {len(outlier_data)} data points with outlier APR values (>200 or <-200)")
        # Group outliers by agent for detailed logging
        for agent_name, agent_outliers in outlier_data.groupby('agent_name'):
            logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
            for _, row in agent_outliers.iterrows():
                logger.info(f" - APR: {row['apr']}, timestamp: {row['timestamp']}, agent_hash: {row['agent_hash']}")

    # Sort hashes by version (v0.4.1 first, then v0.4.2, then all others).
    # The rank-based key keeps unknown hashes after the known versions even
    # when their text would sort before them (e.g. a hash starting with "0").
    sorted_hashes = sorted(apr_data['agent_hash'].unique(), key=_agent_hash_sort_key)
    x_min = -0.5
    x_max = len(sorted_hashes) - 0.5  # Right edge of the last box slot

    # Create Plotly figure
    fig = go.Figure()

    # Add a zero line that spans the entire width
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=x_min, x1=x_max,
        layer="below"
    )

    # Add background shapes for positive and negative regions
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=100,  # Use a fixed positive value
        x0=x_min, x1=x_max,
        layer="below"
    )
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-100, y1=0,  # Use a fixed negative value
        x0=x_min, x1=x_max,
        layer="below"
    )

    # Colors for different versions
    version_colors = {
        "v0.4.1": "rgba(31, 119, 180, 0.7)",  # Blue
        "v0.4.2": "rgba(44, 160, 44, 0.7)",  # Green
    }
    # Default color for other hashes
    default_color = "rgba(214, 39, 40, 0.7)"  # Red

    # APR values pooled per version label, used for the version comparison
    version_values = {}
    # X-axis positions and labels
    x_positions = []
    x_labels = []

    # Process each hash to create box plot data
    for i, agent_hash in enumerate(sorted_hashes):
        hash_data = apr_data[apr_data['agent_hash'] == agent_hash]
        # Take the agent name from the first record with this hash
        agent_name = hash_data['agent_name'].iloc[0] if not hash_data.empty else "Unknown"
        version = _agent_hash_version(agent_hash)
        color = version_colors.get(version, default_color)

        # Calculate statistics for this hash
        apr_values = hash_data['apr'].tolist()
        median_apr = hash_data['apr'].median()
        mean_apr = hash_data['apr'].mean()
        min_apr = hash_data['apr'].min()
        max_apr = hash_data['apr'].max()
        count = len(apr_values)

        # Several hashes can share one version label, so pool their values
        version_values.setdefault(version, []).extend(apr_values)

        # Create label with version only (no hash)
        label = f"{version}"
        x_positions.append(i)
        x_labels.append(label)

        # Create hover text with detailed statistics
        hover_text = (
            f"Version: {version}<br>"
            f"Agent: {agent_name}<br>"
            f"Hash: {agent_hash}<br>"
            f"Median APR: {median_apr:.2f}%<br>"
            f"Mean APR: {mean_apr:.2f}%<br>"
            f"Min APR: {min_apr:.2f}%<br>"
            f"Max APR: {max_apr:.2f}%<br>"
            f"Data points: {count}"
        )

        # Add box plot for this hash
        fig.add_trace(
            go.Box(
                y=apr_values,
                x=[i] * len(apr_values),  # Position on x-axis
                name=label,
                boxpoints='outliers',  # Show only outlier points instead of all points
                jitter=0.1,  # Reduced jitter for less horizontal spread
                pointpos=0,  # Position of points relative to box
                marker=dict(
                    color=color,
                    size=6,  # Smaller point size
                    opacity=0.7,  # Add transparency
                    line=dict(width=1, color='black')
                ),
                line=dict(
                    color='black',
                    width=2  # Thicker line for better visibility
                ),
                fillcolor=color,
                hoverinfo='text',
                hovertext=hover_text,
                showlegend=False,
                boxmean=True,  # Show mean as a dashed line
                whiskerwidth=0.8,  # Slightly thinner whiskers
                width=0.6  # Wider boxes
            )
        )
        logger.info(f"Added box plot for agent hash {agent_hash} ({version}) with {count} points")

        # Add text annotation with the median value, offset 5 above the median
        fig.add_annotation(
            x=i,
            y=median_apr + 5,
            text=f"{median_apr:.1f}%",
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=12,
                color="black",
                weight="bold"
            )
        )

    # Calculate improvement metrics between versions
    if "v0.4.1" in version_values and "v0.4.2" in version_values:
        v041_median = pd.Series(version_values["v0.4.1"]).median()
        v042_median = pd.Series(version_values["v0.4.2"]).median()
        improvement = v042_median - v041_median
        # Determine if the change is positive or negative
        change_text = "improvement" if improvement > 0 else "decrease"

        # Add annotation showing the difference between the version medians
        fig.add_annotation(
            x=(len(sorted_hashes) - 1) / 2,  # Center of the x-axis
            y=90,  # Fixed position near the top of the chart
            text=f"<b>Version Comparison:</b> {abs(improvement):.2f}% {change_text} from v0.4.1 to v0.4.2",
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=16,
                color="black",
                weight="bold"
            ),
            bgcolor="rgba(255, 255, 255, 0.9)",
            bordercolor="black",
            borderwidth=2,
            borderpad=6,
            opacity=0.9
        )

    # Update layout with improved styling
    fig.update_layout(
        title=dict(
            text="Performance Graph",
            font=dict(
                family="Arial, sans-serif",
                size=24,  # Larger title
                color="black",
                weight="bold"
            ),
            x=0.5,  # Center the title
            y=0.95  # Position slightly higher
        ),
        xaxis_title=dict(
            text="Agent Version",
            font=dict(
                family="Arial, sans-serif",
                size=18,  # Larger axis title
                color="black",
                weight="bold"
            )
        ),
        yaxis_title=None,  # Remove the y-axis title as we'll use annotations instead
        template="plotly_white",
        height=900,  # Increased height for better visualization and more vertical space
        # No width is set, to allow full responsiveness
        autosize=True,  # Enable auto-sizing for responsiveness
        boxmode='group',  # Group boxes together
        margin=dict(r=50, l=120, t=100, b=100),
        hovermode="closest",
        plot_bgcolor='rgba(250,250,250,0.9)',  # Slightly off-white background
        paper_bgcolor='white',
        font=dict(
            family="Arial, sans-serif",
            size=14,
            color="black"
        ),
        showlegend=False
    )

    # Add single annotation for y-axis
    fig.add_annotation(
        x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
        y=0,  # Anchored at APR = 0 on the y-axis
        xref="paper",
        yref="y",
        text="Agent APR [%]",
        showarrow=False,
        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),
        textangle=-90,  # Rotate text to be vertical
        align="center"
    )

    # Update y-axis with autoscaling
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        autorange=True,  # Enable autoscaling
        tickformat=".2f",  # Format tick labels with 2 decimal places
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
        title=None  # Remove the built-in axis title since we're using annotations
    )

    # Update x-axis with custom labels
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        tickmode='array',
        tickvals=x_positions,
        ticktext=x_labels,
        tickangle=-45,  # Angle the labels for better readability
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
    )

    try:
        # Save the figure
        graph_file = "modius_apr_vs_agent_hash_graph.html"
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)

        # Also save as image for compatibility; a failure here is only logged
        img_file = "modius_apr_vs_agent_hash_graph.png"
        try:
            fig.write_image(img_file)
            logger.info(f"APR vs agent hash graph saved to {graph_file} and {img_file}")
        except Exception as e:
            logger.error(f"Error saving image: {e}")
            logger.info(f"APR vs agent hash graph saved to {graph_file} only")

        # Return the figure object for direct use in Gradio
        return fig
    except Exception as e:
        logger.error(f"Error creating APR vs agent hash graph: {e}")
        # Create a simpler graph as fallback
        simple_fig = go.Figure()
        # Add zero line
        simple_fig.add_shape(
            type="line",
            line=dict(dash="solid", width=1.5, color="black"),
            y0=0, y1=0,
            x0=-0.5, x1=1.5  # Fixed values for error case
        )
        # Add a note about the error
        simple_fig.add_annotation(
            text=f"Error creating graph: {str(e)}",
            x=0.5, y=0.5,
            showarrow=False,
            font=dict(size=15, color="red")
        )
        return simple_fig


def _apr_hash_message_figure(message):
    """Return an otherwise empty Plotly figure showing `message` as an annotation."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        x=0.5, y=0.5,
        showarrow=False, font=dict(size=20)
    )
    return fig


def _agent_hash_version(agent_hash):
    """Return the version label for a hash: v0.4.1, v0.4.2, or its last 6 characters."""
    hash_text = str(agent_hash)
    if hash_text.endswith("tby"):
        return "v0.4.1"
    if hash_text.endswith("vq"):
        return "v0.4.2"
    # For any other hashes, use the last 6 characters
    return f"Hash: {hash_text[-6:]}"


def _agent_hash_sort_key(agent_hash):
    """Return a sort key placing v0.4.1 hashes first, v0.4.2 second, others last by text."""
    hash_text = str(agent_hash)
    if hash_text.endswith("tby"):
        return (0, "")
    if hash_text.endswith("vq"):
        return (1, "")
    return (2, hash_text)
def save_apr_vs_agent_hash_to_csv(df):
    """
    Write the APR rows that carry an agent hash to a CSV file.

    Rows are kept when metric_type equals 'APR' and agent_hash is not null.
    The file is written without the index column.

    Args:
        df: DataFrame containing the APR data with agent_hash column
    Returns:
        The path to the saved CSV file, or None if no data was saved
    """
    # Nothing to write without rows or without the agent_hash column
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to save to CSV")
        return None

    # Keep APR rows whose agent_hash is present
    is_apr_row = df['metric_type'] == 'APR'
    has_agent_hash = df['agent_hash'].notna()
    rows_to_save = df[is_apr_row & has_agent_hash].copy()
    if rows_to_save.empty:
        logger.error("No valid APR data with agent_hash found to save to CSV")
        return None

    # Fixed output file name, relative to the working directory
    csv_file = "modius_apr_vs_agent_hash.csv"
    rows_to_save.to_csv(csv_file, index=False)
    logger.info(f"APR vs agent hash data saved to {csv_file}")
    return csv_file
def generate_apr_vs_agent_hash_visualizations(df):
    """
    Generate APR vs agent hash visualizations.

    The CSV export runs first, then the box-plot figure is built. When the
    DataFrame is empty or has no agent_hash column, a placeholder figure with
    an explanatory message is returned and no CSV is written.

    Args:
        df: DataFrame containing the APR data
    Returns:
        A tuple containing the Plotly figure object and the path to the saved
        CSV file (None when no CSV was saved)
    """
    def message_only_figure(message):
        # Empty figure with a message annotation and both axes hidden
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=message,
            showarrow=False,
            font=dict(size=20),
            x=0.5, y=0.5
        )
        placeholder.update_layout(
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
        )
        return placeholder

    if df.empty:
        logger.info("No APR data available for agent hash visualization.")
        return message_only_figure("No APR data available for agent hash visualization"), None

    # Check if agent_hash column exists
    if 'agent_hash' not in df.columns:
        logger.error("agent_hash column not found in DataFrame")
        return message_only_figure("agent_hash column not found in data"), None

    # Save to CSV before creating the visualization
    saved_csv = save_apr_vs_agent_hash_to_csv(df)
    figure = create_apr_vs_agent_hash_graph(df)
    return figure, saved_csv