import json
import logging
import os
from datetime import datetime

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Set up logging
logger = logging.getLogger(__name__)

# APR values whose magnitude exceeds this limit are treated as outliers and
# are excluded from the box plots (they are still logged).
_APR_OUTLIER_LIMIT = 200

# Font family shared by every text element of the chart.
_FONT_FAMILY = "Arial, sans-serif"

# Fill colors for the known agent versions; any other hash uses the default.
_VERSION_COLORS = {
    "v0.4.1": "rgba(31, 119, 180, 0.7)",  # Blue
    "v0.4.2": "rgba(44, 160, 44, 0.7)",  # Green
}
_DEFAULT_BOX_COLOR = "rgba(214, 39, 40, 0.7)"  # Red


def _message_figure(text, hide_axes=False):
    """Return an otherwise empty figure that displays *text* as an annotation.

    Args:
        text: Message to show at position (0.5, 0.5).
        hide_axes: When True, also hide grid, zero line and tick labels on
            both axes.

    Returns:
        A Plotly figure object
    """
    fig = go.Figure()
    fig.add_annotation(
        text=text,
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=20),
    )
    if hide_axes:
        fig.update_layout(
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        )
    return fig


def _select_apr_rows(df):
    """Return a copy of the rows of *df* that are APR metrics with a non-null agent_hash."""
    return df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())].copy()


def _version_label(agent_hash):
    """Map an agent hash to its display label, based on the hash ending."""
    if agent_hash.endswith("tby"):
        return "v0.4.1"
    if agent_hash.endswith("vq"):
        return "v0.4.2"
    # For any other hashes, use the last 6 characters
    return f"Hash: {agent_hash[-6:]}"


def _version_sort_key(agent_hash):
    """Sort key placing v0.4.1 first, v0.4.2 second, then other hashes alphabetically.

    A tuple is used so that an unknown hash can never sort ahead of the known
    versions, whatever characters it starts with.
    """
    if agent_hash.endswith("tby"):
        return (0, "")
    if agent_hash.endswith("vq"):
        return (1, "")
    return (2, agent_hash)


def _log_outliers(outlier_data):
    """Log the excluded outlier rows, grouped by agent name, for debugging."""
    logger.info(
        f"Excluded {len(outlier_data)} data points with outlier APR values "
        f"(>{_APR_OUTLIER_LIMIT} or <-{_APR_OUTLIER_LIMIT})"
    )
    for agent_name, agent_outliers in outlier_data.groupby('agent_name'):
        logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
        for _, row in agent_outliers.iterrows():
            logger.info(
                f" - APR: {row['apr']}, timestamp: {row['timestamp']}, "
                f"agent_hash: {row['agent_hash']}"
            )


def _add_background_shapes(fig, x_min, x_max):
    """Add the zero line and the positive/negative background bands to *fig*.

    All three shapes span the x-range [x_min, x_max] and are drawn below the
    traces. The bands cover the fixed y-ranges 0..100 and -100..0.
    """
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=x_min, x1=x_max,
        layer="below",
    )
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=100,  # Use a fixed positive value
        x0=x_min, x1=x_max,
        layer="below",
    )
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-100, y1=0,  # Use a fixed negative value
        x0=x_min, x1=x_max,
        layer="below",
    )


def _add_hash_box(fig, position, agent_hash, version, hash_data):
    """Add the box plot and median label for one agent hash at x = *position*.

    Returns:
        The list of APR values plotted for this hash.
    """
    # Agent name is taken from the first record of this hash
    agent_name = hash_data['agent_name'].iloc[0] if not hash_data.empty else "Unknown"
    color = _VERSION_COLORS.get(version, _DEFAULT_BOX_COLOR)

    apr_series = hash_data['apr']
    apr_values = apr_series.tolist()
    median_apr = apr_series.median()
    count = len(apr_values)

    # Plotly renders "<br>" as a line break inside hover text.
    hover_text = (
        f"Version: {version}<br>"
        f"Agent: {agent_name}<br>"
        f"Hash: {agent_hash}<br>"
        f"Median APR: {median_apr:.2f}%<br>"
        f"Mean APR: {apr_series.mean():.2f}%<br>"
        f"Min APR: {apr_series.min():.2f}%<br>"
        f"Max APR: {apr_series.max():.2f}%<br>"
        f"Data points: {count}"
    )

    fig.add_trace(
        go.Box(
            y=apr_values,
            x=[position] * count,  # Position on x-axis
            name=version,
            boxpoints='outliers',  # Show only outlier points instead of all points
            jitter=0.1,
            pointpos=0,  # Position of points relative to box
            marker=dict(
                color=color,
                size=6,
                opacity=0.7,
                line=dict(width=1, color='black'),
            ),
            line=dict(color='black', width=2),
            fillcolor=color,
            hoverinfo='text',
            hovertext=hover_text,
            showlegend=False,
            boxmean=True,  # Show mean as a dashed line
            whiskerwidth=0.8,
            width=0.6,
        )
    )
    logger.info(f"Added box plot for agent hash {agent_hash} ({version}) with {count} points")

    # Median value label, offset 5 units above the median
    fig.add_annotation(
        x=position,
        y=median_apr + 5,
        text=f"{median_apr:.1f}%",
        showarrow=False,
        font=dict(family=_FONT_FAMILY, size=12, color="black", weight="bold"),
    )
    return apr_values


def _add_version_comparison(fig, values_by_version, num_boxes):
    """Annotate the median APR change from v0.4.1 to v0.4.2, if both are present."""
    if "v0.4.1" not in values_by_version or "v0.4.2" not in values_by_version:
        return

    v041_median = pd.Series(values_by_version["v0.4.1"]).median()
    v042_median = pd.Series(values_by_version["v0.4.2"]).median()
    improvement = v042_median - v041_median
    change_text = "improvement" if improvement > 0 else "decrease"

    fig.add_annotation(
        x=(num_boxes - 1) / 2,  # Center of the x-axis
        y=90,  # Fixed position near the top of the chart
        text=f"Version Comparison: {abs(improvement):.2f}% {change_text} from v0.4.1 to v0.4.2",
        showarrow=False,
        font=dict(family=_FONT_FAMILY, size=16, color="black", weight="bold"),
        bgcolor="rgba(255, 255, 255, 0.9)",
        bordercolor="black",
        borderwidth=2,
        borderpad=6,
        opacity=0.9,
    )


def _style_figure(fig, x_positions, x_labels):
    """Apply the layout, the y-axis label annotation and the axis styling to *fig*."""
    fig.update_layout(
        title=dict(
            text="Performance Graph",
            font=dict(family=_FONT_FAMILY, size=24, color="black", weight="bold"),
            x=0.5,  # Center the title
            y=0.95,
        ),
        xaxis_title=dict(
            text="Agent Version",
            font=dict(family=_FONT_FAMILY, size=18, color="black", weight="bold"),
        ),
        yaxis_title=None,  # The y-axis label is drawn as an annotation instead
        template="plotly_white",
        height=900,
        autosize=True,  # No fixed width, so the chart stays responsive
        boxmode='group',
        margin=dict(r=50, l=120, t=100, b=100),
        hovermode="closest",
        plot_bgcolor='rgba(250,250,250,0.9)',
        paper_bgcolor='white',
        font=dict(family=_FONT_FAMILY, size=14, color="black"),
        showlegend=False,
    )

    # Single vertical label for the y-axis, placed left of the tick labels
    fig.add_annotation(
        x=-0.08,
        y=0,
        xref="paper",
        yref="y",
        text="Agent APR [%]",
        showarrow=False,
        font=dict(size=16, family=_FONT_FAMILY, color="black", weight="bold"),
        textangle=-90,
        align="center",
    )

    tick_font = dict(size=14, family=_FONT_FAMILY, color="black", weight="bold")
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        autorange=True,
        tickformat=".2f",
        tickfont=tick_font,
        title=None,
    )
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        tickmode='array',
        tickvals=x_positions,
        ticktext=x_labels,
        tickangle=-45,
        tickfont=tick_font,
    )


def create_apr_vs_agent_hash_graph(df):
    """
    Create a box plot showing APR values distribution for each agent hash version.

    Rows are restricted to metric_type == 'APR' with a non-null agent_hash.
    APR values outside [-200, 200] are logged and excluded; rows whose APR is
    missing fail both range comparisons and are dropped as well. One box is
    drawn per agent hash, ordered v0.4.1, v0.4.2, then any other hashes.

    The figure is also written to "modius_apr_vs_agent_hash_graph.html" and,
    when image export works, "modius_apr_vs_agent_hash_graph.png" in the
    current working directory. If writing the HTML file raises, a simple
    fallback figure carrying the error message is returned instead.

    Args:
        df: DataFrame containing the APR data with agent_hash column

    Returns:
        A Plotly figure object
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to plot APR vs agent hash graph")
        return _message_figure("No agent hash data available")

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = _select_apr_rows(df)
    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found")
        return _message_figure("No valid APR data with agent_hash found")

    # Split off outliers (APR values above the limit or below its negative)
    apr = apr_data['apr']
    outlier_data = apr_data[(apr > _APR_OUTLIER_LIMIT) | (apr < -_APR_OUTLIER_LIMIT)]
    apr_data = apr_data[(apr <= _APR_OUTLIER_LIMIT) & (apr >= -_APR_OUTLIER_LIMIT)].copy()
    if not outlier_data.empty:
        _log_outliers(outlier_data)

    # Nothing left to plot: return a message instead of an empty chart
    if apr_data.empty:
        logger.error("No APR data left to plot after excluding outliers")
        return _message_figure(
            f"No APR data within the plotted range "
            f"(-{_APR_OUTLIER_LIMIT} to {_APR_OUTLIER_LIMIT})"
        )

    sorted_hashes = sorted(apr_data['agent_hash'].unique(), key=_version_sort_key)
    num_boxes = len(sorted_hashes)

    fig = go.Figure()
    # Boxes sit at x = 0..num_boxes-1, so pad half a unit on each side
    _add_background_shapes(fig, -0.5, num_boxes - 0.5)

    values_by_version = {}
    x_positions = []
    x_labels = []
    for position, agent_hash in enumerate(sorted_hashes):
        version = _version_label(agent_hash)
        hash_data = apr_data[apr_data['agent_hash'] == agent_hash]
        apr_values = _add_hash_box(fig, position, agent_hash, version, hash_data)

        # Several hashes can share one label; pool their values per version
        values_by_version.setdefault(version, []).extend(apr_values)
        x_positions.append(position)
        x_labels.append(version)  # Label with version only (no hash)

    _add_version_comparison(fig, values_by_version, num_boxes)
    _style_figure(fig, x_positions, x_labels)

    try:
        # Save the figure
        graph_file = "modius_apr_vs_agent_hash_graph.html"
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)

        # Also save as image for compatibility; a failure here is logged but
        # does not prevent the figure from being returned.
        img_file = "modius_apr_vs_agent_hash_graph.png"
        try:
            fig.write_image(img_file)
            logger.info(f"APR vs agent hash graph saved to {graph_file} and {img_file}")
        except Exception as e:
            logger.error(f"Error saving image: {e}")
            logger.info(f"APR vs agent hash graph saved to {graph_file} only")

        # Return the figure object for direct use in Gradio
        return fig
    except Exception as e:
        logger.error(f"Error creating APR vs agent hash graph: {e}")

        # Create a simpler graph as fallback
        simple_fig = go.Figure()
        simple_fig.add_shape(
            type="line",
            line=dict(dash="solid", width=1.5, color="black"),
            y0=0, y1=0,
            x0=-0.5, x1=1.5,  # Fixed values for error case
        )
        simple_fig.add_annotation(
            text=f"Error creating graph: {str(e)}",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(size=15, color="red"),
        )
        return simple_fig


def save_apr_vs_agent_hash_to_csv(df):
    """
    Save the APR vs agent hash data to a CSV file.

    Only rows with metric_type == 'APR' and a non-null agent_hash are written,
    without the index, to "modius_apr_vs_agent_hash.csv" in the current
    working directory. Outlier APR values are not filtered out here.

    Args:
        df: DataFrame containing the APR data with agent_hash column

    Returns:
        The path to the saved CSV file, or None if no data was saved
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to save to CSV")
        return None

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = _select_apr_rows(df)
    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found to save to CSV")
        return None

    csv_file = "modius_apr_vs_agent_hash.csv"
    apr_data.to_csv(csv_file, index=False)
    logger.info(f"APR vs agent hash data saved to {csv_file}")
    return csv_file


def generate_apr_vs_agent_hash_visualizations(df):
    """
    Generate APR vs agent hash visualizations.

    When df is empty or lacks an agent_hash column, a message-only figure is
    returned together with None and nothing is written to disk. Otherwise the
    CSV is saved first and the box plot is created afterwards.

    Args:
        df: DataFrame containing the APR data

    Returns:
        A tuple containing the Plotly figure object and the path to the saved CSV file
    """
    if df.empty:
        logger.info("No APR data available for agent hash visualization.")
        return _message_figure(
            "No APR data available for agent hash visualization", hide_axes=True
        ), None

    # Check if agent_hash column exists
    if 'agent_hash' not in df.columns:
        logger.error("agent_hash column not found in DataFrame")
        return _message_figure("agent_hash column not found in data", hide_axes=True), None

    # Save to CSV before creating visualization
    csv_file = save_apr_vs_agent_hash_to_csv(df)

    # Create the visualization
    fig = create_apr_vs_agent_hash_graph(df)

    return fig, csv_file