Spaces:
Running
Running
File size: 17,277 Bytes
45a247a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 |
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import logging
import json
import os
# Module-level logger, named after this module via __name__
logger = logging.getLogger(__name__)
# APR values whose magnitude exceeds this limit are treated as outliers and dropped.
_APR_OUTLIER_LIMIT = 200

# Known agent releases, identified by the suffix of their hash, in display order.
_VERSION_BY_HASH_SUFFIX = (("tby", "v0.4.1"), ("vq", "v0.4.2"))

# Box fill colours per known version; any other hash uses the default colour.
_VERSION_COLORS = {
    "v0.4.1": "rgba(31, 119, 180, 0.7)",  # Blue
    "v0.4.2": "rgba(44, 160, 44, 0.7)",  # Green
}
_DEFAULT_BOX_COLOR = "rgba(214, 39, 40, 0.7)"  # Red

_APR_HASH_FONT_FAMILY = "Arial, sans-serif"


def _bold_font(size, color="black"):
    """Return a bold font spec of the given size and colour for Plotly text."""
    return dict(family=_APR_HASH_FONT_FAMILY, size=size, color=color, weight="bold")


def _apr_hash_message_figure(message):
    """Return an otherwise empty figure that shows ``message`` as an annotation."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        x=0.5, y=0.5,
        showarrow=False, font=dict(size=20)
    )
    return fig


def _apr_hash_error_figure(error):
    """Return the minimal fallback figure (zero line plus the error text in red)."""
    simple_fig = go.Figure()
    simple_fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=-0.5, x1=1.5  # Fixed values for error case
    )
    simple_fig.add_annotation(
        text=f"Error creating graph: {str(error)}",
        x=0.5, y=0.5,
        showarrow=False,
        font=dict(size=15, color="red")
    )
    return simple_fig


def _agent_version_label(agent_hash):
    """Map an agent hash to its version label, or to 'Hash: <last 6 chars>'."""
    hash_text = str(agent_hash)
    for suffix, version in _VERSION_BY_HASH_SUFFIX:
        if hash_text.endswith(suffix):
            return version
    return f"Hash: {hash_text[-6:]}"


def _agent_hash_sort_key(agent_hash):
    """Sort key placing known versions first (in release order), then other hashes.

    A tuple key is used so that an unrecognised hash can never sort ahead of a
    known version, whatever characters it starts with.
    """
    hash_text = str(agent_hash)
    for rank, (suffix, _version) in enumerate(_VERSION_BY_HASH_SUFFIX):
        if hash_text.endswith(suffix):
            return (rank, "")
    return (len(_VERSION_BY_HASH_SUFFIX), hash_text)


def _log_apr_outliers(outlier_data):
    """Log the number of excluded outlier rows, itemised per agent."""
    if outlier_data.empty:
        return
    logger.info(
        f"Excluded {len(outlier_data)} data points with outlier APR values "
        f"(>{_APR_OUTLIER_LIMIT} or <-{_APR_OUTLIER_LIMIT})"
    )
    for agent_name, agent_outliers in outlier_data.groupby('agent_name'):
        logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
        for _, row in agent_outliers.iterrows():
            logger.info(f" - APR: {row['apr']}, timestamp: {row['timestamp']}, agent_hash: {row['agent_hash']}")


def _add_reference_shapes(fig, x_end):
    """Add the zero line and the positive/negative background bands up to ``x_end``."""
    x_start = -0.5
    # Zero line spanning the full width of the boxes
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=x_start, x1=x_end,
        layer="below"
    )
    # Light blue band for the positive region (fixed height of 100)
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=100,
        x0=x_start, x1=x_end,
        layer="below"
    )
    # Light red band for the negative region (fixed depth of -100)
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-100, y1=0,
        x0=x_start, x1=x_end,
        layer="below"
    )


def create_apr_vs_agent_hash_graph(df):
    """
    Create a box plot showing APR values distribution for each agent hash version.

    Rows are restricted to ``metric_type == 'APR'`` with a non-null
    ``agent_hash``; APR values outside +/-200 are logged and excluded. One box
    is drawn per distinct hash, known versions first. When both v0.4.1 and
    v0.4.2 are present, an annotation reports the difference of their medians.

    The figure is also written to ``modius_apr_vs_agent_hash_graph.html`` and,
    if image export works, ``modius_apr_vs_agent_hash_graph.png`` in the
    current working directory. A failure to write either file is logged and
    does not prevent the figure from being returned.

    Args:
        df: DataFrame containing the APR data. Uses the columns ``agent_hash``,
            ``metric_type``, ``apr`` and ``agent_name`` (and ``timestamp`` when
            logging outliers).
    Returns:
        A Plotly figure object. If there is nothing to plot, a figure carrying
        only an explanatory message is returned instead.
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to plot APR vs agent hash graph")
        return _apr_hash_message_figure("No agent hash data available")

    # Fail gracefully (instead of with a KeyError) when other needed columns are absent
    missing_columns = [col for col in ('metric_type', 'apr', 'agent_name') if col not in df.columns]
    if missing_columns:
        logger.error(f"Columns missing to plot APR vs agent hash graph: {missing_columns}")
        return _apr_hash_message_figure(f"Missing required columns: {', '.join(missing_columns)}")

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())]
    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found")
        return _apr_hash_message_figure("No valid APR data with agent_hash found")

    # Split off outliers; NaN APR values satisfy neither mask and are dropped silently
    apr_series = apr_data['apr']
    is_outlier = (apr_series > _APR_OUTLIER_LIMIT) | (apr_series < -_APR_OUTLIER_LIMIT)
    is_in_range = (apr_series <= _APR_OUTLIER_LIMIT) & (apr_series >= -_APR_OUTLIER_LIMIT)
    _log_apr_outliers(apr_data[is_outlier])
    apr_data = apr_data[is_in_range]
    if apr_data.empty:
        # Every row was an outlier or NaN: there is nothing meaningful to draw
        logger.error("No APR data left to plot after excluding outliers")
        return _apr_hash_message_figure("No valid APR data with agent_hash found")

    # One box per distinct hash, known versions first
    sorted_hashes = sorted(apr_data['agent_hash'].unique(), key=_agent_hash_sort_key)
    num_boxes = len(sorted_hashes)

    fig = go.Figure()
    _add_reference_shapes(fig, x_end=num_boxes - 0.5)

    apr_by_version = {}  # version label -> all APR values seen for that version
    x_positions = []
    x_labels = []

    for position, agent_hash in enumerate(sorted_hashes):
        hash_data = apr_data[apr_data['agent_hash'] == agent_hash]
        # Agent name is taken from the first record for this hash
        agent_name = hash_data['agent_name'].iloc[0] if not hash_data.empty else "Unknown"
        version = _agent_version_label(agent_hash)
        color = _VERSION_COLORS.get(version, _DEFAULT_BOX_COLOR)

        apr_values = hash_data['apr'].tolist()
        median_apr = hash_data['apr'].median()
        mean_apr = hash_data['apr'].mean()
        min_apr = hash_data['apr'].min()
        max_apr = hash_data['apr'].max()
        count = len(apr_values)

        # Several hashes may map to the same version; pool their values
        apr_by_version.setdefault(version, []).extend(apr_values)

        # The tick label shows the version only (no hash)
        x_positions.append(position)
        x_labels.append(version)

        hover_text = (
            f"Version: {version}<br>"
            f"Agent: {agent_name}<br>"
            f"Hash: {agent_hash}<br>"
            f"Median APR: {median_apr:.2f}%<br>"
            f"Mean APR: {mean_apr:.2f}%<br>"
            f"Min APR: {min_apr:.2f}%<br>"
            f"Max APR: {max_apr:.2f}%<br>"
            f"Data points: {count}"
        )

        fig.add_trace(
            go.Box(
                y=apr_values,
                x=[position] * len(apr_values),  # Position on x-axis
                name=version,
                boxpoints='outliers',  # Show only outlier points instead of all points
                jitter=0.1,
                pointpos=0,
                marker=dict(
                    color=color,
                    size=6,
                    opacity=0.7,
                    line=dict(width=1, color='black')
                ),
                line=dict(
                    color='black',
                    width=2
                ),
                fillcolor=color,
                hoverinfo='text',
                hovertext=hover_text,
                showlegend=False,
                boxmean=True,  # Show mean as a dashed line
                whiskerwidth=0.8,
                width=0.6
            )
        )
        logger.info(f"Added box plot for agent hash {agent_hash} ({version}) with {count} points")

        # Median value printed just above the median line of each box
        fig.add_annotation(
            x=position,
            y=median_apr + 5,
            text=f"{median_apr:.1f}%",
            showarrow=False,
            font=_bold_font(12)
        )

    # Compare the pooled medians of the two known versions, when both are present
    if "v0.4.1" in apr_by_version and "v0.4.2" in apr_by_version:
        v041_median = pd.Series(apr_by_version["v0.4.1"]).median()
        v042_median = pd.Series(apr_by_version["v0.4.2"]).median()
        improvement = v042_median - v041_median
        is_improvement = improvement > 0
        change_color = "green" if is_improvement else "red"
        change_text = "improvement" if is_improvement else "decrease"
        fig.add_annotation(
            x=(num_boxes - 1) / 2,  # Center of the x-axis
            y=90,  # Near the top of the fixed y-range set below
            text=f"<b>Version Comparison:</b> {abs(improvement):.2f}% {change_text} from v0.4.1 to v0.4.2",
            showarrow=False,
            font=_bold_font(16, color=change_color),
            bgcolor="rgba(255, 255, 255, 0.9)",
            bordercolor=change_color,
            borderwidth=2,
            borderpad=6,
            opacity=0.9
        )

    fig.update_layout(
        title=dict(
            text="APR Values by Agent Version",
            font=_bold_font(24),
            x=0.5,  # Center the title
            y=0.95
        ),
        xaxis_title=dict(
            text="Agent Version",
            font=_bold_font(18)
        ),
        yaxis_title=None,  # The y-axis is labelled with annotations instead
        template="plotly_white",
        height=700,
        width=900,
        autosize=True,
        boxmode='group',
        margin=dict(r=50, l=120, t=100, b=100),
        hovermode="closest",
        plot_bgcolor='rgba(250,250,250,0.9)',
        paper_bgcolor='white',
        font=dict(
            family=_APR_HASH_FONT_FAMILY,
            size=14,
            color="black"
        ),
        showlegend=False
    )

    # Separate vertical labels for the negative and positive halves of the y-axis
    fig.add_annotation(
        x=-0.08,  # Left of the y-axis, clear of the tick labels
        y=-25,  # Middle of the visible negative region
        xref="paper",
        yref="y",
        text="Percent drawdown [%]",
        showarrow=False,
        font=_bold_font(16),
        textangle=-90,
        align="center"
    )
    fig.add_annotation(
        x=-0.08,
        y=50,  # Middle of the positive region
        xref="paper",
        yref="y",
        text="Agent APR [%]",
        showarrow=False,
        font=_bold_font(16),
        textangle=-90,
        align="center"
    )

    # Fixed y-range of -50 to +100 (no autoscaling); values outside it are off-screen
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        autorange=False,
        range=[-50, 100],
        tickformat=".2f",
        tickfont=_bold_font(14),
        title=None
    )
    # One tick per box, labelled with the version
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        tickmode='array',
        tickvals=x_positions,
        ticktext=x_labels,
        tickangle=-45,
        tickfont=_bold_font(14)
    )

    graph_file = "modius_apr_vs_agent_hash_graph.html"
    img_file = "modius_apr_vs_agent_hash_graph.png"
    try:
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
    except OSError as e:
        # The disk write failed but the figure itself is fine: keep it
        logger.error(f"Error saving APR vs agent hash graph to {graph_file}: {e}")
        return fig
    except Exception as e:
        logger.error(f"Error creating APR vs agent hash graph: {e}")
        return _apr_hash_error_figure(e)

    # Image export is best-effort; it may fail for reasons unrelated to the figure
    try:
        fig.write_image(img_file)
        logger.info(f"APR vs agent hash graph saved to {graph_file} and {img_file}")
    except Exception as e:
        logger.error(f"Error saving image: {e}")
        logger.info(f"APR vs agent hash graph saved to {graph_file} only")

    # Return the figure object for direct use in Gradio
    return fig
def save_apr_vs_agent_hash_to_csv(df):
    """
    Save the APR vs agent hash data to a CSV file.

    Only rows with ``metric_type == 'APR'`` and a non-null ``agent_hash`` are
    written, to ``modius_apr_vs_agent_hash.csv`` in the current working
    directory (without the index).

    Args:
        df: DataFrame containing the APR data with agent_hash column
    Returns:
        The path to the saved CSV file, or None if no data was saved (empty
        input, a required column is missing, no matching rows, or the file
        could not be written)
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to save to CSV")
        return None

    # Without metric_type the APR rows cannot be selected; report it rather than raise KeyError
    if 'metric_type' not in df.columns:
        logger.error("metric_type column not found to save APR vs agent hash data to CSV")
        return None

    # Filter for APR data only and ensure agent_hash is not null
    is_apr_row = df['metric_type'] == 'APR'
    has_agent_hash = df['agent_hash'].notna()
    apr_data = df[is_apr_row & has_agent_hash]
    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found to save to CSV")
        return None

    csv_file = "modius_apr_vs_agent_hash.csv"
    try:
        apr_data.to_csv(csv_file, index=False)
    except OSError as e:
        # Nothing was saved, so honour the documented "None if no data was saved" contract
        logger.error(f"Error saving APR vs agent hash data to {csv_file}: {e}")
        return None

    logger.info(f"APR vs agent hash data saved to {csv_file}")
    return csv_file
def _empty_visualization_figure(message):
    """Build a blank Plotly figure that shows only ``message``, with both axes hidden."""
    placeholder = go.Figure()
    placeholder.add_annotation(
        x=0.5, y=0.5,
        text=message,
        font=dict(size=20),
        showarrow=False
    )
    placeholder.update_layout(
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    )
    return placeholder


def generate_apr_vs_agent_hash_visualizations(df):
    """
    Produce the APR-by-agent-hash chart together with its CSV export.

    Args:
        df: DataFrame holding the APR data
    Returns:
        A ``(figure, csv_path)`` tuple. When ``df`` is empty or lacks the
        ``agent_hash`` column, the figure is a placeholder carrying a message
        and ``csv_path`` is None.
    """
    # Guard clauses: nothing to plot or export
    if df.empty:
        logger.info("No APR data available for agent hash visualization.")
        return _empty_visualization_figure("No APR data available for agent hash visualization"), None

    if 'agent_hash' not in df.columns:
        logger.error("agent_hash column not found in DataFrame")
        return _empty_visualization_figure("agent_hash column not found in data"), None

    # The CSV export runs first, then the chart is built from the same frame
    exported_csv = save_apr_vs_agent_hash_to_csv(df)
    chart = create_apr_vs_agent_hash_graph(df)
    return chart, exported_csv
|