# File size: 17,277 Bytes
# 45a247a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import logging
import json
import os

# Set up logging
logger = logging.getLogger(__name__)

# Known agent releases, identified by the ending of their hash, in release order.
_KNOWN_AGENT_VERSIONS = (
    ("tby", "v0.4.1"),
    ("vq", "v0.4.2"),
)


def _agent_version_label(agent_hash):
    """Return the x-axis label for an agent hash: a version name or the hash tail."""
    hash_text = str(agent_hash)  # tolerate non-string hash values
    for suffix, version in _KNOWN_AGENT_VERSIONS:
        if hash_text.endswith(suffix):
            return version
    # For any other hashes, use the last 6 characters
    return f"Hash: {hash_text[-6:]}"


def _agent_hash_sort_key(agent_hash):
    """Return a sort key that puts known versions first, in release order."""
    hash_text = str(agent_hash)
    for rank, (suffix, _version) in enumerate(_KNOWN_AGENT_VERSIONS):
        if hash_text.endswith(suffix):
            # Empty tie-breaker: hashes of the same version keep their
            # order of appearance (sorted() is stable).
            return (rank, "")
    # Unknown hashes always come after every known version, ordered by text.
    return (len(_KNOWN_AGENT_VERSIONS), hash_text)


def _apr_hash_message_figure(message):
    """Return an otherwise empty figure showing ``message`` as an annotation."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        x=0.5, y=0.5,
        showarrow=False, font=dict(size=20)
    )
    return fig


def _log_apr_outliers(outlier_data):
    """Log the excluded outlier rows, grouped by agent when agent names exist."""
    logger.info(f"Excluded {len(outlier_data)} data points with outlier APR values (>200 or <-200)")

    # The per-agent breakdown is purely diagnostic; skip it rather than fail
    # when the grouping column is absent.
    if 'agent_name' not in outlier_data.columns:
        return

    for agent_name, agent_outliers in outlier_data.groupby('agent_name'):
        logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
        for _, row in agent_outliers.iterrows():
            # row.get() keeps a missing 'timestamp' column from breaking the chart.
            logger.info(f"  - APR: {row['apr']}, timestamp: {row.get('timestamp')}, agent_hash: {row['agent_hash']}")


def create_apr_vs_agent_hash_graph(df):
    """
    Create a box plot showing APR values distribution for each agent hash version.

    Rows are kept when ``metric_type == 'APR'``, ``agent_hash`` is not null and
    ``apr`` lies within [-200, 200]; rows outside that range are logged and
    excluded. One box is drawn per distinct agent hash, known versions first.
    When both v0.4.1 and v0.4.2 are present, an annotation reports the
    difference between their median APR values.

    Side effects: writes ``modius_apr_vs_agent_hash_graph.html`` and, when image
    export works, ``modius_apr_vs_agent_hash_graph.png`` to the current working
    directory.

    Args:
        df: DataFrame containing the APR data. Uses the columns ``agent_hash``,
            ``metric_type`` and ``apr``; ``agent_name`` (hover text, outlier
            log) and ``timestamp`` (outlier log) are optional.

    Returns:
        A Plotly figure object. If there is nothing to plot, a figure holding
        only an explanatory message; if saving the HTML fails, a simplified
        figure holding the error text.
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to plot APR vs agent hash graph")
        return _apr_hash_message_figure("No agent hash data available")

    # The filters below need these columns; report their absence the same way
    # as a missing agent_hash column instead of raising KeyError.
    missing_columns = [col for col in ('metric_type', 'apr') if col not in df.columns]
    if missing_columns:
        logger.error(f"Required column(s) {missing_columns} not found to plot APR vs agent hash graph")
        return _apr_hash_message_figure("No valid APR data with agent_hash found")

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())]

    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found")
        return _apr_hash_message_figure("No valid APR data with agent_hash found")

    # Filter out outliers (APR values above 200 or below -200). NaN APR values
    # satisfy neither condition, so they are dropped without being logged.
    outlier_data = apr_data[(apr_data['apr'] > 200) | (apr_data['apr'] < -200)]
    apr_data = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)]

    if not outlier_data.empty:
        _log_apr_outliers(outlier_data)

    if apr_data.empty:
        # Every row was an outlier (or NaN): say so instead of drawing an
        # empty, unlabeled chart.
        logger.error("No APR data left to plot after excluding outliers")
        return _apr_hash_message_figure("No valid APR data with agent_hash found")

    # One box per distinct hash, known versions first (v0.4.1, then v0.4.2)
    sorted_hashes = sorted(apr_data['agent_hash'].unique(), key=_agent_hash_sort_key)
    num_boxes = len(sorted_hashes)
    has_agent_name = 'agent_name' in apr_data.columns

    # Boxes sit at x = 0 .. num_boxes - 1; pad half a unit on each side
    x_left, x_right = -0.5, num_boxes - 0.5

    # Create Plotly figure
    fig = go.Figure()

    # Add a zero line that spans the entire width
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=x_left, x1=x_right,
        layer="below"
    )

    # Add background shapes for positive and negative regions
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=100,  # Use a fixed positive value
        x0=x_left, x1=x_right,
        layer="below"
    )

    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-100, y1=0,  # Use a fixed negative value
        x0=x_left, x1=x_right,
        layer="below"
    )

    # Colors for different versions
    version_colors = {
        "v0.4.1": "rgba(31, 119, 180, 0.7)",  # Blue
        "v0.4.2": "rgba(44, 160, 44, 0.7)",   # Green
    }

    # Default color for other hashes
    default_color = "rgba(214, 39, 40, 0.7)"  # Red

    # APR values pooled per version label, for the version comparison below
    values_by_version = {}

    # X-axis positions and labels
    x_positions = []
    x_labels = []

    # Process each hash to create box plot data
    for i, agent_hash in enumerate(sorted_hashes):
        hash_data = apr_data[apr_data['agent_hash'] == agent_hash]
        apr_series = hash_data['apr']

        # Get agent name for this hash (should be the same for all records with this hash)
        if has_agent_name and not hash_data.empty:
            agent_name = hash_data['agent_name'].iloc[0]
        else:
            agent_name = "Unknown"

        # Version label doubles as the x-axis tick label (no hash shown)
        version = _agent_version_label(agent_hash)
        color = version_colors.get(version, default_color)

        # Calculate statistics for this hash
        apr_values = apr_series.tolist()
        median_apr = apr_series.median()
        mean_apr = apr_series.mean()
        min_apr = apr_series.min()
        max_apr = apr_series.max()
        count = len(apr_values)

        values_by_version.setdefault(version, []).extend(apr_values)

        # Add to x-axis positions and labels
        x_positions.append(i)
        x_labels.append(version)

        # Create hover text with detailed statistics
        hover_text = (
            f"Version: {version}<br>"
            f"Agent: {agent_name}<br>"
            f"Hash: {agent_hash}<br>"
            f"Median APR: {median_apr:.2f}%<br>"
            f"Mean APR: {mean_apr:.2f}%<br>"
            f"Min APR: {min_apr:.2f}%<br>"
            f"Max APR: {max_apr:.2f}%<br>"
            f"Data points: {count}"
        )

        # Add box plot for this hash
        fig.add_trace(
            go.Box(
                y=apr_values,
                x=[i] * len(apr_values),  # Position on x-axis
                name=version,
                boxpoints='outliers',  # Show only outlier points instead of all points
                jitter=0.1,  # Reduced jitter for less horizontal spread
                pointpos=0,  # Position of points relative to box
                marker=dict(
                    color=color,
                    size=6,  # Smaller point size
                    opacity=0.7,  # Add transparency
                    line=dict(width=1, color='black')
                ),
                line=dict(
                    color='black',
                    width=2  # Thicker line for better visibility
                ),
                fillcolor=color,
                hoverinfo='text',
                hovertext=hover_text,
                showlegend=False,
                boxmean=True,  # Show mean as a dashed line
                whiskerwidth=0.8,  # Slightly thinner whiskers
                width=0.6  # Wider boxes
            )
        )

        logger.info(f"Added box plot for agent hash {agent_hash} ({version}) with {count} points")

        # Add text annotation with the median value, 5 units above the median
        fig.add_annotation(
            x=i,
            y=median_apr + 5,
            text=f"{median_apr:.1f}%",
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=12,
                color="black",
                weight="bold"
            )
        )

    # Calculate improvement metrics between versions
    if "v0.4.1" in values_by_version and "v0.4.2" in values_by_version:
        v041_median = pd.Series(values_by_version["v0.4.1"]).median()
        v042_median = pd.Series(values_by_version["v0.4.2"]).median()

        # Difference of medians, in APR percentage points
        improvement = v042_median - v041_median

        # Determine if the change is positive or negative
        is_improvement = improvement > 0
        change_color = "green" if is_improvement else "red"
        change_text = "improvement" if is_improvement else "decrease"

        # Add annotation showing improvement with better styling
        fig.add_annotation(
            x=(num_boxes - 1) / 2,  # Center of the x-axis
            y=90,  # Top of the chart
            text=f"<b>Version Comparison:</b> {abs(improvement):.2f}% {change_text} from v0.4.1 to v0.4.2",
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=16,
                color=change_color,
                weight="bold"
            ),
            bgcolor="rgba(255, 255, 255, 0.9)",
            bordercolor=change_color,
            borderwidth=2,
            borderpad=6,
            opacity=0.9
        )

    # Update layout with improved styling
    fig.update_layout(
        title=dict(
            text="APR Values by Agent Version",
            font=dict(
                family="Arial, sans-serif",
                size=24,  # Larger title
                color="black",
                weight="bold"
            ),
            x=0.5,  # Center the title
            y=0.95  # Position slightly higher
        ),
        xaxis_title=dict(
            text="Agent Version",
            font=dict(
                family="Arial, sans-serif",
                size=18,  # Larger axis title
                color="black",
                weight="bold"
            )
        ),
        yaxis_title=None,  # Remove the y-axis title as we'll use annotations instead
        template="plotly_white",
        height=700,  # Increased height for better visualization
        width=900,   # Set a fixed width for better proportions
        autosize=True,  # Still enable auto-sizing for responsiveness
        boxmode='group',  # Group boxes together
        margin=dict(r=50, l=120, t=100, b=100),  # Reduced right margin since guide was removed
        hovermode="closest",
        plot_bgcolor='rgba(250,250,250,0.9)',  # Slightly off-white background
        paper_bgcolor='white',
        font=dict(
            family="Arial, sans-serif",
            size=14,
            color="black"
        ),
        showlegend=False
    )

    # Add annotations for y-axis regions
    fig.add_annotation(
        x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
        y=-25,    # Middle of the negative region
        xref="paper",
        yref="y",
        text="Percent drawdown [%]",
        showarrow=False,
        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),
        textangle=-90,  # Rotate text to be vertical
        align="center"
    )

    fig.add_annotation(
        x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
        y=50,     # Middle of the positive region
        xref="paper",
        yref="y",
        text="Agent APR [%]",
        showarrow=False,
        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),
        textangle=-90,  # Rotate text to be vertical
        align="center"
    )

    # Update y-axis with a fixed range of -50 to +100
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        # Use fixed range instead of autoscaling
        autorange=False,  # Disable autoscaling
        range=[-50, 100],  # Set fixed range from -50 to +100
        tickformat=".2f",  # Format tick labels with 2 decimal places
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
        title=None  # Remove the built-in axis title since we're using annotations
    )

    # Update x-axis with custom labels
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        tickmode='array',
        tickvals=x_positions,
        ticktext=x_labels,
        tickangle=-45,  # Angle the labels for better readability
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
    )

    try:
        # Save the figure
        graph_file = "modius_apr_vs_agent_hash_graph.html"
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)

        # Also save as image for compatibility; image export is best-effort
        img_file = "modius_apr_vs_agent_hash_graph.png"
        try:
            fig.write_image(img_file)
            logger.info(f"APR vs agent hash graph saved to {graph_file} and {img_file}")
        except Exception as e:
            logger.error(f"Error saving image: {e}")
            logger.info(f"APR vs agent hash graph saved to {graph_file} only")

        # Return the figure object for direct use by the caller
        return fig
    except Exception as e:
        logger.error(f"Error creating APR vs agent hash graph: {e}")

        # Create a simpler graph as fallback
        simple_fig = go.Figure()

        # Add zero line
        simple_fig.add_shape(
            type="line",
            line=dict(dash="solid", width=1.5, color="black"),
            y0=0, y1=0,
            x0=-0.5, x1=1.5  # Fixed values for error case
        )

        # Add a note about the error
        simple_fig.add_annotation(
            text=f"Error creating graph: {str(e)}",
            x=0.5, y=0.5,
            showarrow=False,
            font=dict(size=15, color="red")
        )

        return simple_fig

def save_apr_vs_agent_hash_to_csv(df):
    """
    Save the APR vs agent hash data to a CSV file.

    Only rows with ``metric_type == 'APR'`` and a non-null ``agent_hash`` are
    written, to ``modius_apr_vs_agent_hash.csv`` in the current working
    directory, without the index column.

    Args:
        df: DataFrame containing the APR data with agent_hash column

    Returns:
        The path to the saved CSV file, or None if no data was saved (empty
        input, missing columns, no matching rows, or the file could not be
        written)
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to save to CSV")
        return None

    # The row filter below needs metric_type; treat its absence like a missing
    # agent_hash column rather than raising KeyError.
    if 'metric_type' not in df.columns:
        logger.error("metric_type column not found to save to CSV")
        return None

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())]

    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found to save to CSV")
        return None

    # Define the CSV file path
    csv_file = "modius_apr_vs_agent_hash.csv"

    # Save to CSV. A failed write is reported through the documented None
    # return so that callers can still go on to build the chart.
    try:
        apr_data.to_csv(csv_file, index=False)
    except OSError as e:
        logger.error(f"Error saving APR vs agent hash data to {csv_file}: {e}")
        return None

    logger.info(f"APR vs agent hash data saved to {csv_file}")

    return csv_file

def generate_apr_vs_agent_hash_visualizations(df):
    """
    Build the APR vs agent hash chart and export the underlying data.

    An empty DataFrame, or one without an ``agent_hash`` column, yields a
    placeholder figure carrying an explanatory message and no CSV. Otherwise
    the data is saved to CSV first and the chart is created afterwards.

    Args:
        df: DataFrame containing the APR data

    Returns:
        A tuple ``(figure, csv_path)``: the Plotly figure object and the path
        to the saved CSV file (None when nothing was saved)
    """
    def _placeholder(message):
        # Blank canvas: one centered message, grid/zero line/tick labels hidden
        canvas = go.Figure()
        canvas.add_annotation(
            x=0.5, y=0.5,
            text=message,
            font=dict(size=20),
            showarrow=False
        )
        hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
        canvas.update_layout(xaxis=dict(hidden_axis), yaxis=dict(hidden_axis))
        return canvas

    # Guard: nothing to visualize at all
    if df.empty:
        logger.info("No APR data available for agent hash visualization.")
        return _placeholder("No APR data available for agent hash visualization"), None

    # Guard: data present but the grouping column is missing
    if 'agent_hash' not in df.columns:
        logger.error("agent_hash column not found in DataFrame")
        return _placeholder("agent_hash column not found in data"), None

    # Export the data first, then render the chart
    csv_path = save_apr_vs_agent_hash_to_csv(df)
    figure = create_apr_vs_agent_hash_graph(df)

    return figure, csv_path