File size: 10,847 Bytes
ed2eb44
 
 
 
 
 
06d4ee9
ed2eb44
 
 
 
 
 
06d4ee9
ed2eb44
 
06d4ee9
ed2eb44
 
 
 
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
 
 
 
 
 
 
 
06d4ee9
 
ed2eb44
 
06d4ee9
 
 
ed2eb44
 
 
 
 
 
 
06d4ee9
 
 
 
 
 
 
ed2eb44
 
 
 
 
 
 
 
 
 
 
06d4ee9
 
 
 
 
 
 
 
ed2eb44
 
06d4ee9
ed2eb44
 
06d4ee9
ed2eb44
 
 
 
 
 
 
06d4ee9
ed2eb44
 
 
06d4ee9
ed2eb44
 
 
 
 
 
 
06d4ee9
 
 
 
 
ed2eb44
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
 
 
 
 
 
 
 
 
 
 
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
06d4ee9
 
ed2eb44
 
 
 
 
 
 
c08520d
ed2eb44
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
"""
Leaderboard table components for the leaderboard application.
"""
import streamlit as st
from src.data.processors import get_model_type_style, get_rank_style

# Extra formula markup appended to the definition of specific metrics.
# The strings are emitted verbatim, so their internal whitespace is kept as is.
_METRIC_FORMULA_HTML = {
    "Relative Improvement to Human": """
            <div style="margin: 15px 0;">
                <p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
                <div style="background-color: #111; padding: 15px; border-radius: 5px; text-align: center; margin-bottom: 15px;">
                    Relative Improvement to Human = max<sub>all runs</sub>((P<sub>agent</sub> - P<sub>baseline</sub>) / (P<sub>human</sub> - P<sub>baseline</sub>)) × 100%
                </div>
                <p style="margin-top: 10px; font-weight: 500;">Where:</p>
                <ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
                    <li style="margin-bottom: 5px;">P<sub>agent</sub> is the agent's test performance</li>
                    <li style="margin-bottom: 5px;">P<sub>baseline</sub> is the baseline test performance</li>
                    <li style="margin-bottom: 5px;">P<sub>human</sub> is the human performance benchmark</li>
                    <li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
                </ul>
            </div>
            """,
    "Absolute Improvement to Baseline": """
            <div style="margin: 15px 0;">
                <p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
                <div style="background-color: #111; padding: 15px; border-radius: 5px; text-align: center; margin-bottom: 15px;">
                    Absolute Improvement to Baseline = max<sub>all runs</sub>((P<sub>agent</sub> - P<sub>baseline</sub>) / P<sub>baseline</sub>) × 100%
                </div>
                <p style="margin-top: 10px; font-weight: 500;">Where:</p>
                <ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
                    <li style="margin-bottom: 5px;">P<sub>agent</sub> is the agent's test performance</li>
                    <li style="margin-bottom: 5px;">P<sub>baseline</sub> is the baseline test performance</li>
                    <li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
                </ul>
            </div>
            """,
}


def _split_metric_column(col):
    """
    Split a metric column label into ``(prefix, display_name)``.

    Labels of the form ``"<prefix>: <name>"`` are split on the first ``": "``.
    A label that contains ``":"`` without a following space is split on the
    first ``":"`` instead of raising. Labels without any colon return
    ``(None, col)``.
    """
    if ": " in col:
        prefix, display_name = col.split(": ", 1)
        return prefix, display_name
    if ":" in col:
        prefix, _, display_name = col.partition(":")
        return prefix, display_name
    return None, col


def _group_metric_columns(metric_columns, primary_metric, metrics_config):
    """
    Group column labels under the metric header they belong to.

    Unprefixed columns go under ``primary_metric``. A prefixed column goes
    under the first key of ``metrics_config`` that starts with its prefix,
    or under the bare prefix when no key matches. Groups keep insertion
    order; groups that end up with no columns are dropped so the header row
    never contains a ``colspan="0"`` cell.
    """
    if not any(":" in col for col in metric_columns):
        # Single metric: every column belongs to the primary metric.
        return {primary_metric: metric_columns}

    groups = {primary_metric: [col for col in metric_columns if ":" not in col]}
    for col in metric_columns:
        if ":" not in col:
            continue
        prefix, _ = _split_metric_column(col)
        full_metric_name = next(
            (name for name in metrics_config if name.startswith(prefix)), prefix
        )
        groups.setdefault(full_metric_name, []).append(col)

    return {name: cols for name, cols in groups.items() if cols}


def _value_sign_class(raw_value):
    """
    Return the CSS class suffix for a cell based on the sign of its value.

    The value is stringified, thousands separators (``,``) are removed and
    the result is parsed as a float. Unparseable values and zero yield ``""``.
    """
    try:
        number = float(str(raw_value).replace(",", ""))
    except (TypeError, ValueError):
        # Non-numeric cell text (e.g. "-"): no sign styling.
        return ""
    if number > 0:
        return " positive-value"
    if number < 0:
        return " negative-value"
    return ""


def _render_table_row(position, row, metric_columns):
    """
    Build the ``<tr>`` markup for one leaderboard row.

    Args:
        position (int): Zero-based position of the row, used for striping
        row: The row as yielded by ``DataFrame.iterrows()``; it is read via
            the "Agent", "Rank" and "Model Type" keys plus each metric column
        metric_columns (list): Metric column labels, in display order

    Returns:
        str: The HTML for the row
    """
    from html import escape

    agent = row["Agent"]
    if agent == "Top Human in Competition":
        # The human reference row gets its own background and a glow.
        row_style = 'style="background-color: #2a1e37; box-shadow: 0 0 5px #f472b6;"'
    else:
        row_bg = "#0a0a0a" if position % 2 == 0 else "#111111"
        row_style = f'style="background-color: {row_bg};"'

    cells = [f'<tr class="table-row" {row_style}>']

    # Rank cell: inline CSS comes from get_rank_style; no background is set here.
    rank = row["Rank"]
    rank_style = "".join(
        f"{style_key}: {style_value};"
        for style_key, style_value in get_rank_style(rank).items()
    )
    cells.append(
        f'<td class="fixed-column first-fixed-column" style="{rank_style}">{rank}</td>'
    )

    # Agent cell. The name is escaped inside the title attribute so a quote
    # or angle bracket in the name cannot break the attribute.
    # NOTE(review): cell text itself is still inserted unescaped, as before;
    # confirm whether upstream data may intentionally carry HTML.
    agent_title = escape(str(agent), quote=True)
    cells.append(
        f'<td class="fixed-column second-fixed-column" title="{agent_title}" style="font-weight: 500; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; text-align: center;">{agent}</td>'
    )

    # Model type cell: only style entries with a truthy value are emitted.
    model_type = row["Model Type"]
    type_style = "text-align: center;" + "".join(
        f"{style_key}: {style_value};"
        for style_key, style_value in get_model_type_style(model_type).items()
        if style_value
    )
    cells.append(
        f'<td class="table-cell model-type-cell" style="{type_style}">{model_type}</td>'
    )

    # Metric cells.
    for col in metric_columns:
        _, display_name = _split_metric_column(col)
        if display_name == "Metric Average":
            cell_class = "table-cell overall-cell"
        else:
            cell_class = "table-cell metric-cell"

        if col in row:
            value_text = row[col]
            cell_class += _value_sign_class(value_text)
            cells.append(f'<td class="{cell_class}">{value_text}</td>')
        else:
            # Column missing from the data: render a placeholder cell.
            cells.append(f'<td class="{cell_class}">-</td>')

    cells.append("</tr>")
    return "".join(cells)


def _build_metric_definitions_html(metrics_config):
    """
    Build the "metric definitions" section shown below the table.

    Each entry of ``metrics_config`` contributes a heading (its key) and the
    entry's ``'description'`` value (empty when absent). Metrics listed in
    ``_METRIC_FORMULA_HTML`` additionally get their formula block.
    """
    parts = ['<div class="metric-definitions">']
    for metric_name, metric_info in metrics_config.items():
        metric_description = metric_info.get('description', '')
        formula_html = _METRIC_FORMULA_HTML.get(metric_name, "")
        parts.append(
            f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p>{formula_html}</div>'
        )
    parts.append('</div>')
    return "".join(parts)


def render_leaderboard_table(display_df, metric_columns, primary_metric):
    """
    Render the custom HTML leaderboard table and the metric definitions

    Renders the "Model Rankings" section header, then emits three
    ``st.markdown`` calls (table, definitions CSS, definitions HTML), all
    with ``unsafe_allow_html=True``. Returns nothing.

    Args:
        display_df (pandas.DataFrame): The DataFrame with the display data;
            rows are read via the "Rank", "Agent" and "Model Type" columns
            plus the metric columns
        metric_columns (list): List of metric column names; names of the form
            "<prefix>: <name>" are grouped under the matching metric header
        primary_metric (str): The name of the primary metric, used as the
            header for unprefixed columns
    """
    from src.components.header import render_section_header
    from src.utils.config import metrics_config

    # Display model ranking header without the box
    render_section_header("Model Rankings")

    metric_groups = _group_metric_columns(metric_columns, primary_metric, metrics_config)
    # Flattened once up front; the order matches the header cells below.
    all_metric_columns = [col for cols in metric_groups.values() for col in cols]

    # Collect fragments and join once instead of growing a string with +=.
    table_parts = []
    table_parts.append("""
    <div class="fixed-table-container">
      <div class="scroll-container">
        <table class="fixed-table">
          <thead>
            <tr class="header-row">
              <th class="fixed-column first-fixed-column" rowspan="2">Rank</th>
              <th class="fixed-column second-fixed-column" rowspan="2" style="text-align: center;">Agent</th>
              <th class="model-type-cell" rowspan="2" style="text-align: center;">Model Type</th>
    """)

    # One spanning header per metric group
    for metric_name, cols in metric_groups.items():
        table_parts.append(
            f'<th colspan="{len(cols)}" class="metric-header" style="text-align: center;">{metric_name}</th>'
        )

    table_parts.append("""
            </tr>
            <tr class="sub-header">
    """)

    # One sub-header per column, without its metric prefix
    for col in all_metric_columns:
        _, display_name = _split_metric_column(col)
        column_class = "overall-cell" if display_name == "Metric Average" else "metric-cell"
        table_parts.append(
            f'<th class="{column_class}" style="text-align: center;">{display_name}</th>'
        )

    table_parts.append("""
            </tr>
          </thead>
          <tbody>
    """)

    # Data rows
    for position, (_, row) in enumerate(display_df.iterrows()):
        table_parts.append(_render_table_row(position, row, all_metric_columns))

    table_parts.append("""
          </tbody>
        </table>
      </div>
    </div>
    """)
    html_table = "".join(table_parts)

    # Styling for the metric definitions section
    metrics_css = """
    <style>
    .metric-definitions {
        margin-top: 30px;
        padding-top: 20px;
        border-top: 1px solid #333;
    }
    .metric-definition {
        background-color: #1a1a1a;
        border-radius: 8px;
        padding: 12px 16px;
        margin-bottom: 16px;
    }
    .metric-definition h4 {
        margin-top: 0;
        color: #a5b4fc;
    }
    .metric-definition p {
        margin-bottom: 0;
        color: #e2e8f0;
    }
    </style>
    """

    metrics_html = _build_metric_definitions_html(metrics_config)

    # Everything is built before anything is emitted; table first, then the
    # definitions styling and markup.
    st.markdown(html_table, unsafe_allow_html=True)
    st.markdown(metrics_css, unsafe_allow_html=True)
    st.markdown(metrics_html, unsafe_allow_html=True)

def render_empty_state():
    """
    Show the warning box used when there is no data to render

    Emits a single HTML snippet through ``st.markdown`` with
    ``unsafe_allow_html=True``; returns nothing.
    """
    empty_state_html = """
    <div class="warning-box">
        <strong>No data to display.</strong> Please select at least one task to view the data.
    </div>
    """
    st.markdown(empty_state_html, unsafe_allow_html=True)