MLRC_Bench

Running

File size: 10,847 Bytes

"""
Leaderboard table components for the leaderboard application.
"""
import html

import streamlit as st

from src.data.processors import get_model_type_style, get_rank_style

def render_leaderboard_table(display_df, metric_columns, primary_metric):
    """
    Render the custom HTML leaderboard table followed by the metric definitions.

    Emits, in order: the "Model Rankings" section header, the ranking table,
    the CSS for the metric-definition cards, and the metric definitions
    themselves (one card per entry in ``metrics_config``).

    Args:
        display_df (pandas.DataFrame): The DataFrame with the display data.
            Must provide "Rank", "Agent" and "Model Type" columns; metric
            columns missing from it are rendered as "-".
        metric_columns (list): List of metric column names. A name of the
            form "<prefix>: <name>" belongs to the metric whose configured
            name starts with <prefix>; all other names belong to the
            primary metric.
        primary_metric (str): The name of the primary metric
    """
    # Imported lazily, as in the rest of this module's call sites.
    from src.components.header import render_section_header
    from src.utils.config import metrics_config

    # Display model ranking header without the box
    render_section_header("Model Rankings")

    metric_groups = _group_metric_columns(metric_columns, primary_metric, metrics_config)
    html_table = _build_table_html(display_df, metric_groups)

    # Add styling for metrics section
    metrics_css = """
    <style>
    .metric-definitions {
        margin-top: 30px;
        padding-top: 20px;
        border-top: 1px solid #333;
    }
    .metric-definition {
        background-color: #1a1a1a;
        border-radius: 8px;
        padding: 12px 16px;
        margin-bottom: 16px;
    }
    .metric-definition h4 {
        margin-top: 0;
        color: #a5b4fc;
    }
    .metric-definition p {
        margin-bottom: 0;
        color: #e2e8f0;
    }
    </style>
    """

    metrics_html = _build_metric_definitions_html(metrics_config)

    # Display the styling and HTML separately for maximum control
    st.markdown(html_table, unsafe_allow_html=True)
    st.markdown(metrics_css, unsafe_allow_html=True)

    # Render the metrics definitions
    st.markdown(metrics_html, unsafe_allow_html=True)


def _split_metric_prefix(col):
    """Split a "<prefix>: <name>" column label into ``(prefix, name)``.

    Returns ``(None, col)`` when the label has no ": " separator, so a label
    that merely contains a colon (e.g. "a:b") is treated as unprefixed
    instead of failing to unpack.
    """
    prefix, separator, name = col.partition(": ")
    if not separator:
        return None, col
    return prefix, name


def _group_metric_columns(metric_columns, primary_metric, metrics_config):
    """Group metric columns under the metric they belong to, primary metric first.

    Groups are only created when they have at least one column, so the table
    header never receives a zero-width (``colspan="0"``) cell.
    """
    unprefixed = [col for col in metric_columns if _split_metric_prefix(col)[0] is None]
    groups = {primary_metric: unprefixed} if unprefixed else {}

    for col in metric_columns:
        prefix, _ = _split_metric_prefix(col)
        if prefix is None:
            continue
        # Resolve the (possibly abbreviated) prefix to the configured metric
        # name; fall back to the prefix itself when nothing matches.
        full_metric_name = next((m for m in metrics_config if m.startswith(prefix)), prefix)
        groups.setdefault(full_metric_name, []).append(col)

    return groups


def _build_table_html(display_df, metric_groups):
    """Build the leaderboard ``<table>`` markup for the given rows and column groups.

    Text taken from the DataFrame or from column labels is HTML-escaped so
    that characters such as ``<``, ``&`` or ``"`` cannot break the markup.
    """
    # Flatten the groups once: (column key, display label, is "Metric Average").
    columns = []
    for cols in metric_groups.values():
        for col in cols:
            _, display_name = _split_metric_prefix(col)
            columns.append((col, display_name, display_name == "Metric Average"))

    # Start building the HTML table structure
    parts = ["""
    <div class="fixed-table-container">
      <div class="scroll-container">
        <table class="fixed-table">
          <thead>
            <tr class="header-row">
              <th class="fixed-column first-fixed-column" rowspan="2">Rank</th>
              <th class="fixed-column second-fixed-column" rowspan="2" style="text-align: center;">Agent</th>
              <th class="model-type-cell" rowspan="2" style="text-align: center;">Model Type</th>
    """]

    # Add metric headers for each metric group
    for group_name, cols in metric_groups.items():
        parts.append(
            f'<th colspan="{len(cols)}" class="metric-header" style="text-align: center;">'
            f'{html.escape(str(group_name))}</th>'
        )

    # Continue the table structure
    parts.append("""
            </tr>
            <tr class="sub-header">
    """)

    # Add individual column headers for all metrics
    for _, display_name, is_overall in columns:
        column_class = "overall-cell" if is_overall else "metric-cell"
        parts.append(
            f'<th class="{column_class}" style="text-align: center;">'
            f'{html.escape(str(display_name))}</th>'
        )

    # Close the header and start the body
    parts.append("""
            </tr>
          </thead>
          <tbody>
    """)

    # Add the data rows
    for i, (_, row) in enumerate(display_df.iterrows()):
        # Background is set on the row (not the cells) to keep it consistent;
        # the human reference row gets its own highlight.
        if row["Agent"] == "Top Human in Competition":
            row_bg = "#2a1e37"  # Purple-ish dark background for human row
            row_style = f'style="background-color: {row_bg}; box-shadow: 0 0 5px #f472b6;"'
        else:
            row_bg = "#0a0a0a" if i % 2 == 0 else "#111111"
            row_style = f'style="background-color: {row_bg};"'

        parts.append(f'<tr class="table-row" {row_style}>')

        # Rank with medal styling
        rank_style = "".join(
            f"{style_key}: {style_value};"
            for style_key, style_value in get_rank_style(row["Rank"]).items()
        )
        rank_text = html.escape(str(row["Rank"]))
        parts.append(
            f'<td class="fixed-column first-fixed-column" style="{rank_style}">{rank_text}</td>'
        )

        # Agent name; the same escaped text is used for the tooltip attribute.
        agent_text = html.escape(str(row["Agent"]), quote=True)
        parts.append(
            f'<td class="fixed-column second-fixed-column" title="{agent_text}" '
            f'style="font-weight: 500; overflow: hidden; text-overflow: ellipsis; '
            f'white-space: nowrap; text-align: center;">{agent_text}</td>'
        )

        # Model type cell; empty style values are skipped.
        model_type = row["Model Type"]
        type_style = "text-align: center;" + "".join(
            f"{style_key}: {style_value};"
            for style_key, style_value in get_model_type_style(model_type).items()
            if style_value
        )
        parts.append(
            f'<td class="table-cell model-type-cell" style="{type_style}">'
            f'{html.escape(str(model_type))}</td>'
        )

        # Metric values with minimal styling for all columns
        for col, _, is_overall in columns:
            cell_class = "table-cell overall-cell" if is_overall else "table-cell metric-cell"

            if col not in row:
                # Column missing from the data (shouldn't happen): empty cell
                parts.append(f'<td class="{cell_class}">-</td>')
                continue

            raw_value = row[col]
            try:
                # Values may be pre-formatted with thousands separators.
                value = float(str(raw_value).replace(',', ''))
            except (TypeError, ValueError):
                value = None  # Non-numeric text gets no sign styling

            if value is not None:
                if value > 0:
                    cell_class += " positive-value"
                elif value < 0:
                    cell_class += " negative-value"

            parts.append(f'<td class="{cell_class}">{html.escape(str(raw_value))}</td>')

        parts.append("</tr>")

    # Close the table
    parts.append("""
          </tbody>
        </table>
      </div>
    </div>
    """)

    return "".join(parts)


def _build_metric_definitions_html(metrics_config):
    """Build the metric-definitions section, one card per configured metric.

    Names and descriptions come from the project's own configuration and are
    inserted verbatim (they may carry intentional markup).
    """
    # Metrics that show their formula below the description.
    formula_blocks = {
        "Relative Improvement to Human": """
            <div style="margin: 15px 0;">
                <p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
                <div style="background-color: #111; padding: 15px; border-radius: 5px; text-align: center; margin-bottom: 15px;">
                    Relative Improvement to Human = max<sub>all runs</sub>((P<sub>agent</sub> - P<sub>baseline</sub>) / (P<sub>human</sub> - P<sub>baseline</sub>)) × 100%
                </div>
                <p style="margin-top: 10px; font-weight: 500;">Where:</p>
                <ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
                    <li style="margin-bottom: 5px;">P<sub>agent</sub> is the agent's test performance</li>
                    <li style="margin-bottom: 5px;">P<sub>baseline</sub> is the baseline test performance</li>
                    <li style="margin-bottom: 5px;">P<sub>human</sub> is the human performance benchmark</li>
                    <li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
                </ul>
            </div>
            """,
        "Absolute Improvement to Baseline": """
            <div style="margin: 15px 0;">
                <p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
                <div style="background-color: #111; padding: 15px; border-radius: 5px; text-align: center; margin-bottom: 15px;">
                    Absolute Improvement to Baseline = max<sub>all runs</sub>((P<sub>agent</sub> - P<sub>baseline</sub>) / P<sub>baseline</sub>) × 100%
                </div>
                <p style="margin-top: 10px; font-weight: 500;">Where:</p>
                <ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
                    <li style="margin-bottom: 5px;">P<sub>agent</sub> is the agent's test performance</li>
                    <li style="margin-bottom: 5px;">P<sub>baseline</sub> is the baseline test performance</li>
                    <li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
                </ul>
            </div>
            """,
    }

    parts = ['<div class="metric-definitions">']
    for metric_name, metric_info in metrics_config.items():
        metric_description = metric_info.get('description', '')
        # Empty string for regular metrics without a formula.
        formula_html = formula_blocks.get(metric_name, "")
        parts.append(
            f'<div class="metric-definition"><h4>{metric_name}</h4>'
            f'<p>{metric_description}</p>{formula_html}</div>'
        )

    # Close the metric definitions container
    parts.append('</div>')
    return "".join(parts)

def render_empty_state():
    """
    Show a warning box telling the user there is nothing to display.

    Used as the placeholder when no data is available; the message asks the
    user to select at least one task.
    """
    empty_state_html = """
    <div class="warning-box">
        <strong>No data to display.</strong> Please select at least one task to view the data.
    </div>
    """
    # Raw HTML is needed so the "warning-box" styling applies.
    st.markdown(empty_state_html, unsafe_allow_html=True)