# Spaces: hmb/leaderboard_dataframe · Sleeping · File size: 5,745 Bytes · f0ad9ed

import pandas as pd
import numpy as np
import gradio as gr
from urllib.parse import quote

def highlight_green(val):
    """Return the CSS for one score cell, drawn as a green progress bar.

    ``val`` is converted with ``str``, stripped of ``%`` and `` kg``, and
    parsed as a float; that number is used directly as the gradient stop
    percentage.  Values that do not parse as a number, or that parse to
    NaN/infinity (which would produce invalid CSS such as ``nan%``), get the
    plain dark fallback style instead.
    """
    fallback = 'background-color: #121212; color: white;'
    try:
        val_float = float(str(val).replace('%', '').replace(' kg', ''))
    except ValueError:
        # float() on a str only fails with ValueError; anything else is a
        # genuine bug and should surface rather than be swallowed.
        return fallback
    if not np.isfinite(val_float):
        return fallback
    return f'background: linear-gradient(90deg, rgba(46, 125, 50, 0.5) {val_float}%, rgba(46, 125, 50, 0.1) {val_float}%); color: white;'


def style_dataframe(df):
    """Attach leaderboard styling to ``df`` and return the result.

    Args:
        df: DataFrame holding the leaderboard rows.

    Returns:
        ``df`` itself, unchanged, when it has no rows; otherwise a pandas
        ``Styler`` in which every score column that is present is rendered
        through ``highlight_green`` and the "Model" column (if present) is
        coloured as a link.  Columns that are absent from ``df`` are skipped
        instead of raising ``KeyError``.
    """
    if len(df) == 0:
        return df

    highlight_cols = ["Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO"]
    styled = df.style

    score_cols = [col for col in highlight_cols if col in df.columns]
    if score_cols:
        # Styler.applymap was renamed to Styler.map in pandas 2.1 and the old
        # name was later dropped; prefer the new name, fall back on old pandas.
        cellwise = styled.map if hasattr(styled, "map") else styled.applymap
        styled = cellwise(highlight_green, subset=score_cols)

    if "Model" in df.columns:
        styled = styled.set_properties(
            subset=["Model"],
            **{'color': '#4da6ff'}
        )

    return styled

def create_leaderboard_data():
    """Build the demo leaderboard and return it styled for display.

    Every model in the hard-coded list gets placeholder scores drawn from a
    fixed-seed random generator, so each call produces the same table.  Rows
    are ordered by "Average" (highest first), ranked from 1, and the resulting
    DataFrame is passed through ``style_dataframe``.

    Returns:
        Whatever ``style_dataframe`` returns for the assembled DataFrame.
    """
    models = [
        {"model_name": "meta-llama/llama-3-70b-instruct", "type": "open"},
        {"model_name": "mistralai/Mistral-7B-Instruct-v0.3", "type": "open"},
        {"model_name": "google/gemma-7b-it", "type": "open"},
        {"model_name": "Qwen/Qwen2-7B-Instruct", "type": "open"},
        {"model_name": "anthropic/claude-3-opus", "type": "closed"},
        {"model_name": "OpenAI/gpt-4o", "type": "closed"},
        {"model_name": "01-ai/Yi-1.5-34B-Chat", "type": "open"},
        {"model_name": "google/gemma-2b", "type": "open"},
        {"model_name": "microsoft/phi-3-mini-4k-instruct", "type": "open"},
        {"model_name": "microsoft/phi-3-mini-128k-instruct", "type": "open"},
        {"model_name": "stabilityai/stable-beluga-7b", "type": "open"},
        {"model_name": "togethercomputer/RedPajama-INCITE-7B-Instruct", "type": "open"},
        {"model_name": "databricks/dbrx-instruct", "type": "closed"},
        {"model_name": "mosaicml/mpt-7b-instruct", "type": "open"},
        {"model_name": "01-ai/Yi-1.5-9B-Chat", "type": "open"}
    ]

    # A private RandomState(42) yields the same stream the legacy global
    # generator gives after np.random.seed(42), but does not reseed NumPy's
    # process-wide RNG as a side effect of building the table.
    rng = np.random.RandomState(42)

    # (low, high) bounds in draw order:
    # Average, IFEval, BBHI, MATH, GPQA, MUJB, MMLU-PRO, CO_Cost.
    score_ranges = [
        (40, 90), (30, 90), (40, 85), (20, 80),
        (10, 70), (10, 70), (40, 85), (1, 100),
    ]

    rows = []
    for model in models:
        model_name = model["model_name"]
        model_type = model["type"]

        emoji = "🟢" if model_type.lower() == "open" else "🔴"
        type_with_emoji = f"{emoji} {model_type.upper()}"

        # "org/name" ids link straight to the repo page; bare names fall back
        # to a hub search.
        if "/" in model_name:
            model_link = f"[{model_name}](https://huggingface.co/{quote(model_name)})"
        else:
            model_link = f"[{model_name}](https://huggingface.co/models?search={quote(model_name)})"

        *scores, co2_cost = [
            round(rng.uniform(low, high), 2) for low, high in score_ranges
        ]

        rows.append([
            0,  # rank placeholder, assigned after sorting
            type_with_emoji,
            model_link,
            *(f"{score}" for score in scores),
            f"{co2_cost} kg",
        ])

    # Index 3 is the "Average" column (stored as text, hence the float()).
    rows.sort(key=lambda row: float(row[3]), reverse=True)

    for rank, row in enumerate(rows, 1):
        row[0] = rank

    df = pd.DataFrame(rows, columns=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"])
    return style_dataframe(df)

def get_filter_data():
    """Return a fresh dict mapping each quick-filter label to its number.

    Keys are kept in the order they should be displayed.
    """
    labels = (
        "For Edge Devices",
        "For Consumers",
        "Mid-range",
        "For the GPU-rich",
        "Only Official Providers",
    )
    numbers = (5, 4, 4, 3, 8)
    return dict(zip(labels, numbers))

# Custom stylesheet handed to gr.Blocks(css=...).
# .leaderboard-title and .leaderboard-subtitle style the header HTML built in
# the Blocks layout. The remaining selectors are not attached to any component
# in this file.
# NOTE(review): presumably they target markup Gradio generates itself — confirm.
css = """
.html-container {
    text-align: center;
    display: flex;
    justify-content: center;
    width: 100%;
}

.dataframe-container {
    margin-top: 0.5rem;
    margin-bottom: 0.5rem;
}

.leaderboard-title {
    font-size: 1.5rem;
    font-weight: bold;
    margin-bottom: 0.25rem;
    color: #f0f0f0;
}

.leaderboard-subtitle {
    font-size: 0.9rem;
    margin-bottom: 1rem;
    color: #a0a0a0;
}

.filters-container {
    margin-bottom: 0.5rem;
}
"""

# Checkbox labels for the "Quick Filters" group, formatted "<label> · <number>".
filter_data = get_filter_data()
filter_choices = [
    " · ".join((label, str(number)))
    for label, number in filter_data.items()
]

# Page layout. Components are created top to bottom inside the Blocks context:
# header, quick-filter checkboxes, "last updated" line, leaderboard table,
# refresh button.
with gr.Blocks(css=css) as demo:
    # Header: title and subtitle as raw HTML, styled by the .leaderboard-*
    # rules in `css`.
    gr.HTML("""
        <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
            <div class="leaderboard-title">Open LLM Leaderboard</div>
        </div>
        <div class="leaderboard-subtitle">Comparing Large Language Models in an open and reproducible way</div>
    """)

    with gr.Row():
        # NOTE(review): `filters` is not connected to any event handler in the
        # code shown here, so ticking a box does not change the table.
        filters = gr.CheckboxGroup(
            label="Quick Filters",
            choices=filter_choices,
        )

    with gr.Row():
        # Hard-coded timestamp text; it is not derived from the data.
        status_text = gr.HTML("<div style='text-align: right; color: #888; font-size: 0.8rem;'>Last updated: June 25, 2024 at 10:30 AM</div>")

    # Build the initial table once at layout time.
    leaderboard_df = create_leaderboard_data()
    # `headers`, `datatype` and `column_widths` each have 11 entries, one per
    # column produced by create_leaderboard_data(); "Model" is declared as
    # markdown because its cells are markdown links.
    leaderboard_table = gr.Dataframe(
        value=leaderboard_df,
        headers=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"],
        datatype=["number", "str", "markdown", "str", "str", "str", "str", "str", "str", "str", "str"],
        elem_id="leaderboard-table",
        interactive=False,
        max_height=600,
        show_search="search",
        show_copy_button=True,
        show_fullscreen_button=True,
        pinned_columns=2,
        column_widths=["5%", "7%", "35%", "7%", "7%", "7%", "7%", "7%", "7%", "7%", "6%"]
    )

    # Clicking the button rebuilds the table and writes it into
    # `leaderboard_table`. create_leaderboard_data() uses a fixed seed, so the
    # rebuilt table is identical to the initial one.
    refresh_btn = gr.Button("Refresh Data")
    refresh_btn.click(fn=lambda: create_leaderboard_data(), outputs=leaderboard_table)

# Launch the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()