# Hugging Face Space: jszheng/RACE_leaderboard
# Space status when this file was captured: "Runtime error"
# File size: 6,622 bytes

import json

import gradio as gr
import pandas as pd

from css_html import custom_css
from text_content import ABOUT_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT
from utils import (
    AutoEvalColumn,
    fields,
)

result_path = './RESULTS.json'

# Emoji prefix placed in front of every metric name, keyed by the result
# category the metric belongs to.
CATEGORY_SYMBOLS = {
    'readability': '📖',
    'maintainability': '🔨',
    'efficiency': '🚀',
    'correctness': '✅',
    'overall': '💯',
}


def _flatten_results(results):
    """Turn the nested results mapping into one flat row per model.

    Args:
        results: Mapping of ``model -> category -> metric -> value``.

    Returns:
        A list of dicts, one per model. Each dict has a ``"model"`` key plus
        one ``"<symbol> <metric>"`` key for every metric of every category.
    """
    flat_rows = []
    for model_name, categories in results.items():
        row = {"model": model_name}
        for category, metrics in categories.items():
            # Fall back to the raw category name for categories without a
            # symbol, so their metrics are never filed under the emoji of
            # whichever category happened to be processed before them.
            prefix = CATEGORY_SYMBOLS.get(category, category)
            for metric, value in metrics.items():
                row[f'{prefix} {metric}'] = value
        flat_rows.append(row)
    return flat_rows


# JSON is UTF-8 by specification; do not depend on the platform default
# encoding. The file is only needed for parsing, so it is closed right after.
with open(result_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

rows = _flatten_results(data)

# Leaderboard order: best overall RACE score first.
df = pd.DataFrame(rows).sort_values(by='💯 RACE Score', ascending=False)

# Column metadata derived from AutoEvalColumn. Hidden fields are dropped once;
# the *_LITE variants keep only the fields flagged as displayed by default.
_visible_fields = [col for col in fields(AutoEvalColumn) if not col.hidden]
_default_fields = [col for col in _visible_fields if col.displayed_by_default]

COLS = [col.name for col in _visible_fields]
TYPES = [col.type for col in _visible_fields]
COLS_LITE = [col.name for col in _default_fields]
TYPES_LITE = [col.type for col in _default_fields]

def select_columns(df, columns):
    """Return ``df`` restricted to the model column plus the requested columns.

    Args:
        df: Table to filter. It is expected to contain the
            ``AutoEvalColumn.model.name`` column, which is always kept.
        columns: Names of the optional columns to keep. ``None`` is treated
            as an empty selection.

    Returns:
        A frame whose first column is the model column, followed by every
        requested column that exists in ``df``, ordered as in ``COLS``.
    """
    always_here_cols = [
        AutoEvalColumn.model.name,
    ]
    requested = set(columns or ())
    # We use COLS to maintain sorting. Pinned columns are skipped here so a
    # selection that also names them cannot produce duplicate columns.
    selected = [
        c
        for c in COLS
        if c in requested and c in df.columns and c not in always_here_cols
    ]
    return df[always_here_cols + selected]

# ---------------------------------------------------------------------------
# UI definition.
#
# Layout, top to bottom:
#   1. Title and short introduction.
#   2. Tabs: the evaluation table (with a column picker) and the "About" page.
#   3. Collapsible citation and acknowledgement sections.
#
# Components are attached to the layout in creation order, so the order of
# the statements below is significant.
# ---------------------------------------------------------------------------

# The model column is always displayed and therefore never offered as a toggle.
model_column = AutoEvalColumn.model.name
toggleable_columns = [c for c in COLS if c != model_column]
default_columns = [c for c in COLS_LITE if c != model_column]

demo = gr.Blocks(css=custom_css)
with demo:
    with gr.Column():
        gr.Markdown(
            """<div style="text-align: center;"><h1> 🏎️RACE Leaderboard</h1></div>\
            <br>\
""",
            elem_classes="markdown-text",
        )
        
        gr.Markdown(
            """
            Based on the 🏎️RACE benchmark, we demonstrated the ability of different LLMs to generate code that is **_correct_** and **_meets the requirements of real-world development scenarios_**.
            
            More details about how to evaluate the LLM are available in the [🏎️RACE GitHub repository](https://github.com/jszheng21/RACE). For a complete description of RACE benchmark and related experimental analysis, please refer to the paper: [**Beyond Correctness: Benchmarking Multi-dimensional Code Generation for Large Language Models**](https://arxiv.org/abs/2407.11470). [![](https://img.shields.io/badge/arXiv-2407.11470-b31b1b.svg)](https://arxiv.org/abs/2407.11470)
""",
            elem_classes="markdown-text",
        )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("🔍 Evaluation Table", id=0):
                    with gr.Column():
                        with gr.Accordion("⏬ Hidden Columns", open=False):
                            # Every visible column except the model column can
                            # be toggled; the default-displayed ones start
                            # checked.
                            shown_columns = gr.CheckboxGroup(
                                choices=toggleable_columns,
                                value=default_columns,
                                label="",
                                elem_id="column-select",
                                interactive=True,
                            )

                    # Visible table: model column plus the currently checked
                    # columns.
                    # NOTE(review): headers/datatype describe all of COLS while
                    # the value only holds the selected subset -- confirm the
                    # installed Gradio version tolerates the mismatch.
                    leaderboard_df = gr.components.Dataframe(
                        value=df[[model_column] + shown_columns.value],
                        headers=COLS,
                        datatype=TYPES,
                        elem_id="leaderboard-table",
                        interactive=False,
                    )

                    # Invisible copy of the full table; it is the input that
                    # select_columns filters whenever the selection changes.
                    hidden_leaderboard_df = gr.components.Dataframe(
                        value=df,
                        headers=COLS,
                        datatype=["str"] * len(COLS),
                        visible=False,
                    )

                    # Rebuild the visible table from the hidden full table on
                    # every change of the column selection.
                    shown_columns.change(
                        select_columns,
                        [hidden_leaderboard_df, shown_columns],
                        leaderboard_df,
                    )

                    gr.Markdown(NOTES_TEXT, elem_classes="markdown-text")

                with gr.TabItem("📝 About", id=1):
                    gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

        with gr.Row():
            with gr.Accordion("📙 Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=10,
                    elem_id="citation-button",
                    show_copy_button=True,
                )

        with gr.Row():
            with gr.Accordion("🙏 Acknowledgement", open=False):
                gr.Markdown(ACKNOWLEDGEMENT_TEXT)

# Start the web server; this runs as soon as the module is executed.
demo.launch()