# Jason Zheng
# add links
# c380c7e
# raw
# history blame
# 6.62 kB
import json
import gradio as gr
import pandas as pd
from css_html import custom_css
from text_content import ABOUT_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT
from utils import (
AutoEvalColumn,
fields,
)
# Location of the benchmark results, relative to the working directory.
result_path = './RESULTS.json'

# Prefix attached to every metric name of a given result section.
# NOTE(review): these literals look like emoji whose UTF-8 bytes were decoded
# with a legacy code page. They are kept exactly as found because the
# resulting column names presumably have to match the names declared on
# AutoEvalColumn in utils -- confirm before repairing them.
_SECTION_SYMBOLS = {
    'readability': '๐Ÿ“–',
    'maintainability': '๐Ÿ”จ',
    'efficiency': '๐Ÿš€',
    'correctness': 'โœ…',
    'overall': '๐Ÿ’ฏ',
}


def _flatten_results(results):
    """Flatten the nested results mapping into one row dict per model.

    Args:
        results: Mapping of model name -> section name -> metric name -> value.

    Returns:
        A list of dicts. Each dict has a "model" entry plus one
        "<symbol> <metric>" entry per metric of every section.

    Raises:
        ValueError: If a section name has no symbol in ``_SECTION_SYMBOLS``.
            Previously such a section reused the symbol of the preceding
            section (or hit a NameError), which mislabelled its metrics.
    """
    flat_rows = []
    for model_name, sections in results.items():
        flat = {"model": model_name}
        for section, metrics in sections.items():
            try:
                symbol = _SECTION_SYMBOLS[section]
            except KeyError:
                raise ValueError(
                    f"Unknown result section {section!r} for model "
                    f"{model_name!r}; expected one of "
                    f"{sorted(_SECTION_SYMBOLS)}"
                ) from None
            for metric, value in metrics.items():
                flat[f'{symbol} {metric}'] = value
        flat_rows.append(flat)
    return flat_rows


# Read as UTF-8 explicitly so decoding does not depend on the platform locale.
with open(result_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

rows = _flatten_results(data)

# Best overall score first.
df = pd.DataFrame(rows)
df = df.sort_values(by='๐Ÿ’ฏ RACE Score', ascending=False)
# Column metadata is declared on AutoEvalColumn; hidden fields are never exposed.
_visible_fields = [c for c in fields(AutoEvalColumn) if not c.hidden]
_default_fields = [c for c in _visible_fields if c.displayed_by_default]

# Names and datatypes of every non-hidden column.
COLS = [c.name for c in _visible_fields]
TYPES = [c.type for c in _visible_fields]

# Names and datatypes of the non-hidden columns that are displayed by default.
COLS_LITE = [c.name for c in _default_fields]
TYPES_LITE = [c.type for c in _default_fields]
def select_columns(df, columns):
    """Return *df* restricted to the model column plus the requested columns.

    Args:
        df: Frame holding the leaderboard data; it must contain the model
            column.
        columns: Column names to keep. ``None`` is treated as an empty
            selection.

    Returns:
        A frame whose first column is the model column, followed by the
        requested columns that exist in *df*, ordered as in ``COLS``.
    """
    always_here_cols = [
        AutoEvalColumn.model.name,
    ]
    requested = () if columns is None else columns
    # We use COLS to maintain sorting. Pinned columns are skipped here so that
    # requesting one explicitly does not produce a duplicate column.
    selected = [
        c
        for c in COLS
        if c in df.columns and c in requested and c not in always_here_cols
    ]
    filtered_df = df[always_here_cols + selected]
    return filtered_df
# Static header content, kept at module level so the layout code stays short.
_TITLE_HTML = """<div style="text-align: center;"><h1> ๐ŸŽ๏ธRACE Leaderboard</h1></div>\
<br>\
"""
_INTRO_MARKDOWN = """
Based on the ๐ŸŽ๏ธRACE benchmark, we demonstrated the ability of different LLMs to generate code that is **_correct_** and **_meets the requirements of real-world development scenarios_**.
More details about how to evalute the LLM are available in the [๐ŸŽ๏ธRACE GitHub repository](https://github.com/jszheng21/RACE). For a complete description of RACE benchmark and related experimental analysis, please refer to the paper: [**Beyond Correctness: Benchmarking Multi-dimensional Code Generation for Large Language Models**](https://arxiv.org/abs/2407.11470). [![](https://img.shields.io/badge/arXiv-2407.11470-b31b1b.svg)](https://arxiv.org/abs/2407.11470)
"""

# The model column is always displayed, so it is never offered as a toggle.
_pinned_columns = [AutoEvalColumn.model.name]
_toggleable_columns = [c for c in COLS if c not in _pinned_columns]
_default_columns = [c for c in COLS_LITE if c not in _pinned_columns]

# NOTE(review): the nesting of the layout contexts below was reconstructed
# from a copy of this file that had lost its indentation -- confirm it against
# the intended page layout.
demo = gr.Blocks(css=custom_css)
with demo:
    # Page header: title followed by a short introduction with links.
    with gr.Column():
        gr.Markdown(_TITLE_HTML, elem_classes="markdown-text")
        gr.Markdown(_INTRO_MARKDOWN, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("๐Ÿ” Evaluation Table", id=0):
                    with gr.Column():
                        # Checkboxes that choose which metric columns are shown.
                        with gr.Accordion("โฌ Hidden Columns", open=False):
                            shown_columns = gr.CheckboxGroup(
                                choices=_toggleable_columns,
                                value=_default_columns,
                                label="",
                                elem_id="column-select",
                                interactive=True,
                            )

                        # Visible table: model column plus the initially
                        # checked columns.
                        leaderboard_df = gr.components.Dataframe(
                            value=df[_pinned_columns + shown_columns.value],
                            headers=COLS,
                            datatype=TYPES,
                            elem_id="leaderboard-table",
                            interactive=False,
                        )

                        # Invisible table holding the full frame; it is the
                        # input from which the visible table is re-derived.
                        hidden_leaderboard_df = gr.components.Dataframe(
                            value=df,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )

                        # Rebuild the visible table whenever the checkbox
                        # selection changes.
                        shown_columns.change(
                            fn=select_columns,
                            inputs=[hidden_leaderboard_df, shown_columns],
                            outputs=leaderboard_df,
                        )

                        gr.Markdown(NOTES_TEXT, elem_classes="markdown-text")

                with gr.TabItem("๐Ÿ“ About", id=1):
                    gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

    # Collapsible citation snippet with a copy button.
    with gr.Row():
        with gr.Accordion("๐Ÿ“™ Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=10,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Collapsible acknowledgement text.
    with gr.Row():
        with gr.Accordion("๐Ÿ™ Acknowledgement", open=False):
            gr.Markdown(ACKNOWLEDGEMENT_TEXT)

demo.launch()