|
|
|
|
|
|
|
import json |
|
from pathlib import Path |
|
import pandas as pd |
|
import gradio as gr |
|
from calculate_memory_usage import report_results |
|
|
|
|
|
def make_default_md_1():
    """Build the Markdown header for the leaderboard page.

    Returns the page title plus styled links to the EffiBench paper,
    GitHub repository, and dataset.
    """
    # Shared inline style applied to all three header links.
    link_style = "color: #1976D2; text-decoration: none;"

    return f"""

# π EffiBench Leaderboard π

<a href='https://arxiv.org/abs/2402.02037' style='{link_style}'>Paper</a> |

<a href='https://github.com/huangd1999/EffiBench' style='{link_style}'>GitHub</a> |

<a href='https://github.com/huangd1999/EffiBench/tree/main/data' style='{link_style}'>Dataset</a>

"""
|
|
|
# Call-to-action shown under the page header inviting model submissions.
# Fixes: added the missing opening "**" so the trailing "**" no longer leaves
# an unbalanced bold marker; dropped the f-prefix (no placeholders present).
add_model_md = """

**π€ [filing a request](https://github.com/huangd1999/EffiBench/issues/new?assignees=&labels=model+eval&projects=&template=model_eval_request.yml&title=%F0%9F%92%A1+%5BREQUEST%5D+-+%3CMODEL_NAME%3E) to add your models on our leaderboard!**

"""
|
|
|
# One-line intro rendered inside the "Leaderboard" tab, above the results table.
leaderboard_md = """

Three benchmarks are displayed: **EffiBench**, **HumanEval** and **MBPP**.

"""
|
|
|
# Terms-of-service text rendered at the very bottom of the page
# (displayed with elem_id="ack_markdown" in build_leaderboard_tab).
acknowledgment_md = """

### Terms of Service



Users are required to agree to the following terms before using the service:



The service is a research preview. It only provides limited safety measures and may generate offensive content.

It must not be used for any illegal, harmful, violent, racist, or sexual purposes.

Please do not upload any private information.

The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.

"""
|
|
|
# BibTeX citation shown in the "Citation" accordion.
# Fix: the code fence opened before the BibTeX entry was never closed,
# which breaks Markdown rendering of everything after it; a closing
# ``` is now emitted before the string ends.
citation_md = """

### Citation

Please cite the following paper if you find our leaderboard or dataset helpful.

```

@article{huang2024effibench,

title={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},

author={Huang, Dong and Zhang, Jie M and Qing, Yuhao and Cui, Heming},

journal={arXiv preprint arXiv:2402.02037},

year={2024}

}

```
"""
|
|
|
def process_uploaded_file(file):
    """Validate an uploaded ``<task>_<model>.json`` results file and report it.

    Parameters
    ----------
    file : str | os.PathLike | None
        Path to the uploaded JSON file (Gradio passes a temp-file path),
        or ``None`` when nothing was uploaded.

    Returns
    -------
    A human-readable error message (str) on failure; otherwise whatever
    ``report_results(task, model, file)`` returns.
    """
    if file is None:
        return "No file uploaded."
    try:
        file = Path(file)
        # Parse eagerly so malformed JSON is rejected with a clear message
        # before any reporting happens; the parsed value itself is unused.
        json.loads(file.read_text())
    except Exception as e:
        return f"Error processing the file: {str(e)}"

    try:
        # maxsplit=1: only the FIRST underscore separates task from model,
        # so model names containing underscores (e.g. "humaneval_gpt_4_turbo")
        # no longer trigger an unpacking error.
        task, model = file.stem.split("_", 1)
    except Exception as e:
        return f"Error parsing the task and model name from the file name: {str(e)}! Should be in the format of <task>_<model>.json"

    return report_results(task, model, file)
|
|
|
def build_leaderboard_tab(leaderboard_table_file):
    """Render the EffiBench leaderboard UI.

    Builds the page header, a results table filterable by dataset and
    timeout, a JSON-upload tab, and the citation/terms footer.

    Parameters
    ----------
    leaderboard_table_file : str | os.PathLike
        CSV file with leaderboard rows; must contain ``Dataset`` and
        ``Timeout`` columns, which drive the filter dropdowns.
    """
    gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
    gr.Markdown(add_model_md, elem_id="leaderboard_markdown")

    table = pd.read_csv(leaderboard_table_file)

    def filter_leaderboard(dataset, timeout):
        # Keep rows matching both selections, then hide the filter
        # columns themselves from the displayed table.
        mask = (table['Timeout'] == timeout) & (table['Dataset'] == dataset)
        return table[mask].drop(columns=['Timeout', 'Dataset'])

    dataset_choices = table['Dataset'].unique().tolist()
    timeout_choices = table['Timeout'].unique().tolist()

    with gr.Tab("Leaderboard"):
        gr.Markdown(leaderboard_md, elem_id="leaderboard_markdown")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(label="Dataset", choices=dataset_choices, value=dataset_choices[0])
            timeout_dropdown = gr.Dropdown(label="Timeout", choices=timeout_choices, value=timeout_choices[0])

        # Seed the table with the first dataset/timeout combination.
        leaderboard = gr.Dataframe(value=filter_leaderboard(dataset_choices[0], timeout_choices[0]))

        def update_leaderboard(dataset, timeout):
            return filter_leaderboard(dataset, timeout)

        # Both dropdowns share the same handler and refresh the same table.
        for control in (dataset_dropdown, timeout_dropdown):
            control.change(fn=update_leaderboard, inputs=[dataset_dropdown, timeout_dropdown], outputs=leaderboard)

    with gr.Tab("XXX"):
        file_upload = gr.File(label="Upload JSON File")
        upload_button = gr.Button("Process File")
        output_text = gr.Textbox(label="Output")

        upload_button.click(process_uploaded_file, inputs=file_upload, outputs=output_text)

    with gr.Accordion("Citation", open=True):
        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
    gr.Markdown(acknowledgment_md, elem_id="ack_markdown")
|
|