Spaces:
Running
Running
# Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard
# Modified by EffiBench
import json | |
from pathlib import Path | |
import pandas as pd | |
import gradio as gr | |
from calculate_memory_usage import report_results | |
def make_default_md_1():
    """Return the Markdown/HTML header shown at the top of the leaderboard.

    The header consists of the page title followed by three links (paper,
    GitHub repository, dataset), all rendered in the same link colour.

    Returns:
        The header as a ``str``, starting and ending with a newline.
    """
    link_color = "#1976D2"  # This color should be clear in both light and dark mode
    link_style = f"color: {link_color}; text-decoration: none;"
    # Built line by line and returned directly so that no local name shadows
    # the module-level ``leaderboard_md`` constant.
    return "\n".join(
        [
            "",
            "# 🏆 EffiBench Leaderboard 🏆",
            f"<a href='https://arxiv.org/abs/2402.02037' style='{link_style}'>Paper</a>",
            f"<a href='https://github.com/huangd1999/EffiBench' style='{link_style}'>GitHub</a>",
            f"<a href='https://github.com/huangd1999/EffiBench/tree/main/data' style='{link_style}'>Dataset</a>",
            "",
        ]
    )
# Call-to-action rendered under the header: links to the GitHub issue
# template for requesting that a model be added to the leaderboard.
# A plain (non-f) string: there are no placeholders, and the URL's
# percent-escapes must be kept literally.
add_model_md = """
🤗 [filing a request](https://github.com/huangd1999/EffiBench/issues/new?assignees=&labels=model+eval&projects=&template=model_eval_request.yml&title=%F0%9F%92%A1+%5BREQUEST%5D+-+%3CMODEL_NAME%3E) to add your models on our leaderboard!
**Test Version**
"""
# Introductory sentence displayed at the top of the "Leaderboard" tab.
leaderboard_md = "\n".join(
    (
        "",
        "Three benchmarks are displayed: **EffiBench**, **HumanEval** and **MBPP**.",
        "",
    )
)
# Terms-of-service notice rendered at the bottom of the page.
_ACKNOWLEDGMENT_LINES = (
    "### Terms of Service",
    "Users are required to agree to the following terms before using the service:",
    "The service is a research preview. It only provides limited safety measures and may generate offensive content.",
    "It must not be used for any illegal, harmful, violent, racist, or sexual purposes.",
    "Please do not upload any private information.",
    "The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.",
)
# Leading and trailing empty items reproduce the newline right after the
# opening and right before the closing of the block.
acknowledgment_md = "\n".join(("", *_ACKNOWLEDGMENT_LINES, ""))
# Citation section: a short request followed by the BibTeX entry inside a
# fenced code block.  The fence is closed explicitly so that nothing rendered
# after this text can be swallowed into the code block, and the author names
# all use the "Last, First" form.
citation_md = """
### Citation
Please cite the following paper if you find our leaderboard or dataset helpful.
```
@article{huang2024effibench,
title={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},
author={Huang, Dong and Qing, Yuhao and Shang, Weiyi and Cui, Heming and Zhang, Jie M.},
journal={arXiv preprint arXiv:2402.02037},
year={2024}
}
```
"""
def process_uploaded_file(file):
    """Validate an uploaded result file and pass it on to ``report_results``.

    The upload must be a readable JSON document whose file name follows the
    pattern ``<task>_<model>.json``.  Every failure is reported as a plain
    string instead of being raised, so the caller can display it directly.

    Args:
        file: Path of the uploaded file (``str`` or path-like), or ``None``
            when nothing was uploaded.

    Returns:
        An error message (``str``) when no file was given, the file cannot be
        read or parsed as JSON, or its name does not match the expected
        pattern; otherwise whatever ``report_results(task, model, file)``
        returns.
    """
    if file is None:
        return "No file uploaded."

    try:
        file = Path(file)
        # Parsed only to reject unreadable or malformed uploads early; the
        # decoded content itself is not needed here.
        json.loads(file.read_text(encoding="utf-8"))
    except Exception as e:  # UI boundary: surface any read/parse failure as text
        return f"Error processing the file: {e}"

    try:
        # Split on the first underscore only, so model names that themselves
        # contain underscores are still accepted.
        task, model = file.stem.split("_", 1)
        if not task or not model:
            raise ValueError("task and model must both be non-empty")
    except ValueError as e:
        return f"Error parsing the task and model name from the file name: {e}! Should be in the format of <task>_<model>.json"

    return report_results(task, model, file)
def build_leaderboard_tab(leaderboard_table_file):
    """Create the leaderboard page: header, two tabs, citation and terms.

    The "Leaderboard" tab shows the table filtered by a dataset dropdown and a
    timeout dropdown; the "Submit" tab lets a user upload a JSON result file
    that is handled by ``process_uploaded_file``.

    Args:
        leaderboard_table_file: CSV file (anything ``pandas.read_csv``
            accepts) that has at least the columns ``Dataset`` and
            ``Timeout``.  All other columns are displayed in the table.

    Returns:
        None.  The Gradio components are created as a side effect.
        NOTE(review): presumably this must be called inside an open
        ``gr.Blocks()`` context -- confirm against the caller.
    """
    gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
    gr.Markdown(add_model_md, elem_id="leaderboard_markdown")

    df = pd.read_csv(leaderboard_table_file)

    def filter_leaderboard(dataset, timeout):
        """Return the rows for one dataset/timeout pair, minus those two columns."""
        selected = df[(df["Timeout"] == timeout) & (df["Dataset"] == dataset)]
        return selected.drop(columns=["Timeout", "Dataset"])

    datasets = df["Dataset"].unique().tolist()
    timeouts = df["Timeout"].unique().tolist()
    # An empty table has no first entry; fall back to None so the page still
    # builds (the filter then simply yields an empty table).
    default_dataset = datasets[0] if datasets else None
    default_timeout = timeouts[0] if timeouts else None

    with gr.Tab("Leaderboard"):
        gr.Markdown(leaderboard_md, elem_id="leaderboard_markdown")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(label="Dataset", choices=datasets, value=default_dataset)
            timeout_dropdown = gr.Dropdown(label="Timeout", choices=timeouts, value=default_timeout)

        leaderboard = gr.Dataframe(value=filter_leaderboard(default_dataset, default_timeout))

        # Changing either dropdown re-filters the table using both current values.
        for dropdown in (dataset_dropdown, timeout_dropdown):
            dropdown.change(
                fn=filter_leaderboard,
                inputs=[dataset_dropdown, timeout_dropdown],
                outputs=leaderboard,
            )

    with gr.Tab("Submit"):
        file_upload = gr.File(label="Upload JSON File")
        upload_button = gr.Button("Process File")
        output_text = gr.Textbox(label="Output")
        upload_button.click(process_uploaded_file, inputs=file_upload, outputs=output_text)

    # NOTE(review): the citation accordion and the terms are placed below the
    # tabs (outside of them) -- confirm this matches the intended layout.
    with gr.Accordion("Citation", open=True):
        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
    gr.Markdown(acknowledgment_md, elem_id="ack_markdown")