File size: 4,574 Bytes
bb8527c c017d10 bb8527c c017d10 bb8527c 7123c10 ceed9d7 bb8527c c017d10 9372938 ac87af7 c017d10 bb8527c f62af41 bb8527c f62af41 9372938 f62af41 bb8527c c017d10 8a5784f c017d10 bb8527c c017d10 bb8527c a57cfe8 bb8527c a57cfe8 bb8527c a57cfe8 bb8527c a57cfe8 bb8527c a57cfe8 c307e47 c4e9d5f bb8527c c307e47 bb8527c eff6503 c017d10 bb8527c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard
# Modified by EffiBench
import json
from pathlib import Path
import pandas as pd
import gradio as gr
from calculate_memory_usage import report_results
def make_default_md_1(link_color="#1976D2"):
    """Build the Markdown/HTML banner shown at the top of the leaderboard page.

    Args:
        link_color: CSS color applied to the three header links. The default
            blue is meant to stay readable in both light and dark mode.

    Returns:
        A Markdown string holding the page title followed by links to the
        paper, the GitHub repository and the dataset.
    """
    # Named header_md (not leaderboard_md) so it does not shadow the
    # module-level constant of that name.
    header_md = f"""
# 🏆 EffiBench Leaderboard 🏆
<a href='https://arxiv.org/abs/2402.02037' style='color: {link_color}; text-decoration: none;'>Paper</a> |
<a href='https://github.com/huangd1999/EffiBench' style='color: {link_color}; text-decoration: none;'>GitHub</a> |
<a href='https://github.com/huangd1999/EffiBench/tree/main/data' style='color: {link_color}; text-decoration: none;'>Dataset</a>
"""
    return header_md
# Call-to-action rendered under the banner: a link that opens a pre-filled
# "model eval request" issue on GitHub. This is a plain string (nothing is
# interpolated), so the percent-escapes in the URL are passed through as-is.
add_model_md = """
🤗 [filing a request](https://github.com/huangd1999/EffiBench/issues/new?assignees=&labels=model+eval&projects=&template=model_eval_request.yml&title=%F0%9F%92%A1+%5BREQUEST%5D+-+%3CMODEL_NAME%3E) to add your models on our leaderboard!
**Test Version**
"""
# One-line introduction displayed at the top of the "Leaderboard" tab.
leaderboard_md = (
    "\n"
    "Three benchmarks are displayed: **EffiBench**, **HumanEval** and **MBPP**.\n"
)
# Terms-of-service text rendered in the page footer. Each tuple entry is one
# line of Markdown; the empty first and last entries produce the leading and
# trailing newline of the final string.
acknowledgment_md = "\n".join(
    (
        "",
        "### Terms of Service",
        "Users are required to agree to the following terms before using the service:",
        "The service is a research preview. It only provides limited safety measures and may generate offensive content.",
        "It must not be used for any illegal, harmful, violent, racist, or sexual purposes.",
        "Please do not upload any private information.",
        "The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.",
        "",
    )
)
# Citation section: a short prompt followed by the BibTeX entry inside a
# fenced code block. The fence is closed explicitly so that any Markdown
# appended after this text is not swallowed into the code block, and every
# author is written in "Last, First" form so BibTeX parses the surnames
# correctly.
citation_md = """
### Citation
Please cite the following paper if you find our leaderboard or dataset helpful.
```
@article{huang2024effibench,
title={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},
author={Huang, Dong and Qing, Yuhao and Shang, Weiyi and Cui, Heming and Zhang, Jie M.},
journal={arXiv preprint arXiv:2402.02037},
year={2024}
}
```
"""
def process_uploaded_file(file):
    """Validate an uploaded result file and return its report.

    The upload must be a JSON document whose file name follows
    ``<task>_<model>.json``. Only the first underscore separates the two
    parts, so a model name may itself contain underscores.

    Args:
        file: Path of the uploaded file (anything ``pathlib.Path`` accepts),
            or ``None`` when nothing was uploaded.

    Returns:
        Whatever ``report_results(task, model, path)`` returns on success;
        otherwise a human-readable error string. Problems with the upload are
        returned as text rather than raised.
    """
    if file is None:
        return "No file uploaded."

    try:
        path = Path(file)
        # Parsed only to reject unreadable or malformed uploads early;
        # report_results receives the path, not the decoded data. JSON is
        # read as UTF-8 explicitly instead of the platform's locale encoding.
        json.loads(path.read_text(encoding="utf-8"))
    except Exception as e:
        # Deliberately broad: any failure to read or parse the upload is
        # reported back to the user as a message.
        return f"Error processing the file: {e}"

    try:
        # maxsplit=1 keeps everything after the first "_" as the model name.
        task, model = path.stem.split("_", 1)
    except ValueError as e:
        return f"Error parsing the task and model name from the file name: {e}! Should be in the format of <task>_<model>.json"

    return report_results(task, model, path)
def build_leaderboard_tab(leaderboard_table_file):
    """Lay out the leaderboard page: banner, "Leaderboard" and "Submit" tabs, footer.

    Presumably called inside an active ``gr.Blocks()`` context (the caller is
    not visible here); the components and event listeners are created as a
    side effect and nothing is returned.

    Args:
        leaderboard_table_file: CSV path or buffer handed to ``pd.read_csv``.
            The table must contain ``Dataset`` and ``Timeout`` columns; they
            drive the two dropdown filters and are dropped from the
            displayed grid.
    """
    gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
    gr.Markdown(add_model_md, elem_id="leaderboard_markdown")

    df = pd.read_csv(leaderboard_table_file)

    def filter_leaderboard(dataset, timeout):
        """Return the rows for one (dataset, timeout) pair without those two columns."""
        selected = (df["Timeout"] == timeout) & (df["Dataset"] == dataset)
        return df[selected].drop(columns=["Timeout", "Dataset"])

    datasets = df["Dataset"].unique().tolist()
    timeouts = df["Timeout"].unique().tolist()
    # A table without rows yields no choices. Fall back to None so the page
    # still renders (with an empty grid) instead of raising IndexError.
    default_dataset = datasets[0] if datasets else None
    default_timeout = timeouts[0] if timeouts else None

    with gr.Tab("Leaderboard"):
        gr.Markdown(leaderboard_md, elem_id="leaderboard_markdown")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(label="Dataset", choices=datasets, value=default_dataset)
            timeout_dropdown = gr.Dropdown(label="Timeout", choices=timeouts, value=default_timeout)

        leaderboard = gr.Dataframe(value=filter_leaderboard(default_dataset, default_timeout))

        # Changing either dropdown re-filters the grid using both current
        # selections, so the two listeners are wired identically.
        for dropdown in (dataset_dropdown, timeout_dropdown):
            dropdown.change(
                fn=filter_leaderboard,
                inputs=[dataset_dropdown, timeout_dropdown],
                outputs=leaderboard,
            )

    with gr.Tab("Submit"):
        file_upload = gr.File(label="Upload JSON File")
        upload_button = gr.Button("Process File")
        output_text = gr.Textbox(label="Output")
        upload_button.click(process_uploaded_file, inputs=file_upload, outputs=output_text)

    with gr.Accordion("Citation", open=True):
        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
        # NOTE(review): confirm the terms block is meant to live inside the
        # accordion rather than directly below it.
        gr.Markdown(acknowledgment_md, elem_id="ack_markdown")
|