import json

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    AutoEvalColumn,
    fields,
)
from src.envs import API, REPO_ID


def restart_space():
    # Restart the hosting Space so the leaderboard reloads fresh data files.
    API.restart_space(repo_id=REPO_ID)


def init_leaderboard(data_file):
    with open(data_file, "r") as fp:
        data = json.load(fp)
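
    # Assumed input layout (inferred from the merge below, not a documented
    # schema): each top-level key names one benchmark, and each value is a list
    # of records keyed by 'Context', 'Method', 'Model' plus a 'Pass_at_1'
    # fraction, e.g.
    #   {"<benchmark>": [{"Context": "...", "Method": "...",
    #                     "Model": "...", "Pass_at_1": 0.42}, ...], ...}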

    # Build one wide table: each benchmark contributes a column of Pass@1
    # scores, outer-joined on the (Context, Method, Model) identity columns.
    dataframe = pd.DataFrame()
    for key, value in data.items():
        col_df = pd.DataFrame(value)
        col_df.rename(columns={"Pass_at_1": key}, inplace=True)
        dataframe = col_df if dataframe.empty else dataframe.merge(
            col_df, on=['Context', 'Method', 'Model'], how='outer'
        )

    # Aggregate score: average across the benchmark columns. The divisor
    # hard-codes five score columns and must stay in sync with the data files.
    dataframe['Score'] = dataframe.drop(columns=['Context', 'Method', 'Model']).sum(axis=1) / 5
    # Display fractions as percentages rounded to one decimal place.
    numeric_cols = dataframe.select_dtypes(include='number').columns
    dataframe[numeric_cols] = (dataframe[numeric_cols] * 100).round(1)
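    # Worked example (illustrative values only): a row with Pass@1 fractions
    # (0.50, 0.40, 0.30, 0.20, 0.10) gets Score = 1.50 / 5 = 0.30, which the
    # percentage conversion above then displays as 30.0.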

    # Move 'Score' directly after the three identity columns.
    cols = list(dataframe.columns)
    cols.remove('Score')
    cols.insert(3, 'Score')
    dataframe = dataframe[cols]

    dataframe = dataframe.sort_values(by='Score', ascending=False)
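    # Final layout (values illustrative): Context, Method, Model, Score, then
    # one percentage column per benchmark, with rows ranked by Score descending.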

    return gr.components.DataFrame(
        value=dataframe,
        headers=[c.name for c in fields(AutoEvalColumn) if not c.hidden],
        # Keep the datatype list aligned with the visible headers above.
        datatype=[c.type for c in fields(AutoEvalColumn) if not c.hidden],
        interactive=False,
    )


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.HTML(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("[Method] Evaluation", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard("./data/data_method.json")

        with gr.TabItem("[Context] Evaluation", elem_id="llm-benchmark-tab-table", id=1):
            leaderboard = init_leaderboard("./data/data_context.json")

        with gr.TabItem("[Incremental] Evaluation", elem_id="llm-benchmark-tab-table", id=2):
            leaderboard = init_leaderboard("./data/data_incr-order.json")

        with gr.TabItem("Submission", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Restart the Space every 30 minutes so newly pushed data files are picked up.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.queue(default_concurrency_limit=40).launch()