"""Gradio app serving a multi-view benchmark leaderboard.

Loads per-benchmark results from JSON files, merges them into one
DataFrame per evaluation view, and renders them in tabbed tables.
A background scheduler restarts the hosting Space every 30 minutes
so newly published results are picked up.
"""

import json

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import AutoEvalColumn, fields
from src.envs import API, REPO_ID

# Columns that identify a single run; every per-benchmark frame is merged on these.
_KEY_COLS = ['Context', 'Method', 'Model']


def restart_space():
    """Restart the hosting Space so freshly uploaded results are reloaded."""
    API.restart_space(repo_id=REPO_ID)


def init_leaderboard(data_file):
    """Build a read-only leaderboard table from a results JSON file.

    The JSON maps each benchmark name to a list of records containing the
    key columns (`Context`, `Method`, `Model`) plus a ``Pass_at_1`` metric.
    The per-benchmark frames are outer-merged on the key columns, an
    average ``Score`` column is added, all numeric values are scaled to
    percentages (one decimal), and rows are sorted by ``Score`` descending.

    Args:
        data_file: Path to the JSON results file.

    Returns:
        A non-interactive ``gr.components.DataFrame`` showing the results.
    """
    with open(data_file, "r") as fp:
        data = json.load(fp)

    dataframe = pd.DataFrame()
    for benchmark, records in data.items():
        col_df = pd.DataFrame(records)
        # Each benchmark contributes one metric column named after it.
        col_df.rename(columns={"Pass_at_1": benchmark}, inplace=True)
        dataframe = (
            col_df
            if dataframe.empty
            else dataframe.merge(col_df, on=_KEY_COLS, how='outer')
        )

    # Average over the benchmark columns. The original hard-coded "/ 5";
    # dividing by the actual metric count keeps the score a true mean if
    # the benchmark set ever changes.
    metric_cols = [c for c in dataframe.columns if c not in _KEY_COLS]
    dataframe['Score'] = dataframe[metric_cols].sum(axis=1) / len(metric_cols)

    # Display every numeric value as a percentage with one decimal place.
    numeric_cols = dataframe.select_dtypes(include='number').columns
    dataframe[numeric_cols] = (dataframe[numeric_cols] * 100).round(1)

    # Place 'Score' right after the three key columns. (A second, dead
    # "cols.insert(...)" that ran after reindexing has been removed.)
    cols = list(dataframe.columns)
    cols.remove('Score')
    cols.insert(3, 'Score')
    dataframe = dataframe[cols].sort_values(by='Score', ascending=False)

    return gr.components.DataFrame(
        value=dataframe,
        headers=[c.name for c in fields(AutoEvalColumn) if not c.hidden],
        # NOTE(review): `datatype` does not filter hidden columns while
        # `headers` does — confirm this asymmetry is intentional.
        datatype=[c.type for c in fields(AutoEvalColumn)],
        interactive=False,
    )


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.HTML(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("[Method] Evaluation", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard("./data/data_method.json")
        with gr.TabItem("[Context] Evaluation", elem_id="llm-benchmark-tab-table", id=1):
            leaderboard = init_leaderboard("./data/data_context.json")
        with gr.TabItem("[Incremental] Evaluation", elem_id="llm-benchmark-tab-table", id=2):
            leaderboard = init_leaderboard("./data/data_incr-order.json")

        # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Restart the Space every 30 minutes so new results are reloaded.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.queue(default_concurrency_limit=40).launch()