leaderboard / app.py
Paul Hager
remove
c314f20
raw
history blame
4.71 kB
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
EVAL_COLS,
EVAL_TYPES,
AutoEvalColumn,
ModelType,
fields,
WeightType,
Precision,
)
from src.envs import (
API,
EVAL_RESULTS_PATH_CDM,
EVAL_RESULTS_PATH_CDM_FI,
REPO_ID,
RESULTS_REPO_CDM,
RESULTS_REPO_CDM_FI,
TOKEN,
)
from src.populate import get_leaderboard_df
def restart_space():
    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
    space_id = REPO_ID
    API.restart_space(repo_id=space_id)
### Space initialisation
def _download_results(repo_id, local_dir):
    """Download one results dataset repo into ``local_dir``.

    Args:
        repo_id: Id of the dataset repo to snapshot.
        local_dir: Local directory the snapshot is written to.

    Any exception raised while downloading is printed and then answered by
    calling ``restart_space()``; it is not re-raised.
    """
    try:
        print(local_dir)
        snapshot_download(
            repo_id=repo_id,
            local_dir=local_dir,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=TOKEN,
        )
    except Exception as err:
        # Top-level boundary: report why the download failed before
        # restarting, otherwise the cause never reaches the logs.
        print(f"Failed to download {repo_id}: {err!r}")
        restart_space()


_download_results(RESULTS_REPO_CDM, EVAL_RESULTS_PATH_CDM)
_download_results(RESULTS_REPO_CDM_FI, EVAL_RESULTS_PATH_CDM_FI)

# One results table per benchmark variant, both built with the same columns.
LEADERBOARD_DF_CDM = get_leaderboard_df(EVAL_RESULTS_PATH_CDM, COLS, BENCHMARK_COLS)
LEADERBOARD_DF_CDM_FI = get_leaderboard_df(EVAL_RESULTS_PATH_CDM_FI, COLS, BENCHMARK_COLS)
def init_leaderboard(dataframe):
    """Build the table component that displays one set of results.

    Args:
        dataframe: Results table, or ``None``. The caller's object is not
            modified; type conversion is done on a copy.

    Returns:
        A ``Leaderboard`` component when construction succeeds. A plain
        ``gr.Dataframe`` is returned instead when ``dataframe`` is ``None``
        or empty, or when building the ``Leaderboard`` raises.
    """
    column_types = [c.type for c in fields(AutoEvalColumn)]

    if dataframe is None or dataframe.empty:
        print("Warning: Empty dataframe provided to leaderboard")
        return gr.Dataframe(headers=COLS, datatype=column_types, label="No results available")

    print(f"Initializing leaderboard with {len(dataframe)} rows")
    print(f"Columns: {dataframe.columns.tolist()}")

    # Convert on a copy so the module-level frame passed in stays untouched.
    dataframe = dataframe.copy()

    integer_columns = (
        AutoEvalColumn.seq_length.name,
        AutoEvalColumn.model_quantization_bits.name,
    )
    for col in dataframe.columns:
        if col == AutoEvalColumn.model.name:
            # Keep model column as is since it contains HTML
            continue
        if col in integer_columns:
            dataframe[col] = dataframe[col].astype(int)
            continue
        # Convert other numeric columns to float; columns that cannot be
        # converted are left unchanged.
        try:
            dataframe[col] = dataframe[col].astype(float)
        except (ValueError, TypeError):
            pass

    try:
        return Leaderboard(
            value=dataframe,
            headers=COLS,
            datatype=column_types,
            select_columns=SelectColumns(
                default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
                label="Select Columns to Display:",
            ),
            search_columns=[AutoEvalColumn.model.name],
            interactive=False,
        )
    except Exception as e:
        print(f"Error initializing leaderboard: {e}")
        # Instead of showing error message, try simpler table display
        return gr.Dataframe(value=dataframe, headers=COLS, datatype=column_types, interactive=False)
# Page layout: title and introduction, one tab per leaderboard plus an
# "About" tab, then a collapsed citation box.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("MIMIC CDM", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard_cdm = init_leaderboard(LEADERBOARD_DF_CDM)

        with gr.TabItem("MIMIC CDM FI", elem_id="llm-benchmark-tab-table", id=1):
            leaderboard_cdm_fi = init_leaderboard(LEADERBOARD_DF_CDM_FI)

        # U+1F4DD (memo emoji), written as an escape so the label survives
        # being saved or served with the wrong text encoding.
        with gr.TabItem("\U0001F4DD About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        # U+1F4D9 (orange book emoji), escaped for the same reason.
        with gr.Accordion("\U0001F4D9 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )
# Background job: call restart_space every 1800 seconds (30 minutes).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=30 * 60)
scheduler.start()

# Enable the request queue, then start serving the app.
demo.queue(default_concurrency_limit=40)
demo.launch(share=True)