# fev-leaderboard / app.py
# Oleksandr Shchur
# Update leaderboard UI
# d8795b1
import fev
import gradio as gr
import pandas as pd
from src import about
from src.custom_html_js import custom_css
from src.formatting import make_clickable_model
# Download the seasonal-naive results CSV and parse it into a DataFrame.
# NOTE(review): `df` is not referenced anywhere else in the visible code --
# confirm whether this import-time download is still needed.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv",
)
# Directory in the fev repository that holds one result-summary CSV per model
# for the `chronos_zeroshot` benchmark. Kept in one place so that switching
# branch or benchmark is a single-line change.
_RESULTS_BASE_URL = (
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/"
    "benchmarks/chronos_zeroshot/results"
)

# Models whose summaries are fed to the leaderboard; each name corresponds to
# `<name>.csv` under `_RESULTS_BASE_URL`. The order is the order of
# `summary_urls`.
_SUMMARY_MODELS = [
    "auto_arima",
    "auto_ets",
    "auto_theta",
    "chronos_base",
    "chronos_large",
    "chronos_mini",
    "chronos_small",
    "chronos_tiny",
    "chronos_bolt_base",
    "chronos_bolt_mini",
    "chronos_bolt_small",
    "chronos_bolt_tiny",
    "moirai_base",
    "moirai_large",
    "moirai_small",
    "seasonal_naive",
    "timesfm",
]

# Full URLs of the result summaries passed to `fev.leaderboard`.
summary_urls = [f"{_RESULTS_BASE_URL}/{model}.csv" for model in _SUMMARY_MODELS]
# (raw column name, header shown in the UI) pairs, in display order.
_COLUMN_LABELS = [
    ("gmean_relative_error", "Average relative error"),
    ("avg_rank", "Average rank"),
    ("median_inference_time_s", "Median inference time (s)"),
    ("training_corpus_overlap", "Training corpus overlap (%)"),
]

# Mapping used to rename the selected leaderboard columns for display.
rename_cols = dict(_COLUMN_LABELS)

# Raw column names picked from the leaderboard table, in the same order.
selected_cols = [raw_name for raw_name, _ in _COLUMN_LABELS]
def highlight_zeroshot(styler):
    """Highlight zero training-corpus overlap in bold green.

    Applies an element-wise CSS rule to the "Training corpus overlap (%)"
    column only: cells equal to 0 are rendered bold green, all other cells
    black.

    Args:
        styler: A pandas ``Styler`` (or compatible object) whose data contains
            a "Training corpus overlap (%)" column.

    Returns:
        The result of the element-wise styling call on ``styler``.
    """

    def style_func(val):
        if val == 0:
            return "color: green; font-weight: bold"
        return "color: black"

    # `Styler.map` was introduced in pandas 2.1 as the replacement for
    # `Styler.applymap`; fall back to the old name so older pandas still works.
    apply_elementwise = styler.map if hasattr(styler, "map") else styler.applymap
    return apply_elementwise(style_func, subset=["Training corpus overlap (%)"])
# Styled leaderboard table for each evaluation metric, keyed by metric name.
leaderboards = {}
for metric in ["WQL", "MASE"]:
    # Build the leaderboard from the result summaries for this metric, keep
    # only the displayed columns and give them their display headers.
    summary = fev.leaderboard(summary_urls, metric_column=metric)
    lb = summary[selected_cols].rename(columns=rename_cols)

    # Overlap is formatted as a percentage with one decimal; every other
    # displayed column uses three decimals. Computed before `reset_index()`
    # so that only the metric columns receive a format.
    format_dict = {
        col: "{:.1%}" if col == "Training corpus overlap (%)" else "{:.3f}"
        for col in lb.columns
    }

    # Move the index into regular columns; the code below relies on this
    # producing a `model_name` column.
    lb = lb.reset_index()
    # presumably turns each model name into a markdown link -- verify against
    # src.formatting.make_clickable_model
    lb["model_name"] = lb["model_name"].apply(make_clickable_model)

    leaderboards[metric] = highlight_zeroshot(lb.style.format(format_dict))
# Gradio UI: a title and introduction, then a benchmark tab holding one
# leaderboard table per metric, and an "About" tab.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(about.TITLE)
    gr.Markdown(about.INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.Tab("🏅 Chronos Benchmark II", id=0):
            with gr.Column():
                gr.Markdown(about.CHRONOS_BENCHMARK, elem_classes="markdown-text")
                with gr.Tabs():
                    with gr.Tab("📊 Probabilistic forecast (WQL)"):
                        gr.Markdown("""Forecast accuracy measured by Weighted Quantile Loss.""")
                        gr.Dataframe(
                            value=leaderboards["WQL"],
                            # One entry per displayed column: the model name
                            # (markdown) followed by the four metric columns.
                            # NOTE(review): assumes `reset_index()` yields
                            # exactly one `model_name` column -- confirm.
                            datatype=["markdown", "number", "number", "number", "number"],
                            interactive=False,
                        )

                    with gr.Tab("📈 Point forecast (MASE)"):
                        gr.Markdown("""Forecast accuracy measured by Mean Absolute Scaled Error.""")
                        gr.Dataframe(
                            value=leaderboards["MASE"],
                            # Same column layout as the WQL table.
                            datatype=["markdown", "number", "number", "number", "number"],
                            interactive=False,
                        )

        with gr.Tab("📝 About", id=1):
            gr.Markdown(about.ABOUT_LEADERBOARD)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()