import fev
import gradio as gr
import pandas as pd

from src import about
from src.custom_html_js import custom_css
from src.formatting import make_clickable_model

# Load the seasonal naive baseline results into a pandas DataFrame
df = pd.read_csv(
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv"
)

summary_urls = [
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_arima.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_ets.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_theta.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_base.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_large.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_mini.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_small.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_tiny.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_base.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_mini.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_small.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_tiny.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/moirai_base.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/moirai_large.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/moirai_small.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/timesfm.csv",
]
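
# Each URL above points to a per-model results CSV that fev.leaderboard
# aggregates below. A minimal sketch of inspecting one of them directly
# (commented out so the app's behavior is unchanged; assumes network access
# to raw.githubusercontent.com):
#
#   preview = pd.read_csv(summary_urls[0])
#   print(preview.head())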

rename_cols = {
    "gmean_relative_error": "Average relative error",
    "avg_rank": "Average rank",
    "median_inference_time_s": "Median inference time (s)",
    "training_corpus_overlap": "Training corpus overlap (%)",
}
selected_cols = list(rename_cols.keys())


def highlight_zeroshot(styler):
    """Highlight the training corpus overlap of zero-shot models in bold green."""

    def style_func(val):
        if val == 0:
            return "color: green; font-weight: bold"
        else:
            return "color: black"

    return styler.map(style_func, subset=["Training corpus overlap (%)"])
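
# Usage sketch: pass a Styler over a frame that already has the renamed
# overlap column, e.g. highlight_zeroshot(lb.style) as done below. Note that
# Styler.map requires pandas >= 2.1; older versions used Styler.applymap.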

# Build one styled leaderboard per metric
leaderboards = {}
for metric in ["WQL", "MASE"]:
    lb = fev.leaderboard(summary_urls, metric_column=metric)[selected_cols].rename(columns=rename_cols)
    # Show metrics with 3 decimals and the training corpus overlap as a percentage
    format_dict = {}
    for col in lb.columns:
        format_dict[col] = "{:.3f}" if col != "Training corpus overlap (%)" else "{:.1%}"
    lb = lb.reset_index()
    lb["model_name"] = lb["model_name"].apply(make_clickable_model)
    leaderboards[metric] = highlight_zeroshot(lb.style.format(format_dict))
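
# At this point `leaderboards` maps "WQL" and "MASE" each to a pandas Styler;
# gr.Dataframe below should render a Styler with its number formatting and
# cell styles applied (with interactive=False).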

with gr.Blocks(css=custom_css) as demo:
    gr.HTML(about.TITLE)
    gr.Markdown(about.INTRODUCTION_TEXT, elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.Tab("Chronos Benchmark II", id=0):
            with gr.Column():
                gr.Markdown(about.CHRONOS_BENCHMARK, elem_classes="markdown-text")
                with gr.Tabs():
                    with gr.Tab("Probabilistic forecast (WQL)"):
                        gr.Markdown("""Forecast accuracy measured by Weighted Quantile Loss.""")
                        gr.Dataframe(
                            value=leaderboards["WQL"],
                            # One datatype per column: model_name (markdown link) + 4 numeric metrics
                            datatype=["markdown", "number", "number", "number", "number"],
                            interactive=False,
                        )
                    with gr.Tab("Point forecast (MASE)"):
                        gr.Markdown("""Forecast accuracy measured by Mean Absolute Scaled Error.""")
                        gr.Dataframe(
                            value=leaderboards["MASE"],
                            datatype=["markdown", "number", "number", "number", "number"],
                            interactive=False,
                        )
        with gr.Tab("About", id=1):
            gr.Markdown(about.ABOUT_LEADERBOARD)
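
# Launch when executed as a script (`python app.py`); Hugging Face Spaces built
# with the Gradio SDK typically run this file the same way.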
if __name__ == "__main__":
    demo.launch()