File size: 4,847 Bytes
0737cdd
fdb3d3e
 
 
d8795b1
 
 
 
5068c86
 
 
fdb3d3e
 
 
0737cdd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
079b094
0737cdd
079b094
0737cdd
079b094
218d801
 
 
 
 
 
 
 
 
 
079b094
 
 
 
 
 
 
 
d8795b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdb3d3e
5068c86
e1ca246
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import fev
import gradio as gr
import pandas as pd

from src import about
from src.custom_html_js import custom_css
from src.formatting import make_clickable_model

# Download the seasonal_naive results CSV from the fev repository into a DataFrame.
# NOTE(review): `df` is not referenced anywhere else in this file — confirm this
# import-time download is still needed.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv",
)


# All per-model result summaries sit in the same directory of the fev repository,
# so each URL is the shared prefix plus "<stem>.csv". The order of the stems is
# the order of the resulting URL list.
_RESULTS_DIR_URL = (
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results"
)
_SUMMARY_STEMS = (
    "auto_arima",
    "auto_ets",
    "auto_theta",
    "chronos_base",
    "chronos_large",
    "chronos_mini",
    "chronos_small",
    "chronos_tiny",
    "chronos_bolt_base",
    "chronos_bolt_mini",
    "chronos_bolt_small",
    "chronos_bolt_tiny",
    "moirai_base",
    "moirai_large",
    "moirai_small",
    "seasonal_naive",
    "timesfm",
)
summary_urls = [f"{_RESULTS_DIR_URL}/{stem}.csv" for stem in _SUMMARY_STEMS]

# Maps each raw leaderboard column name to the heading shown in the UI.
rename_cols = dict(
    gmean_relative_error="Average relative error",
    avg_rank="Average rank",
    median_inference_time_s="Median inference time (s)",
    training_corpus_overlap="Training corpus overlap (%)",
)
# The raw names (in the order above) are also the columns selected from the leaderboard.
selected_cols = [*rename_cols]


def highlight_zeroshot(styler):
    """Color the training-overlap column: bold green where the value is 0, black otherwise.

    Args:
        styler: Object exposing ``map(func, subset=...)`` (the caller in this
            file passes a DataFrame ``.style`` object).

    Returns:
        Whatever ``styler.map`` returns for the "Training corpus overlap (%)" column.
    """
    zero_overlap_css = "color: green; font-weight: bold"
    other_css = "color: black"

    def _overlap_css(cell):
        # Only an exact zero counts as "no overlap"; every other value stays black.
        return zero_overlap_css if cell == 0 else other_css

    return styler.map(_overlap_css, subset=["Training corpus overlap (%)"])


# Build one styled leaderboard table per metric, keyed by the metric name.
leaderboards = {}
for metric in ("WQL", "MASE"):
    summary = fev.leaderboard(summary_urls, metric_column=metric)
    table = summary[selected_cols].rename(columns=rename_cols)
    # Formats are derived before reset_index(), so only the selected metric
    # columns get a number format: percent for the overlap column, three
    # decimals for everything else.
    number_formats = {
        name: "{:.1%}" if name == "Training corpus overlap (%)" else "{:.3f}"
        for name in table.columns
    }
    # reset_index() exposes "model_name" as a regular column (presumably it is
    # the leaderboard's index — confirm against fev) so it can be rewritten.
    table = table.reset_index()
    table["model_name"] = table["model_name"].apply(make_clickable_model)
    styled = table.style.format(number_formats)
    leaderboards[metric] = highlight_zeroshot(styled)


# Gradio UI: a title and introduction, then a benchmark tab holding one
# sub-tab per metric, plus an "About" tab.
#
# The tab labels use \N{...} escapes so the emoji survive any re-encoding of
# this file; the previous labels were mojibake (UTF-8 emoji bytes decoded with
# a single-byte codepage).
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(about.TITLE)
    gr.Markdown(about.INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.Tab("\N{SPORTS MEDAL} Chronos Benchmark II", id=0):
            with gr.Column():
                gr.Markdown(about.CHRONOS_BENCHMARK, elem_classes="markdown-text")
            with gr.Tabs():
                # NOTE(review): the tables appear to have five columns (model
                # name plus four metrics) while `datatype` lists four entries —
                # confirm whether a fifth "number" should be added.
                with gr.Tab("\N{BAR CHART} Probabilistic forecast (WQL)"):
                    gr.Markdown("""Forecast accuracy measured by Weighted Quantile Loss.""")
                    gr.Dataframe(
                        value=leaderboards["WQL"],
                        datatype=["markdown", "number", "number", "number"],
                        interactive=False,
                    )

                with gr.Tab("\N{CHART WITH UPWARDS TREND} Point forecast (MASE)"):
                    gr.Markdown("""Forecast accuracy measured by Mean Absolute Scaled Error.""")
                    gr.Dataframe(
                        value=leaderboards["MASE"],
                        datatype=["markdown", "number", "number", "number"],
                        interactive=False,
                    )

        with gr.Tab("\N{MEMO} About", id=1):
            gr.Markdown(about.ABOUT_LEADERBOARD)

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()