Taha Aksu committed on
Commit
05f0a7f
·
1 Parent(s): 1afd9bb

Update leaderboard structure and model configs

Browse files
results/Moirai_base/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "Moirai_base",
3
- "model_type": "pretrained",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-base",
6
  "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
 
1
  {
2
  "model": "Moirai_base",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-base",
6
  "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
results/Moirai_large/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "Moirai_large",
3
- "model_type": "pretrained",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
6
  "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
 
1
  {
2
  "model": "Moirai_large",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
6
  "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
results/Moirai_small/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "Moirai_small",
3
- "model_type": "pretrained",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-small",
6
  "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
 
1
  {
2
  "model": "Moirai_small",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-small",
6
  "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
results/Toto_Open_Base_1.0/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "Toto_Open_Base_1.0",
3
- "model_type": "pretrained",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Datadog/Toto-Open-Base-1.0",
6
  "org": "Datadog",
 
1
  {
2
  "model": "Toto_Open_Base_1.0",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/Datadog/Toto-Open-Base-1.0",
6
  "org": "Datadog",
results/YingLong_110m/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "YingLong_110m",
3
- "model_type": "pretrained",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_110m",
6
  "org": "Alibaba",
 
1
  {
2
  "model": "YingLong_110m",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_110m",
6
  "org": "Alibaba",
results/YingLong_300m/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "YingLong_300m",
3
- "model_type": "pretrained",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_300m",
6
  "org": "Alibaba",
 
1
  {
2
  "model": "YingLong_300m",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_300m",
6
  "org": "Alibaba",
results/YingLong_50m/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "YingLong_50m",
3
- "model_type": "pretrained",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_50m",
6
  "org": "Alibaba",
 
1
  {
2
  "model": "YingLong_50m",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_50m",
6
  "org": "Alibaba",
results/YingLong_6m/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "YingLong_6m",
3
- "model_type": "pretrained",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_6m",
6
  "org": "Alibaba",
 
1
  {
2
  "model": "YingLong_6m",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "bf16",
5
  "model_link": "https://huggingface.co/qcw2333/YingLong_6m",
6
  "org": "Alibaba",
results/sundial_base_128m/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "sundial_base_128m",
3
- "model_type": "pretrained",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/thuml/sundial-base-128m",
6
  "org": "THUML @ Tsinghua University",
 
1
  {
2
  "model": "sundial_base_128m",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "float32",
5
  "model_link": "https://huggingface.co/thuml/sundial-base-128m",
6
  "org": "THUML @ Tsinghua University",
results/tabpfn_ts/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "TabPFN-TS",
3
- "model_type": "pretrained",
4
  "model_dtype": "float32",
5
  "model_link": "https://github.com/liam-sbhoo/tabpfn-time-series/tree/main",
6
  "org": "PriorLabs",
 
1
  {
2
  "model": "TabPFN-TS",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "float32",
5
  "model_link": "https://github.com/liam-sbhoo/tabpfn-time-series/tree/main",
6
  "org": "PriorLabs",
results/visionts/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model": "VisionTS",
3
- "model_type": "pretrained",
4
  "model_dtype": "float32",
5
  "model_link": "https://github.com/Keytoyze/VisionTS",
6
  "org": "Zhejiang University",
 
1
  {
2
  "model": "VisionTS",
3
+ "model_type": "zero-shot",
4
  "model_dtype": "float32",
5
  "model_link": "https://github.com/Keytoyze/VisionTS",
6
  "org": "Zhejiang University",
src/about.py CHANGED
@@ -43,14 +43,18 @@ points, spanning seven domains, 10 frequencies, multivariate inputs, and predict
43
  # Which evaluations are you running? how can people reproduce what you have?
44
  LLM_BENCHMARKS_TEXT = f"""
45
  ## Update Log
46
-
47
- ### 2025‑07‑24
48
- - Corrected the Naive and Seasonal Naive scores to match the latest GIFT‑Eval notebooks. Most model rankings remain unchanged; only a few near the bottom shifted slightly (AutoETS and Timer each dropped two places now at 35th and 36th places respectively, while NBEATS moved up one now at 27th place).
49
 
50
  ### 2025-08-05
51
  - Added new columns to the leaderboard: Organization, TestData Leakage, and MASE_Rank. TestData Leakage is a binary indicator specifying whether any test data was present in the training set. MASE_Rank reflects the model's ranking based on the MASE metric, aligned with the ranking scheme used for CRPS_Rank. These additions were made in response to multiple requests from independent groups seeking fairer comparisons. With these updates, the leaderboard now supports sorting by models that do not leak test data, and viewers can choose to rank models based on either MASE_Rank or CRPS_Rank, depending on their use case.
52
  - Added new model type: Agentic to indicate submissions that use an agentic system to generate the forecasts.
53
 
 
 
 
 
 
54
  ## How It Works
55
 
56
  To participate in the GIFT-Eval leaderboard, follow these steps to evaluate your Time Series Model:
 
43
  # Which evaluations are you running? how can people reproduce what you have?
44
  LLM_BENCHMARKS_TEXT = f"""
45
  ## Update Log
46
+ ### 2025-08-25
47
+ - Added new model type: Zero-shot, to distinguish foundation model submissions that do not use the GIFT-Eval training data. A model tagged as zero-shot was not trained on the GIFT-Eval training data. Test data leakage is still tracked separately in the TestData Leakage column. For a model to be tagged as `zero-shot`, it must both have no test data leakage and not use any training split from GIFT-Eval.
 
48
 
49
  ### 2025-08-05
50
  - Added new columns to the leaderboard: Organization, TestData Leakage, and MASE_Rank. TestData Leakage is a binary indicator specifying whether any test data was present in the training set. MASE_Rank reflects the model's ranking based on the MASE metric, aligned with the ranking scheme used for CRPS_Rank. These additions were made in response to multiple requests from independent groups seeking fairer comparisons. With these updates, the leaderboard now supports sorting by models that do not leak test data, and viewers can choose to rank models based on either MASE_Rank or CRPS_Rank, depending on their use case.
51
  - Added new model type: Agentic to indicate submissions that use an agentic system to generate the forecasts.
52
 
53
+
54
+ ### 2025-07-24
55
+ - Corrected the Naive and Seasonal Naive scores to match the latest GIFT-Eval notebooks. Most model rankings remain unchanged; only a few near the bottom shifted slightly (AutoETS and Timer each dropped two places, to 35th and 36th respectively, while NBEATS moved up one place, to 27th).
56
+
57
+
58
  ## How It Works
59
 
60
  To participate in the GIFT-Eval leaderboard, follow these steps to evaluate your Time Series Model:
src/display/utils.py CHANGED
@@ -61,6 +61,7 @@ class ModelDetails:
61
 
62
  class ModelType(Enum):
63
  PT = ModelDetails(name="🟢 pretrained", symbol="🟢")
 
64
  FT = ModelDetails(name="🟣 fine-tuned", symbol="🟣")
65
  AG = ModelDetails(name="🟡 agentic", symbol="🟡")
66
  DL = ModelDetails(name="🔷 deep-learning", symbol="🔷")
@@ -78,6 +79,8 @@ class ModelType(Enum):
78
  return ModelType.FT
79
  if "pretrained" in type or "🟢" in type:
80
  return ModelType.PT
 
 
81
  if "agentic" in type or "🟡" in type:
82
  return ModelType.AG
83
  if "deep-learning" in type or "🟦" in type:
 
61
 
62
  class ModelType(Enum):
63
  PT = ModelDetails(name="🟢 pretrained", symbol="🟢")
64
+ ZT = ModelDetails(name="🔴 zero-shot", symbol="🔴")
65
  FT = ModelDetails(name="🟣 fine-tuned", symbol="🟣")
66
  AG = ModelDetails(name="🟡 agentic", symbol="🟡")
67
  DL = ModelDetails(name="🔷 deep-learning", symbol="🔷")
 
79
  return ModelType.FT
80
  if "pretrained" in type or "🟢" in type:
81
  return ModelType.PT
82
+ if "zero-shot" in type or "🔴" in type:
83
+ return ModelType.ZT
84
  if "agentic" in type or "🟡" in type:
85
  return ModelType.AG
86
  if "deep-learning" in type or "🟦" in type: