Spaces:

Salesforce
/

GIFT-Eval

Running

App Files Files Community

Taha Aksu committed on 1 day ago

Commit

05f0a7f

1 Parent(s): 1afd9bb

Update leaderboard structure and model configs

Browse files

Files changed (13) hide show

results/Moirai_base/config.json +1 -1
results/Moirai_large/config.json +1 -1
results/Moirai_small/config.json +1 -1
results/Toto_Open_Base_1.0/config.json +1 -1
results/YingLong_110m/config.json +1 -1
results/YingLong_300m/config.json +1 -1
results/YingLong_50m/config.json +1 -1
results/YingLong_6m/config.json +1 -1
results/sundial_base_128m/config.json +1 -1
results/tabpfn_ts/config.json +1 -1
results/visionts/config.json +1 -1
src/about.py +7 -3
src/display/utils.py +3 -0

results/Moirai_base/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "Moirai_base",
-    "model_type": "pretrained",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-base",
     "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",

 {
     "model": "Moirai_base",
+    "model_type": "zero-shot",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-base",
     "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",

results/Moirai_large/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "Moirai_large",
-    "model_type": "pretrained",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
     "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",

 {
     "model": "Moirai_large",
+    "model_type": "zero-shot",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
     "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",

results/Moirai_small/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "Moirai_small",
-    "model_type": "pretrained",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
     "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",

 {
     "model": "Moirai_small",
+    "model_type": "zero-shot",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
     "code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",

results/Toto_Open_Base_1.0/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "Toto_Open_Base_1.0",
-    "model_type": "pretrained",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Datadog/Toto-Open-Base-1.0",
     "org": "Datadog",

 {
     "model": "Toto_Open_Base_1.0",
+    "model_type": "zero-shot",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/Datadog/Toto-Open-Base-1.0",
     "org": "Datadog",

results/YingLong_110m/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "YingLong_110m",
-    "model_type": "pretrained",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_110m",
     "org": "Alibaba",

 {
     "model": "YingLong_110m",
+    "model_type": "zero-shot",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_110m",
     "org": "Alibaba",

results/YingLong_300m/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "YingLong_300m",
-    "model_type": "pretrained",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_300m",
     "org": "Alibaba",

 {
     "model": "YingLong_300m",
+    "model_type": "zero-shot",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_300m",
     "org": "Alibaba",

results/YingLong_50m/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "YingLong_50m",
-    "model_type": "pretrained",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_50m",
     "org": "Alibaba",

 {
     "model": "YingLong_50m",
+    "model_type": "zero-shot",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_50m",
     "org": "Alibaba",

results/YingLong_6m/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "YingLong_6m",
-    "model_type": "pretrained",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_6m",
     "org": "Alibaba",

 {
     "model": "YingLong_6m",
+    "model_type": "zero-shot",
     "model_dtype": "bf16",
     "model_link": "https://huggingface.co/qcw2333/YingLong_6m",
     "org": "Alibaba",

results/sundial_base_128m/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "sundial_base_128m",
-    "model_type": "pretrained",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/thuml/sundial-base-128m",
     "org": "THUML @ Tsinghua University",

 {
     "model": "sundial_base_128m",
+    "model_type": "zero-shot",
     "model_dtype": "float32",
     "model_link": "https://huggingface.co/thuml/sundial-base-128m",
     "org": "THUML @ Tsinghua University",

results/tabpfn_ts/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "TabPFN-TS",
-    "model_type": "pretrained",
     "model_dtype": "float32",
     "model_link": "https://github.com/liam-sbhoo/tabpfn-time-series/tree/main",
     "org": "PriorLabs",

 {
     "model": "TabPFN-TS",
+    "model_type": "zero-shot",
     "model_dtype": "float32",
     "model_link": "https://github.com/liam-sbhoo/tabpfn-time-series/tree/main",
     "org": "PriorLabs",

results/visionts/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "model": "VisionTS",
-    "model_type": "pretrained",
     "model_dtype": "float32",
     "model_link": "https://github.com/Keytoyze/VisionTS",
     "org": "Zhejiang University",

 {
     "model": "VisionTS",
+    "model_type": "zero-shot",
     "model_dtype": "float32",
     "model_link": "https://github.com/Keytoyze/VisionTS",
     "org": "Zhejiang University",

src/about.py CHANGED Viewed

@@ -43,14 +43,18 @@ points, spanning seven domains, 10 frequencies, multivariate inputs, and predict
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## Update Log
-### 2025‑07‑24
-- Corrected the Naive and Seasonal Naive scores to match the latest GIFT‑Eval notebooks. Most model rankings remain unchanged; only a few near the bottom shifted slightly (AutoETS and Timer each dropped two places now at 35th and 36th places respectively, while NBEATS moved up one now at 27th place).
 ### 2025-08-05
 - Added new columns to the leaderboard: Organization, TestData Leakage, and MASE_Rank. TestData Leakage is a binary indicator specifying whether any test data was present in the training set. MASE_Rank reflects the model's ranking based on the MASE metric, aligned with the ranking scheme used for CRPS_Rank. These additions were made in response to multiple requests from independent groups seeking fairer comparisons. With these updates, the leaderboard now supports sorting by models that do not leak test data, and viewers can choose to rank models based on either MASE_Rank or CRPS_Rank, depending on their use case.
 - Added new model type: Agentic to indicate submissions that use an agentic system to generate the forecasts.
 ## How It Works
 To participate in the GIFT-Eval leaderboard, follow these steps to evaluate your Time Series Model:

 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## Update Log
+### 2025-08-25
+- Added new model type: Zero-shot, to distinguish foundation model submissions that don't use the training data of GIFT-Eval. Models tagged with zero-shot are not trained on the GIFT-Eval training data. Test data leakage is still tracked separately with the TestData Leakage column. For a model to be tagged as `zero-shot`, it must both have no test data leakage and not use any training split from GIFT-Eval.
 ### 2025-08-05
 - Added new columns to the leaderboard: Organization, TestData Leakage, and MASE_Rank. TestData Leakage is a binary indicator specifying whether any test data was present in the training set. MASE_Rank reflects the model's ranking based on the MASE metric, aligned with the ranking scheme used for CRPS_Rank. These additions were made in response to multiple requests from independent groups seeking fairer comparisons. With these updates, the leaderboard now supports sorting by models that do not leak test data, and viewers can choose to rank models based on either MASE_Rank or CRPS_Rank, depending on their use case.
 - Added new model type: Agentic to indicate submissions that use an agentic system to generate the forecasts.
+### 2025‑07‑24
+- Corrected the Naive and Seasonal Naive scores to match the latest GIFT‑Eval notebooks. Most model rankings remain unchanged; only a few near the bottom shifted slightly (AutoETS and Timer each dropped two places now at 35th and 36th places respectively, while NBEATS moved up one now at 27th place).
 ## How It Works
 To participate in the GIFT-Eval leaderboard, follow these steps to evaluate your Time Series Model:

src/display/utils.py CHANGED Viewed

@@ -61,6 +61,7 @@ class ModelDetails:
 class ModelType(Enum):
     PT = ModelDetails(name="🟢 pretrained", symbol="🟢")
     FT = ModelDetails(name="🟣 fine-tuned", symbol="🟣")
     AG = ModelDetails(name="🟡 agentic", symbol="🟡")
     DL = ModelDetails(name="🔷 deep-learning", symbol="🔷")
@@ -78,6 +79,8 @@ class ModelType(Enum):
             return ModelType.FT
         if "pretrained" in type or "🟢" in type:
             return ModelType.PT
         if "agentic" in type or "🟡" in type:
             return ModelType.AG
         if "deep-learning" in type or "🟦" in type:

 class ModelType(Enum):
     PT = ModelDetails(name="🟢 pretrained", symbol="🟢")
+    ZT = ModelDetails(name="🔴 zero-shot", symbol="🔴")
     FT = ModelDetails(name="🟣 fine-tuned", symbol="🟣")
     AG = ModelDetails(name="🟡 agentic", symbol="🟡")
     DL = ModelDetails(name="🔷 deep-learning", symbol="🔷")
             return ModelType.FT
         if "pretrained" in type or "🟢" in type:
             return ModelType.PT
+        if "zero-shot" in type or "🔴" in type:
+            return ModelType.ZT
         if "agentic" in type or "🟡" in type:
             return ModelType.AG
         if "deep-learning" in type or "🟦" in type: