Commit · c3ba57d
Parent(s): bdacdff
parse model config from json files and display clickable links under Model column
- app.py +15 -4
- results/BOOM_leaderboard.csv +15 -15
- src/display/formatting.py +12 -2
- src/leaderboard/read_evals.py +73 -21
- src/populate.py +19 -16
app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-from
+from src.populate import get_model_info_df, get_merged_df

 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -66,6 +66,7 @@ LEADERBOARD_DF = get_leaderboard_df(
 LEADERBOARD_DF_DOMAIN = get_leaderboard_df(
     EVAL_RESULTS_PATH + "/" + "BOOM_leaderboard.csv", EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS
 )
+model_info_df = get_model_info_df(EVAL_RESULTS_PATH)

 # (
 #     finished_eval_queue_df,
@@ -74,12 +75,21 @@ LEADERBOARD_DF_DOMAIN = get_leaderboard_df(
 # ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)


-def init_leaderboard(dataframe):
+def init_leaderboard(dataframe, model_info_df):
     # TODO: merge results df with model info df
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    merged_df = get_merged_df(dataframe, model_info_df)
+    merged_df = merged_df.sort_values(by=[AutoEvalColumn.Rank_6750_scaled.name], ascending=True)
+
+    # Move the model_type_symbol column to the beginning
+    cols = [AutoEvalColumn.model_type_symbol.name] + [
+        col for col in merged_df.columns if col != AutoEvalColumn.model_type_symbol.name
+    ]
+    merged_df = merged_df[cols]
     return Leaderboard(
-        value=
+        value=merged_df,
         datatype=[c.type for c in fields(AutoEvalColumn)],
         select_columns=SelectColumns(
             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
@@ -92,6 +102,7 @@ def init_leaderboard(dataframe):
             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
         ],
         bool_checkboxgroup_label="Hide models",
+        column_widths=[40, 150] + [180 for _ in range(len(merged_df.columns) - 2)],
         interactive=False,
     )

@@ -103,7 +114,7 @@ with demo:

     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Overall", elem_id="boom-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
+            leaderboard = init_leaderboard(LEADERBOARD_DF, model_info_df)

             # TODO - add other tabs if needed
             # with gr.TabItem("🏅 By Domain - TODO", elem_id="boom-benchmark-tab-table", id=1):
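As a quick check of the column_widths expression added above, assuming the merged frame ends up with six display columns (type symbol, Model, model type, and the three BOOM metrics); the column names below are illustrative, not taken from this Space:

# Hypothetical column layout of the merged leaderboard frame.
columns = ["T", "Model", "Model Type", "MASE_6750_scaled", "CRPS_6750_scaled", "Rank_6750_scaled"]

# Same expression as in init_leaderboard: a narrow symbol column, a wider
# Model column, then a uniform width for every remaining column.
column_widths = [40, 150] + [180 for _ in range(len(columns) - 2)]
print(column_widths)  # [40, 150, 180, 180, 180, 180]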
results/BOOM_leaderboard.csv
CHANGED
@@ -1,15 +1,15 @@
-model,
-Toto-Open-Base-1.0,
-moirai_1.1_base,
-moirai_1.1_large,
-moirai_1.1_small,
-timesfm_2_0_500m,
-chronos_bolt_base,
-chronos_bolt_small,
-autoarima,
-timer,
-time-moe,
-visionts,
-autoets,
-autotheta,
-seasonalnaive,
+model,MASE_6750_scaled,CRPS_6750_scaled,Rank_6750_scaled
+Toto-Open-Base-1.0,0.617,0.375,2.336
+moirai_1.1_base,0.710,0.428,4.253
+moirai_1.1_large,0.720,0.436,4.481
+moirai_1.1_small,0.729,0.442,4.820
+timesfm_2_0_500m,0.725,0.447,5.155
+chronos_bolt_base,0.726,0.451,5.447
+chronos_bolt_small,0.733,0.455,5.792
+autoarima,0.824,0.736,9.166
+timer,0.796,0.639,9.370
+time-moe,0.806,0.649,9.381
+visionts,0.988,0.673,10.317
+autoets,0.842,1.975,10.968
+autotheta,1.123,1.018,11.724
+seasonalnaive,1.000,1.000,11.791
src/display/formatting.py
CHANGED
@@ -1,5 +1,15 @@
-def model_hyperlink(
-
+def model_hyperlink(model_link, code_link, model_name):
+    if model_link == "":
+        return model_name
+        # return f'<a target="_blank">{model_name}</a>'
+        # return f'<a target="_blank" href="{link}" rel="noopener noreferrer">{model_name}</a>'
+    else:
+        model_url = f'<a target="_blank" href="{model_link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+        if code_link == "":
+            return model_url
+        else:
+            code_url = f'<a target="_blank" href="{code_link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">code</a>'
+            return f"{model_url} ({code_url})"


 def make_clickable_model(model_name):
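For reference, a minimal sketch of how the new model_hyperlink helper behaves, assuming it is imported from this Space's src/display/formatting module; the URLs below are illustrative, not taken from this repo:

from src.display.formatting import model_hyperlink

# Hypothetical links, used purely for illustration.
cell = model_hyperlink(
    model_link="https://huggingface.co/Datadog/Toto-Open-Base-1.0",
    code_link="https://github.com/DataDog/toto",
    model_name="Toto-Open-Base-1.0",
)
# cell is an HTML anchor for the model page, followed by a "(code)" anchor,
# e.g. '<a target="_blank" href="...">Toto-Open-Base-1.0</a> (<a ...>code</a>)'

# With no links at all, the plain model name comes back unchanged.
assert model_hyperlink("", "", "seasonalnaive") == "seasonalnaive"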
src/leaderboard/read_evals.py
CHANGED
@@ -3,33 +3,33 @@ import json
 import math
 import os
 from dataclasses import dataclass
-
+from pathlib import Path
 import dateutil
 import numpy as np

-from src.display.formatting import make_clickable_model
+from src.display.formatting import make_clickable_model, model_hyperlink
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub


 @dataclass
 class EvalResult:
-    """Represents one full evaluation. Built from a combination of the result and request file for a given run.
-
-    eval_name: str
-    full_model: str
-    org: str
+    """Represents one full evaluation. Built from a combination of the result and request file for a given run."""
+
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: str
     model: str
-    revision: str
+    revision: str  # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown
-    weight_type: WeightType = WeightType.Original
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original  # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = ""
+    date: str = ""  # submission date of request file
     still_on_hub: bool = False

     @classmethod
@@ -85,10 +85,10 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision=
+            precision=precision,
+            revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
-            architecture=architecture
+            architecture=architecture,
         )

     def update_with_request_file(self, requests_path):
@@ -105,7 +105,9 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(
+            print(
+                f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}"
+            )

     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -132,6 +134,59 @@ class EvalResult:
         return data_dict


+@dataclass
+class ModelConfig:
+    """Represents the model configuration of a model"""
+
+    model: str
+    tmp_name: str
+    model_link: str = ""
+    model_type: ModelType = ModelType.Unknown
+    code_link: str = ""
+
+    @classmethod
+    def init_from_json_file(cls, json_filepath):
+        """Inits the result from the specific model result file"""
+        with open(json_filepath) as fp:
+            data = json.load(fp)
+
+        model_type = ModelType.from_str(data.get("model_type", ""))
+        model = data.get("model", "")
+        tmp_name = data.get("tmp_name", "")
+        model_link = data.get("model_link", "")
+        code_link = data.get("code_link", "")
+        return cls(model=model, tmp_name=tmp_name, model_link=model_link, model_type=model_type, code_link=code_link)
+
+    def to_dict(self):
+        """Converts the model info to a dict compatible with our dataframe display"""
+        data_dict = {
+            AutoEvalColumn.model.name: self.model,
+            "model_w_link": model_hyperlink(self.model_link, self.code_link, self.model),
+            AutoEvalColumn.model_type.name: self.model_type.value.name,
+            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            "tmp_name": self.tmp_name,
+        }
+
+        return data_dict
+
+
+def get_model_info(results_path: str) -> list[ModelConfig]:
+    """From the path of the results folder root, extract all needed info for results"""
+    model_result_filepaths = (Path(results_path) / "models_info").glob("**/config.json")
+
+    model_info_list = []
+    for model_result_filepath in model_result_filepaths:
+        # Creation of result
+        model_info = ModelConfig.init_from_json_file(model_result_filepath)
+
+        try:
+            model_info.to_dict()  # we test if the dict version is complete
+            model_info_list.append(model_info)
+        except KeyError:  # not all eval values present
+            continue
+    return model_info_list
+
+
 def get_request_file_for_model(requests_path, model_name, precision):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
@@ -146,10 +201,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
-            if (
-                req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
-            ):
+            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                 request_file = tmp_request_file
     return request_file

@@ -188,7 +240,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     results = []
     for v in eval_results.values():
         try:
-            v.to_dict()
+            v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError:  # not all eval values present
             continue
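For context, get_model_info above scans results/models_info/**/config.json. A minimal sketch of one such file and how ModelConfig consumes it, with made-up values (only the key names are taken from the code above):

import json
import tempfile
from pathlib import Path

from src.leaderboard.read_evals import ModelConfig

# Hypothetical config.json payload; key names match what init_from_json_file reads.
example = {
    "model": "Toto-Open-Base-1.0",
    "tmp_name": "Toto-Open-Base-1.0",  # should match the "model" values in BOOM_leaderboard.csv
    "model_link": "https://huggingface.co/Datadog/Toto-Open-Base-1.0",  # illustrative URL
    "code_link": "https://github.com/DataDog/toto",  # illustrative URL
    "model_type": "pretrained",  # parsed through ModelType.from_str
}

with tempfile.TemporaryDirectory() as tmp:
    cfg = Path(tmp) / "config.json"
    cfg.write_text(json.dumps(example))
    info = ModelConfig.init_from_json_file(cfg)
    row = info.to_dict()  # includes "model_w_link" built via model_hyperlink
    print(row["model_w_link"])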
src/populate.py
CHANGED
@@ -5,10 +5,28 @@ import pandas as pd
 from dataclasses import fields
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
-from src.leaderboard.read_evals import
+from src.leaderboard.read_evals import get_model_info
 from src.display.utils import ModelType


+def get_model_info_df(results_path: str) -> pd.DataFrame:
+    """Creates a dataframe from all the individual experiment results"""
+    raw_data = get_model_info(results_path)
+    all_data_json = [v.to_dict() for v in raw_data]
+    df = pd.DataFrame.from_records(all_data_json)
+    return df
+
+
+def get_merged_df(result_df: pd.DataFrame, model_info_df: pd.DataFrame) -> pd.DataFrame:
+    """Merges the model info dataframe with the results dataframe"""
+    result_df = result_df.rename(columns={"Model": "tmp_name"})
+    merged_df = pd.merge(model_info_df, result_df, on="tmp_name", how="inner")
+    assert len(merged_df) == len(result_df)
+    merged_df = merged_df.drop(columns=["Model", "tmp_name"])
+    merged_df = merged_df.rename(columns={"model_w_link": "Model"})
+    return merged_df
+
+
 # def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
 #     """Creates a dataframe from all the individual experiment results"""
 #     raw_data = get_raw_eval_results(results_path, requests_path)
@@ -45,21 +63,6 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     # Assuming `df` is your DataFrame:
     df.rename(columns=column_mapping, inplace=True)

-    # Create a new column for model type symbol by parsing the model_type column
-    df[AutoEvalColumn.model_type_symbol.name] = df[AutoEvalColumn.model_type.name].apply(
-        lambda x: ModelType.from_str(x).value.symbol
-    )
-    # Prepend the value of model_type_symbol to the value of model_type
-    df[AutoEvalColumn.model_type.name] = (
-        df[AutoEvalColumn.model_type_symbol.name] + " " + df[AutoEvalColumn.model_type.name]
-    )
-
-    # Move the model_type_symbol column to the beginning
-    cols = [AutoEvalColumn.model_type_symbol.name] + [
-        col for col in df.columns if col != AutoEvalColumn.model_type_symbol.name
-    ]
-    df = df[cols]
-
     df = df.sort_values(by=[AutoEvalColumn.Rank_6750_scaled.name], ascending=True)
     return df
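A small sketch of what get_merged_df does to the column set, using toy frames (column contents are made up; in the app the inputs come from get_leaderboard_df and get_model_info_df):

import pandas as pd

from src.populate import get_merged_df

# Toy results frame: one row keyed by the display "Model" column.
results = pd.DataFrame({"Model": ["Toto-Open-Base-1.0"], "Rank_6750_scaled": [2.336]})

# Toy model-info frame, shaped like get_model_info_df output.
model_info = pd.DataFrame(
    {
        "Model": ["Toto-Open-Base-1.0"],  # plain name, dropped after the merge
        "model_w_link": ['<a href="https://huggingface.co/...">Toto-Open-Base-1.0</a>'],
        "tmp_name": ["Toto-Open-Base-1.0"],  # join key against the results frame
    }
)

merged = get_merged_df(results, model_info)
print(merged.columns.tolist())  # ['Model', 'Rank_6750_scaled'] — "Model" now holds the HTML link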