annamonica committed
Commit c3ba57d · 1 Parent(s): bdacdff

Parse model config from JSON files and display clickable links under the Model column

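The change reads one config.json per model from a models_info directory under the results path. Below is a minimal sketch of such a file, written from Python; the keys match ModelConfig.init_from_json_file in src/leaderboard/read_evals.py further down, but the directory name, model name, and URLs are illustrative assumptions, not values taken from this commit.

import json
import os

# Keys match ModelConfig.init_from_json_file; every value below is an illustrative assumption.
example_config = {
    "model": "example-model",     # display name shown in the Model column
    "tmp_name": "example_model",  # join key; must match the model name in BOOM_leaderboard.csv
    "model_type": "pretrained",   # parsed with ModelType.from_str
    "model_link": "https://huggingface.co/example-org/example-model",  # hypothetical model page
    "code_link": "https://github.com/example-org/example-model",       # hypothetical code repo
}

# Hypothetical location under EVAL_RESULTS_PATH; only the models_info/**/config.json pattern is fixed by the code.
target = "results/models_info/example_model/config.json"
os.makedirs(os.path.dirname(target), exist_ok=True)
with open(target, "w") as fp:
    json.dump(example_config, fp, indent=2)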
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
+from src.populate import get_model_info_df, get_merged_df

 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -66,6 +66,7 @@ LEADERBOARD_DF = get_leaderboard_df(
 LEADERBOARD_DF_DOMAIN = get_leaderboard_df(
     EVAL_RESULTS_PATH + "/" + "BOOM_leaderboard.csv", EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS
 )
+model_info_df = get_model_info_df(EVAL_RESULTS_PATH)

 # (
 #     finished_eval_queue_df,
@@ -74,12 +75,21 @@ LEADERBOARD_DF_DOMAIN = get_leaderboard_df(
 # ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)


-def init_leaderboard(dataframe):
+def init_leaderboard(dataframe, model_info_df):
     # TODO: merge results df with model info df
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    merged_df = get_merged_df(dataframe, model_info_df)
+    merged_df = merged_df.sort_values(by=[AutoEvalColumn.Rank_6750_scaled.name], ascending=True)
+
+    # Move the model_type_symbol column to the beginning
+    cols = [AutoEvalColumn.model_type_symbol.name] + [
+        col for col in merged_df.columns if col != AutoEvalColumn.model_type_symbol.name
+    ]
+    merged_df = merged_df[cols]
     return Leaderboard(
-        value=dataframe,
+        value=merged_df,
         datatype=[c.type for c in fields(AutoEvalColumn)],
         select_columns=SelectColumns(
             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
@@ -92,6 +102,7 @@ def init_leaderboard(dataframe):
             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
         ],
         bool_checkboxgroup_label="Hide models",
+        column_widths=[40, 150] + [180 for _ in range(len(merged_df.columns) - 2)],
         interactive=False,
     )

@@ -103,7 +114,7 @@ with demo:

     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Overall", elem_id="boom-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
+            leaderboard = init_leaderboard(LEADERBOARD_DF, model_info_df)

             # TODO - add other tabs if needed
             # with gr.TabItem("🏅 By Domain - TODO", elem_id="boom-benchmark-tab-table", id=1):
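In short, the app-level flow after this change looks roughly as follows; this is a sketch reusing the names defined in app.py above, not additional code from the commit.

model_info_df = get_model_info_df(EVAL_RESULTS_PATH)           # one row per models_info/**/config.json
leaderboard = init_leaderboard(LEADERBOARD_DF, model_info_df)  # merges on tmp_name, sorts by Rank_6750_scaled, renders links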
results/BOOM_leaderboard.csv CHANGED
@@ -1,15 +1,15 @@
-model,model_type,MASE_6750_scaled,CRPS_6750_scaled,Rank_6750_scaled
-Toto-Open-Base-1.0,pretrained,0.617,0.375,2.336
-moirai_1.1_base,pretrained,0.710,0.428,4.253
-moirai_1.1_large,pretrained,0.720,0.436,4.481
-moirai_1.1_small,pretrained,0.729,0.442,4.820
-timesfm_2_0_500m,pretrained,0.725,0.447,5.155
-chronos_bolt_base,pretrained,0.726,0.451,5.447
-chronos_bolt_small,pretrained,0.733,0.455,5.792
-autoarima,statistical,0.824,0.736,9.166
-timer,pretrained,0.796,0.639,9.370
-time-moe,pretrained,0.806,0.649,9.381
-visionts,pretrained,0.988,0.673,10.317
-autoets,statistical,0.842,1.975,10.968
-autotheta,statistical,1.123,1.018,11.724
-seasonalnaive,statistical,1.000,1.000,11.791
+model,MASE_6750_scaled,CRPS_6750_scaled,Rank_6750_scaled
+Toto-Open-Base-1.0,0.617,0.375,2.336
+moirai_1.1_base,0.710,0.428,4.253
+moirai_1.1_large,0.720,0.436,4.481
+moirai_1.1_small,0.729,0.442,4.820
+timesfm_2_0_500m,0.725,0.447,5.155
+chronos_bolt_base,0.726,0.451,5.447
+chronos_bolt_small,0.733,0.455,5.792
+autoarima,0.824,0.736,9.166
+timer,0.796,0.639,9.370
+time-moe,0.806,0.649,9.381
+visionts,0.988,0.673,10.317
+autoets,0.842,1.975,10.968
+autotheta,1.123,1.018,11.724
+seasonalnaive,1.000,1.000,11.791
src/display/formatting.py CHANGED
@@ -1,5 +1,15 @@
-def model_hyperlink(link, model_name):
-    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+def model_hyperlink(model_link, code_link, model_name):
+    if model_link == "":
+        return model_name
+        # return f'<a target="_blank">{model_name}</a>'
+        # return f'<a target="_blank" href="{link}" rel="noopener noreferrer">{model_name}</a>'
+    else:
+        model_url = f'<a target="_blank" href="{model_link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+        if code_link == "":
+            return model_url
+        else:
+            code_url = f'<a target="_blank" href="{code_link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">code</a>'
+            return f"{model_url} ({code_url})"


 def make_clickable_model(model_name):
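For illustration, the new model_hyperlink renders roughly as follows; the URLs are placeholders and the anchor styling attributes are abbreviated with "..." here.

from src.display.formatting import model_hyperlink

model_hyperlink("", "", "autoarima")
# -> "autoarima"  (no model link: plain text, not clickable)

model_hyperlink("https://example.com/model", "", "example-model")
# -> '<a target="_blank" href="https://example.com/model" ...>example-model</a>'

model_hyperlink("https://example.com/model", "https://example.com/code", "example-model")
# -> '<a ... href="https://example.com/model" ...>example-model</a> (<a ... href="https://example.com/code" ...>code</a>)'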
src/leaderboard/read_evals.py CHANGED
@@ -3,33 +3,33 @@ import json
 import math
 import os
 from dataclasses import dataclass
-
+from pathlib import Path
 import dateutil
 import numpy as np

-from src.display.formatting import make_clickable_model
+from src.display.formatting import make_clickable_model, model_hyperlink
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub


 @dataclass
 class EvalResult:
-    """Represents one full evaluation. Built from a combination of the result and request file for a given run.
-    """
-    eval_name: str # org_model_precision (uid)
-    full_model: str # org/model (path on hub)
-    org: str
+    """Represents one full evaluation. Built from a combination of the result and request file for a given run."""
+
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: str
     model: str
-    revision: str # commit hash, "" if main
+    revision: str  # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original  # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = "" # submission date of request file
+    date: str = ""  # submission date of request file
     still_on_hub: bool = False

     @classmethod
@@ -85,10 +85,10 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
+            precision=precision,
+            revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
-            architecture=architecture
+            architecture=architecture,
         )

     def update_with_request_file(self, requests_path):
@@ -105,7 +105,9 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
+            print(
+                f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}"
+            )

     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -132,6 +134,59 @@ class EvalResult:
         return data_dict


+@dataclass
+class ModelConfig:
+    """Represents the model configuration of a model"""
+
+    model: str
+    tmp_name: str
+    model_link: str = ""
+    model_type: ModelType = ModelType.Unknown
+    code_link: str = ""
+
+    @classmethod
+    def init_from_json_file(cls, json_filepath):
+        """Inits the result from the specific model result file"""
+        with open(json_filepath) as fp:
+            data = json.load(fp)
+
+        model_type = ModelType.from_str(data.get("model_type", ""))
+        model = data.get("model", "")
+        tmp_name = data.get("tmp_name", "")
+        model_link = data.get("model_link", "")
+        code_link = data.get("code_link", "")
+        return cls(model=model, tmp_name=tmp_name, model_link=model_link, model_type=model_type, code_link=code_link)
+
+    def to_dict(self):
+        """Converts the model info to a dict compatible with our dataframe display"""
+        data_dict = {
+            AutoEvalColumn.model.name: self.model,
+            "model_w_link": model_hyperlink(self.model_link, self.code_link, self.model),
+            AutoEvalColumn.model_type.name: self.model_type.value.name,
+            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            "tmp_name": self.tmp_name,
+        }
+
+        return data_dict
+
+
+def get_model_info(results_path: str) -> list[ModelConfig]:
+    """From the path of the results folder root, extract all needed info for results"""
+    model_result_filepaths = (Path(results_path) / "models_info").glob("**/config.json")
+
+    model_info_list = []
+    for model_result_filepath in model_result_filepaths:
+        # Creation of result
+        model_info = ModelConfig.init_from_json_file(model_result_filepath)
+
+        try:
+            model_info.to_dict()  # we test if the dict version is complete
+            model_info_list.append(model_info)
+        except KeyError:  # not all eval values present
+            continue
+    return model_info_list
+
+
 def get_request_file_for_model(requests_path, model_name, precision):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
@@ -146,10 +201,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
-            if (
-                req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
-            ):
+            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                 request_file = tmp_request_file
     return request_file

@@ -188,7 +240,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     results = []
     for v in eval_results.values():
         try:
-            v.to_dict() # we test if the dict version is complete
+            v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError:  # not all eval values present
             continue
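A sketch of the layout get_model_info expects and how it is consumed; only the models_info/**/config.json glob is fixed by the code above, and the directory and model names here are illustrative.

# Assumed layout under the results root:
#
#   results/
#   ├── BOOM_leaderboard.csv
#   └── models_info/
#       ├── example_model_a/config.json
#       └── example_model_b/config.json

from src.leaderboard.read_evals import get_model_info

model_infos = get_model_info("results")    # list[ModelConfig], one entry per config.json found
rows = [m.to_dict() for m in model_infos]  # dicts with the model name, HTML link, model type, and tmp_name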
src/populate.py CHANGED
@@ -5,10 +5,28 @@ import pandas as pd
 from dataclasses import fields
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
-from src.leaderboard.read_evals import get_raw_eval_results
+from src.leaderboard.read_evals import get_model_info
 from src.display.utils import ModelType


+def get_model_info_df(results_path: str) -> pd.DataFrame:
+    """Creates a dataframe from all the individual experiment results"""
+    raw_data = get_model_info(results_path)
+    all_data_json = [v.to_dict() for v in raw_data]
+    df = pd.DataFrame.from_records(all_data_json)
+    return df
+
+
+def get_merged_df(result_df: pd.DataFrame, model_info_df: pd.DataFrame) -> pd.DataFrame:
+    """Merges the model info dataframe with the results dataframe"""
+    result_df = result_df.rename(columns={"Model": "tmp_name"})
+    merged_df = pd.merge(model_info_df, result_df, on="tmp_name", how="inner")
+    assert len(merged_df) == len(result_df)
+    merged_df = merged_df.drop(columns=["Model", "tmp_name"])
+    merged_df = merged_df.rename(columns={"model_w_link": "Model"})
+    return merged_df
+
+
 # def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
 #     """Creates a dataframe from all the individual experiment results"""
 #     raw_data = get_raw_eval_results(results_path, requests_path)
@@ -45,21 +63,6 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     # Assuming `df` is your DataFrame:
     df.rename(columns=column_mapping, inplace=True)

-    # Create a new column for model type symbol by parsing the model_type column
-    df[AutoEvalColumn.model_type_symbol.name] = df[AutoEvalColumn.model_type.name].apply(
-        lambda x: ModelType.from_str(x).value.symbol
-    )
-    # Prepend the value of model_type_symbol to the value of model_type
-    df[AutoEvalColumn.model_type.name] = (
-        df[AutoEvalColumn.model_type_symbol.name] + " " + df[AutoEvalColumn.model_type.name]
-    )
-
-    # Move the model_type_symbol column to the beginning
-    cols = [AutoEvalColumn.model_type_symbol.name] + [
-        col for col in df.columns if col != AutoEvalColumn.model_type_symbol.name
-    ]
-    df = df[cols]
-
     df = df.sort_values(by=[AutoEvalColumn.Rank_6750_scaled.name], ascending=True)
     return df
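A toy run of the new helpers to make the join explicit; the values are made up, and only the column names follow the code above.

import pandas as pd
from src.populate import get_merged_df

result_df = pd.DataFrame({"Model": ["example_model"], "Rank_6750_scaled": [1.0]})
model_info_df = pd.DataFrame(
    {
        "Model": ["Example Model"],                            # plain display name from config.json
        "model_w_link": ['<a href="...">Example Model</a>'],   # HTML link built by model_hyperlink
        "tmp_name": ["example_model"],                         # join key matching the results CSV
    }
)

merged = get_merged_df(result_df, model_info_df)
# merged keeps Rank_6750_scaled and exposes the HTML link under "Model";
# the plain-name "Model" column and the "tmp_name" join key are dropped.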