Commit 98eb96a (unverified) by jbnayahu
1 Parent(s): b2373ad

Signed-off-by: Jonathan Bnayahu <[email protected]>

Files changed (4)
  1. app.py +22 -23
  2. src/display/utils.py +48 -48
  3. src/leaderboard/read_evals.py +66 -64
  4. src/populate.py +36 -36
app.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import snapshot_download
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
+    # EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
@@ -18,15 +18,15 @@ from src.display.utils import (
     BENCHMARK_COLS,
     COLS,
     EVAL_COLS,
-    EVAL_TYPES,
+    # EVAL_TYPES,
     AutoEvalColumn,
-    ModelType,
+    # ModelType,
     fields,
-    WeightType,
-    Precision
+    # WeightType,
+    # Precision
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
+from src.populate import get_leaderboard_df
 from src.submission.submit import add_new_eval
 
 
@@ -34,13 +34,13 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 ### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
+# try:
+#     print(EVAL_REQUESTS_PATH)
+#     snapshot_download(
+#         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+#     )
+# except Exception:
+#     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
     snapshot_download(
@@ -49,14 +49,13 @@ try:
 except Exception:
     restart_space()
 
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
 
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+# (
+#     finished_eval_queue_df,
+#     running_eval_queue_df,
+#     pending_eval_queue_df,
+# ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
@@ -69,7 +68,7 @@ def init_leaderboard(dataframe):
         #     cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
         #     label="Select Columns to Display:",
         # ),
-        # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        search_columns=[AutoEvalColumn.model.name],
         # hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        # filter_columns=[
        #     ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
@@ -85,7 +84,7 @@ def init_leaderboard(dataframe):
        #         AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
        #     ),
        # ],
-        bool_checkboxgroup_label="Hide models",
+        # bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
 
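Taken together, the app.py hunks above drop the evaluation-queue plumbing entirely. As a rough, hedged sketch (not verbatim from the repo), the startup path after this commit reduces to the following; the arguments of the results `snapshot_download` are assumed to mirror the commented-out queue download, since that hunk's unchanged lines are not shown here:

```python
# Sketch of app.py's startup after this commit: only the results dataset is
# snapshotted, and the leaderboard dataframe is built without a requests path
# or evaluation-queue dataframes.
from huggingface_hub import snapshot_download

from src.display.utils import BENCHMARK_COLS, COLS
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_leaderboard_df


def restart_space():
    API.restart_space(repo_id=REPO_ID)


try:
    print(EVAL_RESULTS_PATH)
    # Assumption: matches the download options shown (commented out) above.
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception:
    restart_space()

LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
```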
 
src/display/utils.py CHANGED
@@ -23,22 +23,22 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+# auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+# auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -61,44 +61,44 @@ class ModelDetails:
     symbol: str = "" # emoji
 
 
-class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
-    Unknown = ModelDetails(name="", symbol="?")
-
-    def to_str(self, separator=" "):
-        return f"{self.value.symbol}{separator}{self.value.name}"
-
-    @staticmethod
-    def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
-        return ModelType.Unknown
-
-class WeightType(Enum):
-    Adapter = ModelDetails("Adapter")
-    Original = ModelDetails("Original")
-    Delta = ModelDetails("Delta")
-
-class Precision(Enum):
-    float16 = ModelDetails("float16")
-    bfloat16 = ModelDetails("bfloat16")
-    Unknown = ModelDetails("?")
-
-    def from_str(precision):
-        if precision in ["torch.float16", "float16"]:
-            return Precision.float16
-        if precision in ["torch.bfloat16", "bfloat16"]:
-            return Precision.bfloat16
-        return Precision.Unknown
+# class ModelType(Enum):
+#     PT = ModelDetails(name="pretrained", symbol="🟢")
+#     FT = ModelDetails(name="fine-tuned", symbol="🔶")
+#     IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+#     RL = ModelDetails(name="RL-tuned", symbol="🟦")
+#     Unknown = ModelDetails(name="", symbol="?")
+
+#     def to_str(self, separator=" "):
+#         return f"{self.value.symbol}{separator}{self.value.name}"
+
+#     @staticmethod
+#     def from_str(type):
+#         if "fine-tuned" in type or "🔶" in type:
+#             return ModelType.FT
+#         if "pretrained" in type or "🟢" in type:
+#             return ModelType.PT
+#         if "RL-tuned" in type or "🟦" in type:
+#             return ModelType.RL
+#         if "instruction-tuned" in type or "⭕" in type:
+#             return ModelType.IFT
+#         return ModelType.Unknown
+
+# class WeightType(Enum):
+#     Adapter = ModelDetails("Adapter")
+#     Original = ModelDetails("Original")
+#     Delta = ModelDetails("Delta")
+
+# class Precision(Enum):
+#     float16 = ModelDetails("float16")
+#     bfloat16 = ModelDetails("bfloat16")
+#     Unknown = ModelDetails("?")
+
+#     def from_str(precision):
+#         if precision in ["torch.float16", "float16"]:
+#             return Precision.float16
+#         if precision in ["torch.bfloat16", "bfloat16"]:
+#             return Precision.bfloat16
+#         return Precision.Unknown
 
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
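
With the metadata columns commented out, the dynamically built AutoEvalColumn now carries only the clickable model link, the average, and one score column per task, so COLS shrinks accordingly. The sketch below reproduces the make_dataclass pattern used in src/display/utils.py in a self-contained form; ColumnContent's field layout, the ExampleTasks enum, and the fields() helper are assumptions modelled on the standard Hugging Face leaderboard template rather than code shown in this diff:

```python
# Self-contained illustration of the column-building pattern above.
# ColumnContent, Task/ExampleTasks and fields() are stand-ins based on the
# upstream leaderboard template; only the reduced column set reflects this commit.
from dataclasses import dataclass, make_dataclass
from enum import Enum


@dataclass(frozen=True)
class ColumnContent:
    name: str                   # display name, used as the dataframe header
    type: str                   # column type ("markdown", "number", ...)
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


class ExampleTasks(Enum):       # stand-in for the project's Tasks enum
    task0 = Task("example_benchmark", "accuracy", "Example benchmark")


def fields(raw_class):
    """Template helper: return the ColumnContent defaults stored on the class."""
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
]
for task in ExampleTasks:
    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])

# make_dataclass stores each ColumnContent as the field's default, so fields()
# can read the display metadata back off the generated class.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)  # ['Model', 'Average ⬆️', 'Example benchmark']
```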
src/leaderboard/read_evals.py CHANGED
@@ -8,8 +8,8 @@ import dateutil
 import numpy as np
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
-from src.submission.check_validity import is_model_on_hub
+from src.display.utils import AutoEvalColumn, Tasks
+# from src.submission.check_validity import is_model_on_hub
 
 
 @dataclass
@@ -18,19 +18,19 @@ class EvalResult:
     """
     eval_name: str # org_model_precision (uid)
     full_model: str # org/model (path on hub)
-    org: str
-    model: str
-    revision: str # commit hash, "" if main
+    # org: str
+    # model: str
+    # revision: str # commit hash, "" if main
     results: dict
-    precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
-    license: str = "?"
-    likes: int = 0
-    num_params: int = 0
+    # precision: Precision = Precision.Unknown
+    # model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
+    # weight_type: WeightType = WeightType.Original # Original or Adapter
+    # architecture: str = "Unknown"
+    # license: str = "?"
+    # likes: int = 0
+    # num_params: int = 0
     date: str = "" # submission date of request file
-    still_on_hub: bool = False
+    # still_on_hub: bool = False
 
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -38,33 +38,35 @@ class EvalResult:
         with open(json_filepath) as fp:
             data = json.load(fp)
 
+        env_info = data.get("environment_info")
+
         config = data.get("config")
 
         # Precision
-        precision = Precision.from_str(config.get("model_dtype"))
+        # precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
         org_and_model = config.get("model_name", config.get("model_args", None))
         org_and_model = org_and_model.split("/", 1)
 
-        if len(org_and_model) == 1:
-            org = None
-            model = org_and_model[0]
-            result_key = f"{model}_{precision.value.name}"
-        else:
-            org = org_and_model[0]
-            model = org_and_model[1]
-            result_key = f"{org}_{model}_{precision.value.name}"
+        # if len(org_and_model) == 1:
+        #     org = None
+        #     model = org_and_model[0]
+        #     result_key = f"{model}_{precision.value.name}"
+        # else:
+        #     org = org_and_model[0]
+        #     model = org_and_model[1]
+        #     result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
 
-        still_on_hub, _, model_config = is_model_on_hub(
-            full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
-        )
-        architecture = "?"
-        if model_config is not None:
-            architectures = getattr(model_config, "architectures", None)
-            if architectures:
-                architecture = ";".join(architectures)
+        # still_on_hub, _, model_config = is_model_on_hub(
+        #     full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
+        # )
+        # architecture = "?"
+        # if model_config is not None:
+        #     architectures = getattr(model_config, "architectures", None)
+        #     if architectures:
+        #         architecture = ";".join(architectures)
 
         # Extract results available in this file (some results are split in several files)
         results = {}
@@ -80,50 +82,50 @@ class EvalResult:
             results[task.benchmark] = mean_acc
 
         return self(
-            eval_name=result_key,
+            eval_name=full_model,
             full_model=full_model,
-            org=org,
-            model=model,
+            # org=org,
+            # model=model,
             results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
-            still_on_hub=still_on_hub,
-            architecture=architecture
+            # precision=precision,
+            # revision= config.get("model_sha", ""),
+            # still_on_hub=still_on_hub,
+            # architecture=architecture
        )
 
-    def update_with_request_file(self, requests_path):
-        """Finds the relevant request file for the current model and updates info with it"""
-        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
-
-        try:
-            with open(request_file, "r") as f:
-                request = json.load(f)
-            self.model_type = ModelType.from_str(request.get("model_type", ""))
-            self.weight_type = WeightType[request.get("weight_type", "Original")]
-            self.license = request.get("license", "?")
-            self.likes = request.get("likes", 0)
-            self.num_params = request.get("params", 0)
-            self.date = request.get("submitted_time", "")
-        except Exception:
-            print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
+    # def update_with_request_file(self, requests_path):
+    #     """Finds the relevant request file for the current model and updates info with it"""
+    #     request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+
+    #     try:
+    #         with open(request_file, "r") as f:
+    #             request = json.load(f)
+    #         self.model_type = ModelType.from_str(request.get("model_type", ""))
+    #         self.weight_type = WeightType[request.get("weight_type", "Original")]
+    #         self.license = request.get("license", "?")
+    #         self.likes = request.get("likes", 0)
+    #         self.num_params = request.get("params", 0)
+    #         self.date = request.get("submitted_time", "")
+    #     except Exception:
+    #         print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
-            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
+            # AutoEvalColumn.precision.name: self.precision.value.name,
+            # AutoEvalColumn.model_type.name: self.model_type.value.name,
+            # AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            # AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+            # AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
-            AutoEvalColumn.revision.name: self.revision,
+            # AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
-            AutoEvalColumn.license.name: self.license,
-            AutoEvalColumn.likes.name: self.likes,
-            AutoEvalColumn.params.name: self.num_params,
-            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            # AutoEvalColumn.license.name: self.license,
+            # AutoEvalColumn.likes.name: self.likes,
+            # AutoEvalColumn.params.name: self.num_params,
+            # AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
 
         for task in Tasks:
@@ -154,7 +156,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     return request_file
 
 
-def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
+def get_raw_eval_results(results_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
 
@@ -176,7 +178,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        eval_result.update_with_request_file(requests_path)
+        # eval_result.update_with_request_file(requests_path)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
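
One practical consequence of setting eval_name to full_model (instead of the old org_model_precision key) is that every result file for a given model now collapses into a single leaderboard row. The toy demo below illustrates that grouping; the merge loop is assumed from the upstream leaderboard template, since it sits outside the hunks shown, and EvalResult here is a minimal stand-in for the trimmed dataclass above:

```python
# Toy demo of the new grouping behaviour: results keyed by the model path
# merge across files. The dict-merge loop mirrors the upstream template and
# is an assumption about code not shown in this diff.
from dataclasses import dataclass, field


@dataclass
class EvalResult:                      # stand-in with only the fields kept by this commit
    eval_name: str
    full_model: str
    results: dict = field(default_factory=dict)
    date: str = ""


# Two result files for the same model, each covering different benchmarks.
file_a = EvalResult("org/model", "org/model", {"benchmark_1": 0.61})
file_b = EvalResult("org/model", "org/model", {"benchmark_2": 0.47})

eval_results = {}
for eval_result in (file_a, file_b):
    eval_name = eval_result.eval_name  # now just "org/model", no precision suffix
    if eval_name in eval_results:
        eval_results[eval_name].results.update(
            {k: v for k, v in eval_result.results.items() if v is not None}
        )
    else:
        eval_results[eval_name] = eval_result

print(eval_results["org/model"].results)  # {'benchmark_1': 0.61, 'benchmark_2': 0.47}
```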
src/populate.py CHANGED
@@ -8,9 +8,9 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
 
-def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
-    raw_data = get_raw_eval_results(results_path, requests_path)
+    raw_data = get_raw_eval_results(results_path)
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
@@ -22,37 +22,37 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     return df
 
 
-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
-    """Creates the different dataframes for the evaluation queues requestes"""
-    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
-    all_evals = []
-
-    for entry in entries:
-        if ".json" in entry:
-            file_path = os.path.join(save_path, entry)
-            with open(file_path) as fp:
-                data = json.load(fp)
-
-            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-            data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-
-            all_evals.append(data)
-        elif ".md" not in entry:
-            # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
-            for sub_entry in sub_entries:
-                file_path = os.path.join(save_path, entry, sub_entry)
-                with open(file_path) as fp:
-                    data = json.load(fp)
-
-                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-                all_evals.append(data)
-
-    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
-    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
-    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
-    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
-    df_running = pd.DataFrame.from_records(running_list, columns=cols)
-    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
-    return df_finished[cols], df_running[cols], df_pending[cols]
+# def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+#     """Creates the different dataframes for the evaluation queues requestes"""
+#     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
+#     all_evals = []
+
+#     for entry in entries:
+#         if ".json" in entry:
+#             file_path = os.path.join(save_path, entry)
+#             with open(file_path) as fp:
+#                 data = json.load(fp)
+
+#             data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+#             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+
+#             all_evals.append(data)
+#         elif ".md" not in entry:
+#             # this is a folder
+#             sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
+#             for sub_entry in sub_entries:
+#                 file_path = os.path.join(save_path, entry, sub_entry)
+#                 with open(file_path) as fp:
+#                     data = json.load(fp)
+
+#                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+#                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+#                 all_evals.append(data)
+
+#     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
+#     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
+#     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
+#     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
+#     df_running = pd.DataFrame.from_records(running_list, columns=cols)
+#     df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
+#     return df_finished[cols], df_running[cols], df_pending[cols]