hSterz committed
Commit ac00ce9 · 1 Parent(s): a0b0f73
src/about.py CHANGED
@@ -17,7 +17,8 @@ class Tasks(Enum):
     task2 = Task("VCR", "acc", "VCR")
     task3 = Task("Culture", "acc", "Culture")
     task4 = Task("Trick", "acc", "Trick")
-
+
+class N_Tasks(Enum):
     task0_f1 = Task("Count", "f1", "Count")
     task1_f1 = Task("Order", "f1", "Order")
     task2_f1 = Task("VCR", "f1", "VCR")
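For context, a minimal runnable sketch of the enum pattern this change extends. The shape of Task is an assumption inferred from how values are constructed here and read in read_evals.py below (task.value.benchmark, task.value.col_name); it is not shown in this diff, and members of Tasks above the hunk's context are omitted.

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:  # assumed shape: Task(benchmark, metric, col_name)
    benchmark: str
    metric: str
    col_name: str

class Tasks(Enum):  # accuracy-scored tasks; task0/task1 sit above the hunk and are omitted
    task2 = Task("VCR", "acc", "VCR")
    task3 = Task("Culture", "acc", "Culture")
    task4 = Task("Trick", "acc", "Trick")

class N_Tasks(Enum):  # new in this commit: the f1-scored tasks split into their own enum
    task0_f1 = Task("Count", "f1", "Count")
    task1_f1 = Task("Order", "f1", "Order")
    task2_f1 = Task("VCR", "f1", "VCR")

# Iterating an Enum yields its members and len() counts them, which is
# exactly what the reworked to_dict(tasks) below relies on:
for task in N_Tasks:
    print(task.value.benchmark, task.value.metric)
print(len(N_Tasks))  # 3

Since the f1 members previously lived inside Tasks, hoisting them into N_Tasks also shrinks len(Tasks), which changes the denominator of the average computed in to_dict below.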
src/display/utils.py CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
 
 import pandas as pd
 
-from src.about import Tasks
+from src.about import Tasks, N_Tasks
 
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
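The fields() helper visible in the context above is how the leaderboard discovers column definitions by introspecting class attributes. A self-contained illustration; the Demo class here is made up:

def fields(raw_class):
    # collect class attributes, skipping dunders such as __module__ / __doc__
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

class Demo:  # hypothetical stand-in for a column-definition class
    model = "Model"
    average = "Average"

print(fields(Demo))  # ['Model', 'Average']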
src/leaderboard/read_evals.py CHANGED
@@ -8,7 +8,8 @@ import dateutil
 import numpy as np
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
+from src.display.utils import AutoEvalColumn, ModelType, Precision, WeightType
+from src.about import Tasks, N_Tasks
 from src.submission.check_validity import is_model_on_hub
 
 
@@ -114,9 +115,9 @@ class EvalResult:
         except Exception:
             print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
 
-    def to_dict(self):
+    def to_dict(self, tasks):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+        average = sum([v for v in self.results.values() if v is not None]) / len(tasks)
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.architecture.name: self.architecture,
@@ -127,7 +128,7 @@ class EvalResult:
             AutoEvalColumn.dataset_version.name: self.dataset_version,
         }
 
-        for task in Tasks:
+        for task in tasks:
             data_dict[task.value.col_name] = self.results[task.value.benchmark]
 
         return data_dict
@@ -187,10 +188,12 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         else:
             eval_results[eval_name] = eval_result
 
+    version = results_path.split("/")[-1]
+    tasks = N_Tasks if "n_" in version else Tasks
     results = []
    for v in eval_results.values():
         try:
-            v.to_dict()  # we test if the dict version is complete
+            v.to_dict(tasks)  # we test if the dict version is complete
             results.append(v)
         except KeyError:  # not all eval values present
             continue
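Putting the read_evals.py changes together: to_dict now takes the task set explicitly, and get_raw_eval_results picks it from the last component of results_path. A minimal sketch of that dispatch; the function name select_tasks and the example paths are hypothetical, not from the repo:

from src.about import Tasks, N_Tasks  # as imported in the diff above

def select_tasks(results_path: str):
    # Mirrors the new logic: the final path component is treated as a
    # version tag, and any tag containing "n_" selects the f1 task set.
    version = results_path.split("/")[-1]
    return N_Tasks if "n_" in version else Tasks

# Hypothetical example paths, for illustration only:
assert select_tasks("eval-results/v1") is Tasks
assert select_tasks("eval-results/n_v2") is N_Tasks

Two consequences of the code as written: the check is a substring match, so any version tag containing "n_" anywhere would select N_Tasks; and the average in to_dict still divides by len(tasks) even when some results are None, so missing scores pull the average down rather than being skipped.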