[email protected] committed on
Commit
5952263
·
1 Parent(s): 0885182
Files changed (2) hide show
  1. src/about.py +61 -11
  2. src/leaderboard/read_evals.py +3 -3
src/about.py CHANGED
@@ -22,20 +22,70 @@ class Tasks(Enum):
22
  class AssetTasks(Enum):
23
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
24
  task0 = Task("acc_electric_motor", "acc_electric_motor", "acc_electric_motor")
25
- # task1 = Task("acc_steam_turbine", "acc_steam_turbine", "acc_steam_turbine")
26
- # task2 = Task("acc_aero_gas_turbine", "acc_aero_gas_turbine", "acc_aero_gas_turbine")
27
- # task3 = Task("acc_industrial_gas_turbine", "acc_industrial_gas_turbine", "acc_industrial_gas_turbine")
28
- # task4 = Task("acc_pump", "acc_pump", "acc_pump")
29
- # task5 = Task("acc_compressor", "acc_compressor", "acc_compressor")
30
- # task6 = Task("acc_reciprocating_internal_combustion_engine", "acc_reciprocating_internal_combustion_engine", "acc_reciprocating_internal_combustion_engine")
31
- # task7 = Task("acc_electric_generator", "acc_electric_generator", "acc_electric_generator")
32
- # task8 = Task("acc_fan", "acc_fan", "acc_fan")
33
- # task9 = Task("acc_power_transformer", "acc_power_transformer", "acc_power_transformer")
34
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  NUM_FEWSHOT = 0 # Change with your few shot
37
  # ---------------------------------------------------
38
- # "results": {"acc_overall": {"acc": 0.22797150356205476}, "acc_sel": {"acc_sel": 0.16170212765957448}, "acc_el": {"acc_el": 0.3669724770642202}, "acc_perturb": {"perturb_score": 0.161604799400075}, "score_consistency": {"consist_score": 0.048743907011623545}, "uncertainty": {"uncertainty_score": 0}}, "acc_electric motor": 0.2863247863247863, "acc_steam turbine": 0.19298245614035087, "acc_aero gas turbine": 0.1875, "acc_industrial gas turbine": 0.19166666666666668, "acc_pump": 0.23684210526315788, "acc_compressor": 0.20454545454545456, "acc_reciprocating internal combustion engine": 0.32142857142857145, "acc_electric generator": 0.2222222222222222, "acc_fan": 0.27, "acc_power transformer": 0.19117647058823528}
39
 
40
 
41
  # Your leaderboard name
 
22
  class AssetTasks(Enum):
23
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
24
  task0 = Task("acc_electric_motor", "acc_electric_motor", "acc_electric_motor")
25
+ task1 = Task("acc_steam_turbine", "acc_steam_turbine", "acc_steam_turbine")
26
+ task2 = Task("acc_aero_gas_turbine", "acc_aero_gas_turbine", "acc_aero_gas_turbine")
27
+ task3 = Task("acc_industrial_gas_turbine", "acc_industrial_gas_turbine", "acc_industrial_gas_turbine")
28
+ task4 = Task("acc_pump", "acc_pump", "acc_pump")
29
+ task5 = Task("acc_compressor", "acc_compressor", "acc_compressor")
30
+ task6 = Task("acc_reciprocating_internal_combustion_engine", "acc_reciprocating_internal_combustion_engine", "acc_reciprocating_internal_combustion_engine")
31
+ task7 = Task("acc_electric_generator", "acc_electric_generator", "acc_electric_generator")
32
+ task8 = Task("acc_fan", "acc_fan", "acc_fan")
33
+ task9 = Task("acc_power_transformer", "acc_power_transformer", "acc_power_transformer")
34
+
35
+
36
+ # {
37
+ # "acc_overall": {
38
+ # "acc": 0.38732658417697785
39
+ # },
40
+ # "acc_sel": {
41
+ # "acc_sel": 0.40638297872340423
42
+ # },
43
+ # "acc_el": {
44
+ # "acc_el": 0.4954128440366973
45
+ # },
46
+ # "acc_perturb": {
47
+ # "perturb_score": 0.2819647544056993
48
+ # },
49
+ # "score_consistency": {
50
+ # "consist_score": 0.16422947131608548
51
+ # },
52
+ # "uncertainty": {
53
+ # "uncertainty_score": 0
54
+ # },
55
+ # "acc_electric_motor": {
56
+ # "acc_electric_motor": 0.41025641025641024
57
+ # },
58
+ # "acc_steam_turbine": {
59
+ # "acc_steam_turbine": 0.30409356725146197
60
+ # },
61
+ # "acc_aero_gas_turbine": {
62
+ # "acc_aero_gas_turbine": 0.3541666666666667
63
+ # },
64
+ # "acc_industrial_gas_turbine": {
65
+ # "acc_industrial_gas_turbine": 0.45
66
+ # },
67
+ # "acc_pump": {
68
+ # "acc_pump": 0.39473684210526316
69
+ # },
70
+ # "acc_compressor": {
71
+ # "acc_compressor": 0.35
72
+ # },
73
+ # "acc_reciprocating_internal_combustion_engine": {
74
+ # "acc_reciprocating_internal_combustion_engine": 0.47619047619047616
75
+ # },
76
+ # "acc_electric_generator": {
77
+ # "acc_electric_generator": 0.42735042735042733
78
+ # },
79
+ # "acc_fan": {
80
+ # "acc_fan": 0.445
81
+ # },
82
+ # "acc_power_transformer": {
83
+ # "acc_power_transformer": 0.3161764705882353
84
+ # }
85
+ # }
86
 
87
  NUM_FEWSHOT = 0 # Change with your few shot
88
  # ---------------------------------------------------
 
89
 
90
 
91
  # Your leaderboard name
src/leaderboard/read_evals.py CHANGED
@@ -107,7 +107,7 @@ class EvalResult:
107
  except Exception:
108
  print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
109
 
110
- def to_dict(self):
111
  """Converts the Eval Result to a dict compatible with our dataframe display"""
112
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
113
  data_dict = {
@@ -126,7 +126,7 @@ class EvalResult:
126
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
127
  }
128
 
129
- for task in Tasks:
130
  data_dict[task.value.col_name] = self.results[task.value.benchmark]
131
 
132
  return data_dict
@@ -186,7 +186,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, task_class) -> l
186
  results = []
187
  for v in eval_results.values():
188
  try:
189
- v.to_dict() # we test if the dict version is complete
190
  results.append(v)
191
  except KeyError: # not all eval values present
192
  print(f'error key {str(v)}')
 
107
  except Exception:
108
  print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
109
 
110
+ def to_dict(self, task_class):
111
  """Converts the Eval Result to a dict compatible with our dataframe display"""
112
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
113
  data_dict = {
 
126
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
127
  }
128
 
129
+ for task in task_class:
130
  data_dict[task.value.col_name] = self.results[task.value.benchmark]
131
 
132
  return data_dict
 
186
  results = []
187
  for v in eval_results.values():
188
  try:
189
+ v.to_dict(task_class) # we test if the dict version is complete
190
  results.append(v)
191
  except KeyError: # not all eval values present
192
  print(f'error key {str(v)}')