Spaces:
Running
Running
[email protected]
committed on
Commit
·
5952263
1
Parent(s):
0885182
test
Browse files
- src/about.py +61 -11
- src/leaderboard/read_evals.py +3 -3
src/about.py
CHANGED
@@ -22,20 +22,70 @@ class Tasks(Enum):
|
|
22 |
class AssetTasks(Enum):
|
23 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
24 |
task0 = Task("acc_electric_motor", "acc_electric_motor", "acc_electric_motor")
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
NUM_FEWSHOT = 0 # Change with your few shot
|
37 |
# ---------------------------------------------------
|
38 |
-
# "results": {"acc_overall": {"acc": 0.22797150356205476}, "acc_sel": {"acc_sel": 0.16170212765957448}, "acc_el": {"acc_el": 0.3669724770642202}, "acc_perturb": {"perturb_score": 0.161604799400075}, "score_consistency": {"consist_score": 0.048743907011623545}, "uncertainty": {"uncertainty_score": 0}}, "acc_electric motor": 0.2863247863247863, "acc_steam turbine": 0.19298245614035087, "acc_aero gas turbine": 0.1875, "acc_industrial gas turbine": 0.19166666666666668, "acc_pump": 0.23684210526315788, "acc_compressor": 0.20454545454545456, "acc_reciprocating internal combustion engine": 0.32142857142857145, "acc_electric generator": 0.2222222222222222, "acc_fan": 0.27, "acc_power transformer": 0.19117647058823528}
|
39 |
|
40 |
|
41 |
# Your leaderboard name
|
|
|
22 |
class AssetTasks(Enum):
|
23 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
24 |
task0 = Task("acc_electric_motor", "acc_electric_motor", "acc_electric_motor")
|
25 |
+
task1 = Task("acc_steam_turbine", "acc_steam_turbine", "acc_steam_turbine")
|
26 |
+
task2 = Task("acc_aero_gas_turbine", "acc_aero_gas_turbine", "acc_aero_gas_turbine")
|
27 |
+
task3 = Task("acc_industrial_gas_turbine", "acc_industrial_gas_turbine", "acc_industrial_gas_turbine")
|
28 |
+
task4 = Task("acc_pump", "acc_pump", "acc_pump")
|
29 |
+
task5 = Task("acc_compressor", "acc_compressor", "acc_compressor")
|
30 |
+
task6 = Task("acc_reciprocating_internal_combustion_engine", "acc_reciprocating_internal_combustion_engine", "acc_reciprocating_internal_combustion_engine")
|
31 |
+
task7 = Task("acc_electric_generator", "acc_electric_generator", "acc_electric_generator")
|
32 |
+
task8 = Task("acc_fan", "acc_fan", "acc_fan")
|
33 |
+
task9 = Task("acc_power_transformer", "acc_power_transformer", "acc_power_transformer")
|
34 |
+
|
35 |
+
|
36 |
+
# {
|
37 |
+
# "acc_overall": {
|
38 |
+
# "acc": 0.38732658417697785
|
39 |
+
# },
|
40 |
+
# "acc_sel": {
|
41 |
+
# "acc_sel": 0.40638297872340423
|
42 |
+
# },
|
43 |
+
# "acc_el": {
|
44 |
+
# "acc_el": 0.4954128440366973
|
45 |
+
# },
|
46 |
+
# "acc_perturb": {
|
47 |
+
# "perturb_score": 0.2819647544056993
|
48 |
+
# },
|
49 |
+
# "score_consistency": {
|
50 |
+
# "consist_score": 0.16422947131608548
|
51 |
+
# },
|
52 |
+
# "uncertainty": {
|
53 |
+
# "uncertainty_score": 0
|
54 |
+
# },
|
55 |
+
# "acc_electric_motor": {
|
56 |
+
# "acc_electric_motor": 0.41025641025641024
|
57 |
+
# },
|
58 |
+
# "acc_steam_turbine": {
|
59 |
+
# "acc_steam_turbine": 0.30409356725146197
|
60 |
+
# },
|
61 |
+
# "acc_aero_gas_turbine": {
|
62 |
+
# "acc_aero_gas_turbine": 0.3541666666666667
|
63 |
+
# },
|
64 |
+
# "acc_industrial_gas_turbine": {
|
65 |
+
# "acc_industrial_gas_turbine": 0.45
|
66 |
+
# },
|
67 |
+
# "acc_pump": {
|
68 |
+
# "acc_pump": 0.39473684210526316
|
69 |
+
# },
|
70 |
+
# "acc_compressor": {
|
71 |
+
# "acc_compressor": 0.35
|
72 |
+
# },
|
73 |
+
# "acc_reciprocating_internal_combustion_engine": {
|
74 |
+
# "acc_reciprocating_internal_combustion_engine": 0.47619047619047616
|
75 |
+
# },
|
76 |
+
# "acc_electric_generator": {
|
77 |
+
# "acc_electric_generator": 0.42735042735042733
|
78 |
+
# },
|
79 |
+
# "acc_fan": {
|
80 |
+
# "acc_fan": 0.445
|
81 |
+
# },
|
82 |
+
# "acc_power_transformer": {
|
83 |
+
# "acc_power_transformer": 0.3161764705882353
|
84 |
+
# }
|
85 |
+
# }
|
86 |
|
87 |
NUM_FEWSHOT = 0 # Change with your few shot
|
88 |
# ---------------------------------------------------
|
|
|
89 |
|
90 |
|
91 |
# Your leaderboard name
|
src/leaderboard/read_evals.py
CHANGED
@@ -107,7 +107,7 @@ class EvalResult:
|
|
107 |
except Exception:
|
108 |
print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
|
109 |
|
110 |
-
def to_dict(self):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
112 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
113 |
data_dict = {
|
@@ -126,7 +126,7 @@ class EvalResult:
|
|
126 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
127 |
}
|
128 |
|
129 |
-
for task in
|
130 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
131 |
|
132 |
return data_dict
|
@@ -186,7 +186,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, task_class) -> l
|
|
186 |
results = []
|
187 |
for v in eval_results.values():
|
188 |
try:
|
189 |
-
v.to_dict() # we test if the dict version is complete
|
190 |
results.append(v)
|
191 |
except KeyError: # not all eval values present
|
192 |
print(f'error key {str(v)}')
|
|
|
107 |
except Exception:
|
108 |
print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
|
109 |
|
110 |
+
def to_dict(self, task_class):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
112 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
113 |
data_dict = {
|
|
|
126 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
127 |
}
|
128 |
|
129 |
+
for task in task_class:
|
130 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
131 |
|
132 |
return data_dict
|
|
|
186 |
results = []
|
187 |
for v in eval_results.values():
|
188 |
try:
|
189 |
+
v.to_dict(task_class) # we test if the dict version is complete
|
190 |
results.append(v)
|
191 |
except KeyError: # not all eval values present
|
192 |
print(f'error key {str(v)}')
|