Spaces:
Sleeping
Sleeping
Yotam-Perlitz
committed on
Commit
•
f32be22
1
Parent(s):
363d8ae
remove HFv2 BBH Raw
Browse files
Signed-off-by: Yotam-Perlitz <[email protected]>
app.py
CHANGED
@@ -75,8 +75,8 @@ with st.expander("Leaderboard configurations (defaults are great BTW)", icon="
|
|
75 |
n_models_taken_list = st.slider(
|
76 |
label="Select number of models to use",
|
77 |
min_value=3,
|
78 |
-
max_value=
|
79 |
-
value=
|
80 |
)
|
81 |
|
82 |
n_models_taken_list = [n_models_taken_list]
|
@@ -140,7 +140,7 @@ def run_load(
|
|
140 |
corr_types=["kendall"],
|
141 |
n_exps=10,
|
142 |
my_benchmark=Benchmark(),
|
143 |
-
use_caching=
|
144 |
):
|
145 |
# Create a hash of the inputs to generate a unique cache file for each set of inputs
|
146 |
input_str = (
|
@@ -182,13 +182,16 @@ def run_load(
|
|
182 |
allbench = Benchmark()
|
183 |
allbench.load_local_catalog()
|
184 |
|
|
|
|
|
|
|
185 |
allbench.add_aggregate(
|
186 |
new_col_name="aggregate",
|
187 |
agg_source_name="aggregate",
|
188 |
scenario_whitelist=aggregate_scenario_whitelist,
|
189 |
min_scenario_for_models_to_appear_in_agg=1
|
190 |
if len(aggregate_scenario_whitelist) == 1
|
191 |
-
else 2,
|
192 |
)
|
193 |
|
194 |
allbench.extend(my_benchmark)
|
|
|
75 |
n_models_taken_list = st.slider(
|
76 |
label="Select number of models to use",
|
77 |
min_value=3,
|
78 |
+
max_value=15,
|
79 |
+
value=8,
|
80 |
)
|
81 |
|
82 |
n_models_taken_list = [n_models_taken_list]
|
|
|
140 |
corr_types=["kendall"],
|
141 |
n_exps=10,
|
142 |
my_benchmark=Benchmark(),
|
143 |
+
use_caching=True,
|
144 |
):
|
145 |
# Create a hash of the inputs to generate a unique cache file for each set of inputs
|
146 |
input_str = (
|
|
|
182 |
allbench = Benchmark()
|
183 |
allbench.load_local_catalog()
|
184 |
|
185 |
+
scenarios_to_drop = ["HFv2 BBH Raw"]
|
186 |
+
allbench.df = allbench.df.query("scenario not in @scenarios_to_drop")
|
187 |
+
|
188 |
allbench.add_aggregate(
|
189 |
new_col_name="aggregate",
|
190 |
agg_source_name="aggregate",
|
191 |
scenario_whitelist=aggregate_scenario_whitelist,
|
192 |
min_scenario_for_models_to_appear_in_agg=1
|
193 |
if len(aggregate_scenario_whitelist) == 1
|
194 |
+
else len(aggregate_scenario_whitelist) // 2,
|
195 |
)
|
196 |
|
197 |
allbench.extend(my_benchmark)
|