Spaces: [email protected] committed · Commit 36b6f3b · Parent(s): 7d43722

update

- app.py +1 -1
- src/leaderboard/read_evals.py +3 -1
app.py CHANGED
@@ -89,7 +89,7 @@ def init_asset_plot(df):
         autosize=False,
         width=1000,
         height=700,
-        title=f"
+        title=f"Top 3 accuracies breakdown"
     )
     return fig
 
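For reference, autosize, width, height, and title are standard Plotly layout options, so the hunk above appears to sit inside a fig.update_layout(...) call in init_asset_plot. The sketch below shows how those keywords fit together; the update_layout wrapper and the bar-chart data are assumptions for illustration, and only the added title string comes from the commit.

import plotly.graph_objects as go

# Hypothetical data: the real figure is built from df inside init_asset_plot.
fig = go.Figure(go.Bar(x=["model-a", "model-b", "model-c"], y=[71.2, 69.8, 68.5]))
fig.update_layout(
    autosize=False,
    width=1000,
    height=700,
    title="Top 3 accuracies breakdown",  # the string this commit adds (written as an f-string with no placeholders in the diff)
)
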
src/leaderboard/read_evals.py CHANGED
@@ -73,10 +73,12 @@ class EvalResult:
 
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
+            # if accs.size == 0 or any([acc is None for acc in accs]):
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
             print(accs)
-            mean_acc = np.mean(accs) * 100.0
+            mean_acc = np.nanmean(accs) * 100.0
+            # mean_acc = np.mean(accs) * 100.0
             results[task.benchmark] = mean_acc
 
         return self(
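The functional change above is the switch from np.mean to np.nanmean: np.mean returns NaN as soon as any element of the array is NaN, while np.nanmean ignores NaN entries and averages whatever scores are present. A quick illustration with made-up numbers (not taken from any results file):

import numpy as np

accs = np.array([0.41, np.nan, 0.47])  # hypothetical per-file scores, one missing

print(np.mean(accs) * 100.0)     # nan     -- old behaviour: a single missing score poisons the average
print(np.nanmean(accs) * 100.0)  # ~44.0   -- new behaviour: averages only the scores that are present

Note that the existing guard (if accs.size == 0 or any([acc is None for acc in accs])) still skips a benchmark when a score is literally None; nanmean only changes how NaN floats are handled.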