Spaces:
Sleeping
Sleeping
Debug
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -84,6 +84,7 @@ class EvalResult:
 
 mean_acc = np.mean(accs)
 results[task.benchmark] = mean_acc
+print(f"{task.benchmark}: {mean_acc:.2f}, (n={len(accs)}) {task.metric}")
 
 return self(
 eval_name=result_key,