[email protected] committed on
Commit
36b6f3b
·
1 Parent(s): 7d43722
Files changed (2) hide show
  1. app.py +1 -1
  2. src/leaderboard/read_evals.py +3 -1
app.py CHANGED
@@ -89,7 +89,7 @@ def init_asset_plot(df):
89
  autosize=False,
90
  width=1000,
91
  height=700,
92
- title=f"Accuracy per Asset"
93
  )
94
  return fig
95
 
 
89
  autosize=False,
90
  width=1000,
91
  height=700,
92
+ title=f"Top 3 accuracies breakdown"
93
  )
94
  return fig
95
 
src/leaderboard/read_evals.py CHANGED
@@ -73,10 +73,12 @@ class EvalResult:
73
 
74
  # We average all scores of a given metric (not all metrics are present in all files)
75
  accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
 
76
  if accs.size == 0 or any([acc is None for acc in accs]):
77
  continue
78
  print(accs)
79
- mean_acc = np.mean(accs) * 100.0
 
80
  results[task.benchmark] = mean_acc
81
 
82
  return self(
 
73
 
74
  # We average all scores of a given metric (not all metrics are present in all files)
75
  accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
76
+ # if accs.size == 0 or any([acc is None for acc in accs]):
77
  if accs.size == 0 or any([acc is None for acc in accs]):
78
  continue
79
  print(accs)
80
+ mean_acc = np.nanmean(accs) * 100.0
81
+ # mean_acc = np.mean(accs) * 100.0
82
  results[task.benchmark] = mean_acc
83
 
84
  return self(