samuelam committed on
Commit
78bcf13
1 Parent(s): 7ec3506

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -109,6 +109,8 @@ def add_new_eval(
109
  difficulty_scores = {"Easy": 0, "Medium": 0, "Hard": 0}
110
  difficulty_counts = {"Easy": 0, "Medium": 0, "Hard": 0}
111
 
 
 
112
  with open(f"scored/{organization}_{model_name}.jsonl", "w") as scored_file:
113
  with open(file_path, 'r') as f:
114
  for ix, line in enumerate(f):
@@ -139,6 +141,8 @@ def add_new_eval(
139
  }) + "\n"
140
  )
141
 
 
 
142
  scores += score
143
  num_questions += 1
144
  difficulty_scores[difficulty] += score
@@ -156,10 +160,10 @@ def add_new_eval(
156
  token=TOKEN
157
  )
158
 
159
- accuracy = float("{:.1f}".format(np.average([x["acc"] for x in scored_file]) * 100))
160
- coverage = float("{:.1f}".format(np.average([x["has_ans"] for x in scored_file])))
161
- em = float("{:.1f}".format(np.average([1 if x["acc"] == 1 else 0 for x in scored_file])))
162
- precision = float("{:.1f}".format(np.average([x["acc"] for x in scored_file if x["has_ans"] == 1])))
163
 
164
  eval_entry = {
165
  "Model Name": model_name,
 
109
  difficulty_scores = {"Easy": 0, "Medium": 0, "Hard": 0}
110
  difficulty_counts = {"Easy": 0, "Medium": 0, "Hard": 0}
111
 
112
+ all_scores = list()
113
+
114
  with open(f"scored/{organization}_{model_name}.jsonl", "w") as scored_file:
115
  with open(file_path, 'r') as f:
116
  for ix, line in enumerate(f):
 
141
  }) + "\n"
142
  )
143
 
144
+ all_scores.append({"score": score, "has_ans": has_ans})
145
+
146
  scores += score
147
  num_questions += 1
148
  difficulty_scores[difficulty] += score
 
160
  token=TOKEN
161
  )
162
 
163
+ accuracy = float("{:.1f}".format(np.average([x["acc"] for x in all_scores]) * 100))
164
+ coverage = float("{:.1f}".format(np.average([x["has_ans"] for x in all_scores])))
165
+ em = float("{:.1f}".format(np.average([1 if x["acc"] == 1 else 0 for x in all_scores])))
166
+ precision = float("{:.1f}".format(np.average([x["acc"] for x in all_scores if x["has_ans"] == 1])))
167
 
168
  eval_entry = {
169
  "Model Name": model_name,