Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -109,6 +109,8 @@ def add_new_eval(
|
|
109 |
difficulty_scores = {"Easy": 0, "Medium": 0, "Hard": 0}
|
110 |
difficulty_counts = {"Easy": 0, "Medium": 0, "Hard": 0}
|
111 |
|
|
|
|
|
112 |
with open(f"scored/{organization}_{model_name}.jsonl", "w") as scored_file:
|
113 |
with open(file_path, 'r') as f:
|
114 |
for ix, line in enumerate(f):
|
@@ -139,6 +141,8 @@ def add_new_eval(
|
|
139 |
}) + "\n"
|
140 |
)
|
141 |
|
|
|
|
|
142 |
scores += score
|
143 |
num_questions += 1
|
144 |
difficulty_scores[difficulty] += score
|
@@ -156,10 +160,10 @@ def add_new_eval(
|
|
156 |
token=TOKEN
|
157 |
)
|
158 |
|
159 |
-
accuracy = float("{:.1f}".format(np.average([x["acc"] for x in
|
160 |
-
coverage = float("{:.1f}".format(np.average([x["has_ans"] for x in
|
161 |
-
em = float("{:.1f}".format(np.average([1 if x["acc"] == 1 else 0 for x in
|
162 |
-
precision = float("{:.1f}".format(np.average([x["acc"] for x in
|
163 |
|
164 |
eval_entry = {
|
165 |
"Model Name": model_name,
|
|
|
109 |
difficulty_scores = {"Easy": 0, "Medium": 0, "Hard": 0}
|
110 |
difficulty_counts = {"Easy": 0, "Medium": 0, "Hard": 0}
|
111 |
|
112 |
+
all_scores = list()
|
113 |
+
|
114 |
with open(f"scored/{organization}_{model_name}.jsonl", "w") as scored_file:
|
115 |
with open(file_path, 'r') as f:
|
116 |
for ix, line in enumerate(f):
|
|
|
141 |
}) + "\n"
|
142 |
)
|
143 |
|
144 |
+
# Keep the per-question score under "acc": the accuracy / em / precision
# aggregates computed after the loop index each entry with x["acc"].
all_scores.append({"acc": score, "has_ans": has_ans})
|
145 |
+
|
146 |
scores += score
|
147 |
num_questions += 1
|
148 |
difficulty_scores[difficulty] += score
|
|
|
160 |
token=TOKEN
|
161 |
)
|
162 |
|
163 |
+
accuracy = float("{:.1f}".format(np.average([x["acc"] for x in all_scores]) * 100))
|
164 |
+
coverage = float("{:.1f}".format(np.average([x["has_ans"] for x in all_scores])))
|
165 |
+
em = float("{:.1f}".format(np.average([1 if x["acc"] == 1 else 0 for x in all_scores])))
|
166 |
+
precision = float("{:.1f}".format(np.average([x["acc"] for x in all_scores if x["has_ans"] == 1])))
|
167 |
|
168 |
eval_entry = {
|
169 |
"Model Name": model_name,
|