Update compute_score.py
compute_score.py
CHANGED (+38 -9)
@@ -25,8 +25,17 @@ def normalize_answer(s):
 
     return white_space_fix(remove_articles(remove_punc(lower(s))))
 
+def recall_score(prediction, ground_truth):
+    prediction_tokens = normalize_answer(prediction).split()
+    ground_truth_tokens = normalize_answer(ground_truth).split()
+    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+    num_same = sum(common.values())
+    if num_same == 0:
+        return 0
+    recall = 1.0 * num_same / len(ground_truth_tokens)
+    return recall
 
-def f1_score(prediction, ground_truth):
+def precision_score(prediction, ground_truth):
     prediction_tokens = normalize_answer(prediction).split()
     ground_truth_tokens = normalize_answer(ground_truth).split()
     common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
@@ -34,9 +43,20 @@ def f1_score(prediction, ground_truth):
     if num_same == 0:
         return 0
     precision = 1.0 * num_same / len(prediction_tokens)
-    recall = 1.0 * num_same / len(ground_truth_tokens)
-    f1 = (2 * precision * recall) / (precision + recall)
-    return f1
+    return precision
+
+
+# def f1_score(prediction, ground_truth):
+#     prediction_tokens = normalize_answer(prediction).split()
+#     ground_truth_tokens = normalize_answer(ground_truth).split()
+#     common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+#     num_same = sum(common.values())
+#     if num_same == 0:
+#         return 0
+#     precision = 1.0 * num_same / len(prediction_tokens)
+#     recall = 1.0 * num_same / len(ground_truth_tokens)
+#     f1 = (2 * precision * recall) / (precision + recall)
+#     return f1
 
 
 def exact_match_score(prediction, ground_truth):
@@ -52,7 +72,7 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
 
 
 def compute_score(dataset, predictions):
-    f1 = exact_match = total = 0
+    recall = precision = f1 = exact_match = total = 0
     for article in dataset:
         for paragraph in article["paragraphs"]:
             for qa in paragraph["qas"]:
@@ -64,15 +84,24 @@ def compute_score(dataset, predictions):
                 ground_truths = list(map(lambda x: x["text"], qa["answers"]))
                 prediction = predictions[qa["id"]]
                 exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
-                f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
-
-
+                recall_temp = metric_max_over_ground_truths(recall_score, prediction, ground_truths)
+                precision_temp = metric_max_over_ground_truths(precision_score, prediction, ground_truths)
+
+                if recall_temp + precision_temp == 0:
+                    f1_temp = 0
+                else:
+                    f1_temp = (2 * precision_temp * recall_temp) / (precision_temp + recall_temp)
 
+                f1 += f1_temp
+                recall += recall_temp
+                precision += precision_temp
 
     exact_match = 100.0 * exact_match / total
     f1 = 100.0 * f1 / total
+    precision = 100.0 * precision / total
+    recall = 100.0 * recall / total
 
-    return {"exact_match": exact_match, "f1": f1}
+    return {"exact_match": exact_match, "f1": f1, "recall": recall, "precision": precision}
 
 
 if __name__ == "__main__":
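For reference, a minimal sketch (hypothetical, not part of the commit) of the bag-of-tokens arithmetic that recall_score and precision_score share. The Counter intersection keeps the multiset of tokens common to both sides, num_same counts them, and F1 is the harmonic mean 2 * P * R / (P + R). The example tokens assume normalize_answer has already lower-cased the text and stripped articles and punctuation:

from collections import Counter

prediction_tokens = "cat sat on mat".split()    # normalized prediction
ground_truth_tokens = "cat on mat".split()      # normalized ground truth

common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
num_same = sum(common.values())                 # 3 ("cat", "on", "mat")

precision = 1.0 * num_same / len(prediction_tokens)    # 3/4 = 0.75
recall = 1.0 * num_same / len(ground_truth_tokens)     # 3/3 = 1.0
f1 = (2 * precision * recall) / (precision + recall)   # 6/7 ≈ 0.857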
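And a hypothetical end-to-end call, assuming the unchanged middle of compute_score (not shown in the hunks above) still increments total once per question and skips missing prediction ids, as in the stock SQuAD evaluator. The input shape mirrors exactly what the loop indexes: articles with "paragraphs", "qas", "answers", and predictions keyed by qa "id":

dataset = [{
    "paragraphs": [{
        "qas": [{
            "id": "q1",
            "answers": [{"text": "Denver Broncos"}, {"text": "Broncos"}],
        }],
    }],
}]
predictions = {"q1": "Denver Broncos"}

print(compute_score(dataset, predictions))
# With a verbatim match, every metric maxes out:
# {'exact_match': 100.0, 'f1': 100.0, 'recall': 100.0, 'precision': 100.0}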