update jer metric to add equality operation argument
Browse files
jer.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# limitations under the License.
|
14 |
"""TODO: Add a description here."""
|
15 |
|
|
|
16 |
from typing import Iterable
|
17 |
|
18 |
import evaluate
|
@@ -43,17 +44,16 @@ Args:
|
|
43 |
should be a string with tokens separated by spaces.
|
44 |
references: list of reference for each prediction. Each
|
45 |
reference should be a string with tokens separated by spaces.
|
|
|
46 |
Returns:
|
47 |
-
|
48 |
-
|
|
|
49 |
Examples:
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
>>> my_new_module = evaluate.load("my_new_module")
|
54 |
-
>>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
|
55 |
>>> print(results)
|
56 |
-
{'
|
57 |
"""
|
58 |
|
59 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
@@ -85,7 +85,7 @@ class jer(evaluate.Metric):
|
|
85 |
# TODO: Download external resources if needed
|
86 |
pass
|
87 |
|
88 |
-
def _compute(self, predictions, references):
|
89 |
"""Returns the scores"""
|
90 |
score_dicts = [
|
91 |
self._compute_single(prediction=prediction, reference=reference)
|
@@ -93,22 +93,28 @@ class jer(evaluate.Metric):
|
|
93 |
]
|
94 |
return {('mean_' + key): np.mean([scores[key] for scores in score_dicts]) for key in score_dicts[0].keys()}
|
95 |
|
96 |
-
def _compute_single(self, *, prediction: Iterable[str | tuple | int], reference: Iterable[str | tuple | int]):
|
97 |
reference_set = set(reference)
|
98 |
assert len(reference) == len(reference_set), f"Duplicates found in the reference list {reference}"
|
99 |
prediction_set = set(prediction)
|
100 |
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
|
105 |
# Calculate metrics
|
106 |
-
precision =
|
107 |
-
recall =
|
108 |
f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
|
109 |
|
110 |
return {
|
111 |
'precision': precision,
|
112 |
'recall': recall,
|
113 |
'f1': f1_score
|
114 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
# limitations under the License.
|
14 |
"""TODO: Add a description here."""
|
15 |
|
16 |
+
from operator import eq
|
17 |
from typing import Iterable
|
18 |
|
19 |
import evaluate
|
|
|
44 |
should be a string with tokens separated by spaces.
|
45 |
references: list of reference for each prediction. Each
|
46 |
reference should be a string with tokens separated by spaces.
|
47 |
+
eq_fn: function to compare two items. Defaults to the equality operator.
|
48 |
Returns:
|
49 |
+
recall:
|
50 |
+
precision:
|
51 |
+
f1:
|
52 |
Examples:
|
53 |
+
>>> jer = evaluate.load("jer")
|
54 |
+
>>> results = jer.compute(references=[["Baris | play | tennis", "Deniz | travel | London"]], predictions=[["Baris | play | tennis"]])
|
|
|
|
|
|
|
55 |
>>> print(results)
|
56 |
+
{'recall': 0.5, 'precision': 1.0, 'f1': 0.6666666666666666}
|
57 |
"""
|
58 |
|
59 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
|
|
85 |
# TODO: Download external resources if needed
|
86 |
pass
|
87 |
|
88 |
+
def _compute(self, predictions, references, eq_fn=eq):
|
89 |
"""Returns the scores"""
|
90 |
score_dicts = [
|
91 |
self._compute_single(prediction=prediction, reference=reference)
|
|
|
93 |
]
|
94 |
return {('mean_' + key): np.mean([scores[key] for scores in score_dicts]) for key in score_dicts[0].keys()}
|
95 |
|
96 |
+
def _compute_single(self, *, prediction: Iterable[str | tuple | int], reference: Iterable[str | tuple | int], eq_fn=eq):
|
97 |
reference_set = set(reference)
|
98 |
assert len(reference) == len(reference_set), f"Duplicates found in the reference list {reference}"
|
99 |
prediction_set = set(prediction)
|
100 |
|
101 |
+
tp = sum(int(is_in(item, prediction, eq_fn=eq_fn)) for item in reference)
|
102 |
+
fp = len(prediction_set) - tp
|
103 |
+
fn = len(reference_set) - tp
|
104 |
|
105 |
# Calculate metrics
|
106 |
+
precision = tp / (tp + fp) if tp + fp > 0 else 0
|
107 |
+
recall = tp / (tp + fn) if tp + fn > 0 else 0
|
108 |
f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
|
109 |
|
110 |
return {
|
111 |
'precision': precision,
|
112 |
'recall': recall,
|
113 |
'f1': f1_score
|
114 |
+
}
|
115 |
+
|
116 |
+
def is_in(target, collection: Iterable, eq_fn=eq) -> bool:
|
117 |
+
for item in collection:
|
118 |
+
if eq_fn(item, target):
|
119 |
+
return True
|
120 |
+
return False
|