add metric
ecqa.py  CHANGED
@@ -15,7 +15,9 @@
 
 import evaluate
 import datasets
-
+import re
+import string
+from collections import Counter
 
 # TODO: Add BibTeX citation
 _CITATION = """\
@@ -56,6 +58,34 @@ Examples:
 # TODO: Define external resources urls if needed
 BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 
+def remove_(text: str) -> str:
+    '''Strip unnecessary symbols.'''
+    text = re.sub("'", " ", text)
+    text = re.sub('"', " ", text)
+    text = re.sub('《', " ", text)
+    text = re.sub('》', " ", text)
+    text = re.sub('<', " ", text)
+    text = re.sub('>', " ", text)
+    text = re.sub('〈', " ", text)
+    text = re.sub('〉', " ", text)
+    text = re.sub(r"\(", " ", text)
+    text = re.sub(r"\)", " ", text)
+    text = re.sub("‘", " ", text)
+    text = re.sub("’", " ", text)
+    return text
+
+def white_space_fix(text: str) -> str:
+    '''Collapse consecutive whitespace into a single space.'''
+    return ' '.join(text.split())
+
+def remove_punc(text: str) -> str:
+    '''Remove punctuation.'''
+    exclude = set(string.punctuation)
+    return ''.join(ch for ch in text if ch not in exclude)
+
+def lower(text: str) -> str:
+    '''Convert to lowercase.'''
+    return text.lower()
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class ecqa(evaluate.Metric):
@@ -85,11 +115,59 @@ class ecqa(evaluate.Metric):
         """Optional: download external resources useful to compute the scores"""
         # TODO: Download external resources if needed
         pass
+
+    def __normalize(self, text: str):
+        '''Strip symbols, lowercase, drop punctuation, and collapse whitespace.'''
+        text = remove_(text)
+        text = lower(text)
+        text = remove_punc(text)
+        return white_space_fix(text)
+
+    def __compute_f1(self, prediction: str, reference: str) -> tuple[float, float, float]:
+        '''Character-level F1, precision, and recall for one prediction-reference pair.'''
+        predicted_tokens = prediction.split()
+        referenced_tokens = reference.split()
+
+        predicted_chars = []
+        for token in predicted_tokens:
+            predicted_chars += [char for char in token]
 
+        referenced_chars = []
+        for token in referenced_tokens:
+            referenced_chars += [char for char in token]
+
+        # characters shared by prediction and reference, counted with multiplicity
+        true_positive = Counter(predicted_chars) & Counter(referenced_chars)
+        n_true_positive = sum(true_positive.values())
+        if n_true_positive == 0:
+            return 0.0, 0.0, 0.0
+        precision = 1.0 * n_true_positive / len(predicted_chars)
+        recall = 1.0 * n_true_positive / len(referenced_chars)
+        f1 = (2 * precision * recall) / (precision + recall)
+
+        return f1, precision, recall
+
-    def _compute(self, predictions, references):
+    def _compute(self, predictions: list[str], references: list[str]):
         """Returns the scores"""
         # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+        assert isinstance(predictions, list)
+        assert isinstance(references, list)
+        assert len(predictions) == len(references)
+        f1_acc = precision_acc = recall_acc = total = 0
+        for prediction, reference in zip(predictions, references):
+            total += 1
+            f1_computed, precision_computed, recall_computed = self.__compute_f1(
+                self.__normalize(prediction), self.__normalize(reference))
+
+            f1_acc += f1_computed
+            precision_acc += precision_computed
+            recall_acc += recall_computed
+
+        f1, precision, recall = [
+            # average over all examples, scaled to 0-100
+            100.0 * computed / total
+            for computed in [
+                f1_acc,
+                precision_acc,
+                recall_acc
+            ]
+        ]
         return {
-            "accuracy": accuracy,
+            "f1": f1,
+            "precision": precision,
+            "recall": recall
         }
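
The scores are character-level: each prediction-reference pair is normalized, split into individual characters, and compared as a bag-of-characters overlap. As a standalone sketch of that arithmetic (the strings and numbers below are illustrative only, not part of this change):

from collections import Counter

# Illustrative pair; in ecqa.py both strings would first go through __normalize
# (symbol stripping, lowercasing, punctuation removal, whitespace collapsing).
prediction = "서울 특별시"
reference = "서울시"

pred_chars = [ch for token in prediction.split() for ch in token]  # ['서', '울', '특', '별', '시']
ref_chars = [ch for token in reference.split() for ch in token]    # ['서', '울', '시']

# characters shared by both strings, counted with multiplicity
overlap = Counter(pred_chars) & Counter(ref_chars)
n_same = sum(overlap.values())                       # 3

precision = n_same / len(pred_chars)                 # 3 / 5 = 0.6
recall = n_same / len(ref_chars)                     # 3 / 3 = 1.0
f1 = 2 * precision * recall / (precision + recall)   # 0.75

print(f1, precision, recall)  # 0.75 0.6 1.0 -> 75.0 / 60.0 / 100.0 after the 100x scaling in _compute

Character-level rather than word-level overlap is the usual choice for Korean QA evaluation (the KorQuAD evaluation script uses the same kind of normalization and character F1), since a prediction often differs from the gold answer only by a particle attached to the same noun.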
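Once the script is in place, it can be loaded like any other local evaluate module. A minimal usage sketch, assuming the file is saved locally as ecqa.py and that its _info() (not shown in this diff) declares string features for predictions and references:

import evaluate

# Assumptions: ecqa.py sits in the current working directory and _info()
# declares datasets.Value("string") features for both columns.
ecqa_metric = evaluate.load("./ecqa.py")

results = ecqa_metric.compute(
    predictions=["서울 특별시"],
    references=["서울시"],
)
print(results)  # for this single pair: {'f1': 75.0, 'precision': 60.0, 'recall': 100.0}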