Spaces:
Sleeping
Sleeping
DoctorSlimm
committed on
Commit
•
8b5d1da
1
Parent(s):
4b03595
save
Browse files
__pycache__/kaushiks_criteria.cpython-39.pyc
ADDED
Binary file (3.13 kB). View file
|
|
kaushiks_criteria.py
CHANGED
@@ -97,12 +97,84 @@ class kaushiks_criteria(evaluate.Metric):
|
|
97 |
def _download_and_prepare(self, dl_manager):
|
98 |
"""Optional: download external resources useful to compute the scores"""
|
99 |
# TODO: Download external resources if needed
|
|
|
|
|
100 |
pass
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
def _compute(self, predictions, references):
|
103 |
-
"""
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
return {
|
107 |
-
|
|
|
|
|
108 |
}
|
|
|
97 |
def _download_and_prepare(self, dl_manager):
    """Load the ``exact_match`` metric ahead of scoring.

    ``_compute`` loads the same metric by name; loading it here as well means
    any fetching that ``evaluate.load`` performs happens during preparation
    (presumably it is cached for later loads -- confirm against the
    ``evaluate`` documentation).

    :param dl_manager: download manager handed to this hook (unused here)
    :return: None
    """
    import evaluate

    # The returned metric object is intentionally discarded; only the side
    # effect of loading is wanted at this stage.
    evaluate.load('exact_match')
|
103 |
|
104 |
+
def normalize_fn(self, example, text_field='text'):
    """
    Reduce a pipe-delimited table to the three strings used for format checks.

    The text is split on newlines; the first line is taken as the header row,
    the second as the separator row, and every remaining line is treated as a
    data row whose cells are counted (incomplete rows are counted as-is).

    Expected input format:

        | col1 | col2 | col3 |   <- leading and trailing pipes required
        | ---- | ---- | ---- |   <- separator row, kept verbatim (not validated)
        | val1 | val2 | val3 |
        | ...  | ...  | ...  |

    Note: this does not handle special tokens.

    :param example: mapping (anything ``dict()`` accepts) containing the table text
    :param text_field: key under which the table text is stored in ``example``
    :return: dict with
        * ``headers``    - first line, surrounding whitespace removed
        * ``sep_row``    - second line, surrounding whitespace removed
          ('' when the text has fewer than two lines)
        * ``row_counts`` - comma-separated cell count of each data row
          ('' when there are no data rows)
    """
    lines = dict(example)[text_field].strip().split('\n')

    # str.split always yields at least one element, so a header line exists
    # even for empty text (it is then the empty string).
    headers = lines[0].strip()
    sep_row = lines[1].strip() if len(lines) > 1 else ""

    # Strip whitespace (including a stray '\r' from CRLF input) before removing
    # the outer pipes; otherwise a trailing space keeps the closing pipe in
    # place and the row is reported with one cell too many.
    row_counts = [
        str(len(line.strip().strip('|').split('|')))
        for line in lines[2:]
    ]

    return {
        'headers': headers,
        'sep_row': sep_row,
        # Delimit the counts so multi-digit values stay unambiguous
        # (e.g. rows of 1 and 11 cells vs. rows of 11 and 1 cells).
        'row_counts': ','.join(row_counts),
    }
|
141 |
+
|
142 |
def _compute(self, predictions, references):
    """
    Score how well each predicted table's format matches its reference.

    Every prediction and reference is passed through ``self.normalize_fn`` and
    the resulting fields are compared with the ``exact_match`` metric:

    * ``headers``    - the header line (column names and their order)
    * ``sep_row``    - the separator line
    * ``row_counts`` - the per-row cell counts (and therefore the row count)

    :param predictions: iterable of predicted table strings
    :param references: iterable of reference table strings, aligned with
        ``predictions``
    :return: dict with keys ``exact_match_headers``, ``exact_match_sep_row``
        and ``exact_match_row_counts``, each holding the ``'exact_match'``
        value reported by the metric for that field
    """
    # Normalise in-process. Routing the texts through a mapped dataset needed
    # a worker-count name (`num_proc`) that is not defined in this method,
    # and the per-example work is a handful of string operations anyway.
    pred_norm = [self.normalize_fn({'text': text}) for text in predictions]
    refs_norm = [self.normalize_fn({'text': text}) for text in references]

    exact_match = evaluate.load('exact_match')

    # One exact-match score per normalised field, in a fixed key order.
    scores = {}
    for field in ('headers', 'sep_row', 'row_counts'):
        scores['exact_match_' + field] = exact_match.compute(
            predictions=[row[field] for row in pred_norm],
            references=[row[field] for row in refs_norm],
        )['exact_match']
    return scores
|