Spaces:

DoctorSlimm
/

kaushiks_criteria

Sleeping

App Files Files Community

drslimm committed on Feb 1, 2024

Commit

8b5d1da

1 Parent(s): 4b03595

save

Browse files

Files changed (2) hide show

__pycache__/kaushiks_criteria.cpython-39.pyc +0 -0
kaushiks_criteria.py +76 -4

__pycache__/kaushiks_criteria.cpython-39.pyc ADDED Viewed

Binary file (3.13 kB). View file

kaushiks_criteria.py CHANGED Viewed

@@ -97,12 +97,84 @@ class kaushiks_criteria(evaluate.Metric):
     def _download_and_prepare(self, dl_manager):
         """Optional: download external resources useful to compute the scores"""
         # TODO: Download external resources if needed
         pass
     def _compute(self, predictions, references):
-        """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
         return {
-            "accuracy": accuracy,
         }

     def _download_and_prepare(self, dl_manager):
         """Optional: download external resources useful to compute the scores"""
+        # Load the 'exact_match' metric once up front. The returned object is
+        # discarded, so this presumably only serves to fetch/cache the metric
+        # before _compute loads it again by the same name.
+        import evaluate
+        evaluate.load('exact_match')
+    def normalize_fn(self, example, text_field='text'):
+        """
+        Summarise one markdown-style table as three comparable strings.
+
+        The text stored under ``text_field`` is stripped and split on newlines.
+        Line 0 is taken as the header row, line 1 as the separator row and every
+        later line as a data row. Nothing is validated: the lines are not checked
+        to form a well-formed table, and every '|' is treated as a cell delimiter.
+
+        Expected input format:
+        | col1 | col2 | col3 |      <- leading and trailing pipes expected
+        | ---- | ---- | ---- |
+        | val1 | val2 | val3 |
+        | ... | ... | ... |
+
+        Note: this does not handle special tokens
+
+        :param example: record holding the table text (anything ``dict()`` accepts)
+        :param text_field: key under which the table text is stored
+        :return: dict with
+            'headers'    - the first line, whitespace-stripped
+            'sep_row'    - the second line, whitespace-stripped ('' if absent)
+            'row_counts' - comma-separated cell count of each data row
+                           ('' if there are no data rows)
+        """
+        # str.split always yields at least one element, so lines[0] is safe
+        # even for empty text (it is then the empty string).
+        lines = dict(example)[text_field].strip().split('\n')
+        headers = lines[0].strip()
+        sep_row = lines[1].strip() if len(lines) > 1 else ""
+        # Strip surrounding whitespace BEFORE the outer pipes: otherwise a
+        # trailing space or '\r' keeps the closing pipe in place and the row
+        # appears to have one extra (empty) cell.
+        row_counts = [
+            str(len(line.strip().strip('|').split('|')))
+            for line in lines[2:]
+        ]
+        return {
+            'headers': headers,
+            'sep_row': sep_row,
+            # A separator keeps multi-digit counts unambiguous: rows of 1 and 12
+            # cells must not encode to the same string as rows of 11 and 2.
+            'row_counts': ','.join(row_counts),
+        }
     def _compute(self, predictions, references):
+        """
+        Score how closely each predicted table's format follows its reference.
+
+        Predictions and references are each wrapped in a one-column ('text')
+        dataset, reduced with normalize_fn, and then compared field by field
+        with the 'exact_match' metric:
+        * 'headers'    - the header line (column names and their order)
+        * 'sep_row'    - the separator line
+        * 'row_counts' - the per-row cell counts as encoded by normalize_fn
+                         (so a differing number of rows also mismatches)
+        :param predictions: predicted table texts
+        :param references: reference table texts, aligned with predictions
+        :return: dict mapping 'exact_match_headers', 'exact_match_sep_row' and
+                 'exact_match_row_counts' to the metric's 'exact_match' value
+        """
+        splits = DatasetDict({
+            'predictions': Dataset.from_dict({'text': predictions}),
+            'references': Dataset.from_dict({'text': references}),
+        })
+        # NOTE(review): num_proc is neither a parameter nor a local here; it
+        # must exist at module scope or this line raises NameError - confirm.
+        normalized = splits.map(self.normalize_fn, num_proc=num_proc)
+        scorer = evaluate.load('exact_match')
+        # One exact-match pass per normalized field; insertion order fixes the
+        # order of keys in the returned dict.
+        targets = (
+            ('exact_match_headers', 'headers'),
+            ('exact_match_sep_row', 'sep_row'),
+            ('exact_match_row_counts', 'row_counts'),
+        )
+        scores = {}
+        for score_name, field in targets:
+            outcome = scorer.compute(
+                predictions=normalized['predictions'][field],
+                references=normalized['references'][field]
+            )
+            scores[score_name] = outcome['exact_match']
+        return scores