Spaces:

maqiuping59
/

table_markdown

Running

App Files Files Community

maqiuping59 committed on May 29

Commit

41b45e8

verified ·

1 Parent(s): 980aa29

Update metric.py

Browse files

Files changed (1) hide show

metric.py +268 -86

metric.py CHANGED Viewed

@@ -1,9 +1,205 @@
 import re
 import json
 import evaluate
 import datasets
-from typing import Set, Tuple, List, Dict, Any
-from dataclasses import dataclass
 _DESCRIPTION = """
 Table evaluation metrics for assessing the matching degree between predicted and reference tables. It calculates the following metrics:
@@ -42,23 +238,21 @@ Examples:
 _CITATION = """
 """
-@dataclass(frozen=True)
-class TableCell:
-    labels: frozenset[str]  # Using frozenset for hashable unordered pair
-    value: float
-    def __eq__(self, other):
-        if not isinstance(other, TableCell):
-            return False
-        return self.labels == other.labels and abs(self.value - other.value) < 0.05
-    def __hash__(self):
-        return hash((self.labels, round(self.value, 3)))  # Round to handle float comparison
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Accuracy(evaluate.Metric):
     def _info(self):
@@ -85,66 +279,67 @@ class Accuracy(evaluate.Metric):
         return None
-    def _table_to_cell_set(self, table_str: str) -> Set[TableCell]:
-        """Convert markdown table string to a set of TableCell objects."""
-        result_set = set()
         table_str = table_str.lstrip("|").rstrip("|")
         parts = table_str.split('||')
         parts = [part for part in parts if "--" not in part]
-        if not parts:
-            return result_set
         legends = parts[0].split("|")
-        legends = [l.strip() for l in legends if l.strip()]
         rows = len(parts)
-        if rows == 2:  # Single row table - use single label
             nums = parts[1].split("|")
-            nums = [n.strip() for n in nums if n.strip()]
-            for i, num in enumerate(nums):
-                try:
-                    value = float(num)
-                    # For single row tables, use a single label
-                    cell = TableCell(frozenset([legends[i]]), value)
-                    result_set.add(cell)
-                except ValueError:
-                    continue
-        elif rows >= 3:  # Multi-row table - use label pairs
-            for i in range(1, rows):
-                row = parts[i].split("|")
-                row = [r.strip() for r in row if r.strip()]
-                if not row:
-                    continue
-                row_label = row[0]
-                for j, num in enumerate(row[1:], 1):
-                    if j >= len(legends):
-                        continue
-                    try:
-                        value = float(num)
-                        # For multi-row tables, use label pairs
-                        cell = TableCell(frozenset([row_label, legends[j-1]]), value)
-                        result_set.add(cell)
-                    except ValueError:
-                        continue
-        return result_set
-    def _markdown_to_cell_set(self, markdown_str: str) -> Set[TableCell]:
-        """Convert markdown string to a set of TableCell objects."""
         table_str = self._extract_markdown_table(markdown_str)
         if table_str:
-            return self._table_to_cell_set(table_str)
-        return set()
-    def _calculate_table_metrics(self, pred_cells: Set[TableCell], true_cells: Set[TableCell]) -> Dict[str, Any]:
-        """Calculate metrics using cell set comparison."""
-        true_positives = len(pred_cells.intersection(true_cells))
-        false_positives = len(pred_cells - true_cells)
-        false_negatives = len(true_cells - pred_cells)
         precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
         recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
         f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
@@ -161,39 +356,26 @@ class Accuracy(evaluate.Metric):
     def _compute(self, predictions, references):
         predictions = "".join(predictions)
         references = "".join(references)
-        pred_cells = self._markdown_to_cell_set(predictions)
-        true_cells = self._markdown_to_cell_set(references)
-        return self._calculate_table_metrics(pred_cells, true_cells)
 def main():
     accuracy_metric = Accuracy()
-    # Test with different table formats
-    # Test 1: Single row table
-    results1 = accuracy_metric.compute(
-        predictions=["""
-|  | value1 | value2 | value3 ||--|--|--|--|| data | 1.01 | 2 | 3 |
-"""],
-        references=["""
-|  | value1 | value2 | value3 ||--|--|--|--|| data | 1 | 2 | 3 |
-"""],
-    )
-    print("Single row table test:", results1)
-    # Test 2: Multi-row table (transposed)
-    results2 = accuracy_metric.compute(
         predictions=["""
-|  | desire | wage ||--|--|--|| lobby | 5.01 | 1 || search | 8 | 5 || band | 7 | 3 || charge | 5 | 8 || chain | 9 | 5 |
-"""],
         references=["""
-|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 5.01 | 8 | 7 | 5 | 9 || wage | 1 | 5 | 3 | 8 | 5 |
-"""],
     )
-    print("Multi-row table test:", results2)
 if __name__ == '__main__':
     main()

+# import re
+# import json
+# import evaluate
+# import datasets
+# from typing import Set, Tuple, List, Dict, Any
+# from dataclasses import dataclass
+# _DESCRIPTION = """
+# Table evaluation metrics for assessing the matching degree between predicted and reference tables. It calculates the following metrics:
+# 1. Precision: The ratio of correctly predicted cells to the total number of cells in the predicted table
+# 2. Recall: The ratio of correctly predicted cells to the total number of cells in the reference table
+# 3. F1 Score: The harmonic mean of precision and recall
+# These metrics help evaluate the accuracy of table data extraction or generation.
+# """
+# _KWARGS_DESCRIPTION = """
+# Args:
+#     predictions (`str`): Predicted table in Markdown format.
+#     references (`str`): Reference table in Markdown format.
+# Returns:
+#     dict: A dictionary containing the following metrics:
+#         - precision (`float`): Precision score, range [0,1]
+#         - recall (`float`): Recall score, range [0,1]
+#         - f1 (`float`): F1 score, range [0,1]
+#         - true_positives (`int`): Number of correctly predicted cells
+#         - false_positives (`int`): Number of incorrectly predicted cells
+#         - false_negatives (`int`): Number of cells that were not predicted
+# Examples:
+#     >>> accuracy_metric = evaluate.load("accuracy")
+#     >>> results = accuracy_metric.compute(
+#     ...     predictions="|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 5 | 8 | 7 | 5 | 9 || wage | 1 | 5 | 3 | 8 | 5 |",
+#     ...     references="|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 1 | 6 | 7 | 5 | 9 || wage | 1 | 5 | 2 | 8 | 5 |"
+#     ... )
+#     >>> print(results)
+#     {'precision': 0.7, 'recall': 0.7, 'f1': 0.7, 'true_positives': 7, 'false_positives': 3, 'false_negatives': 3}
+# """
+# _CITATION = """
+# """
+# @dataclass(frozen=True)
+# class TableCell:
+#     labels: frozenset[str]  # Using frozenset for hashable unordered pair
+#     value: float
+#     def __eq__(self, other):
+#         if not isinstance(other, TableCell):
+#             return False
+#         return self.labels == other.labels and abs(self.value - other.value) < 0.05
+#     def __hash__(self):
+#         return hash((self.labels, round(self.value, 3)))  # Round to handle float comparison
+# @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+# class Accuracy(evaluate.Metric):
+#     def _info(self):
+#         return evaluate.MetricInfo(
+#             description=_DESCRIPTION,
+#             citation=_CITATION,
+#             inputs_description=_KWARGS_DESCRIPTION,
+#             features=datasets.Features(
+#                 {
+#                     "predictions": datasets.Value("string"),
+#                     "references": datasets.Value("string"),
+#                 }
+#             ),
+#             reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html"],
+#         )
+#     def _extract_markdown_table(self,text):
+#         text = text.replace('\n', '')
+#         text = text.replace(" ","")
+#         pattern = r'\|(?:[^|]+\|)+[^|]+\|'
+#         matches = re.findall(pattern, text)
+#         if matches:
+#             return ''.join(matches)
+#         return None
+#     def _table_to_cell_set(self, table_str: str) -> Set[TableCell]:
+#         """Convert markdown table string to a set of TableCell objects."""
+#         result_set = set()
+#         table_str = table_str.lstrip("|").rstrip("|")
+#         parts = table_str.split('||')
+#         parts = [part for part in parts if "--" not in part]
+#         if not parts:
+#             return result_set
+#         legends = parts[0].split("|")
+#         legends = [l.strip() for l in legends if l.strip()]
+#         rows = len(parts)
+#         if rows == 2:  # Single row table - use single label
+#             nums = parts[1].split("|")
+#             nums = [n.strip() for n in nums if n.strip()]
+#             for i, num in enumerate(nums):
+#                 try:
+#                     value = float(num)
+#                     # For single row tables, use a single label
+#                     cell = TableCell(frozenset([legends[i]]), value)
+#                     result_set.add(cell)
+#                 except ValueError:
+#                     continue
+#         elif rows >= 3:  # Multi-row table - use label pairs
+#             for i in range(1, rows):
+#                 row = parts[i].split("|")
+#                 row = [r.strip() for r in row if r.strip()]
+#                 if not row:
+#                     continue
+#                 row_label = row[0]
+#                 for j, num in enumerate(row[1:], 1):
+#                     if j >= len(legends):
+#                         continue
+#                     try:
+#                         value = float(num)
+#                         # For multi-row tables, use label pairs
+#                         cell = TableCell(frozenset([row_label, legends[j-1]]), value)
+#                         result_set.add(cell)
+#                     except ValueError:
+#                         continue
+#         return result_set
+#     def _markdown_to_cell_set(self, markdown_str: str) -> Set[TableCell]:
+#         """Convert markdown string to a set of TableCell objects."""
+#         table_str = self._extract_markdown_table(markdown_str)
+#         if table_str:
+#             return self._table_to_cell_set(table_str)
+#         return set()
+#     def _calculate_table_metrics(self, pred_cells: Set[TableCell], true_cells: Set[TableCell]) -> Dict[str, Any]:
+#         """Calculate metrics using cell set comparison."""
+#         true_positives = len(pred_cells.intersection(true_cells))
+#         false_positives = len(pred_cells - true_cells)
+#         false_negatives = len(true_cells - pred_cells)
+#         precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
+#         recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
+#         f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+#         return {
+#             'precision': precision,
+#             'recall': recall,
+#             'f1': f1,
+#             'true_positives': true_positives,
+#             'false_positives': false_positives,
+#             'false_negatives': false_negatives
+#         }
+#     def _compute(self, predictions, references):
+#         predictions = "".join(predictions)
+#         references = "".join(references)
+#         pred_cells = self._markdown_to_cell_set(predictions)
+#         true_cells = self._markdown_to_cell_set(references)
+#         return self._calculate_table_metrics(pred_cells, true_cells)
+# def main():
+#     accuracy_metric = Accuracy()
+#     # Test with different table formats
+#     # Test 1: Single row table
+#     results1 = accuracy_metric.compute(
+#         predictions=["""
+# |  | value1 | value2 | value3 ||--|--|--|--|| data | 1.01 | 2 | 3 |
+# """],
+#         references=["""
+# |  | value1 | value2 | value3 ||--|--|--|--|| data | 1 | 2 | 3 |
+# """],
+#     )
+#     print("Single row table test:", results1)
+#     # Test 2: Multi-row table (transposed)
+#     results2 = accuracy_metric.compute(
+#         predictions=["""
+# |  | desire | wage ||--|--|--|| lobby | 5.01 | 1 || search | 8 | 5 || band | 7 | 3 || charge | 5 | 8 || chain | 9 | 5 |
+# """],
+#         references=["""
+# |  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 5.01 | 8 | 7 | 5 | 9 || wage | 1 | 5 | 3 | 8 | 5 |
+# """],
+#     )
+#     print("Multi-row table test:", results2)
+# if __name__ == '__main__':
+#     main()
 import re
 import json
 import evaluate
 import datasets
 _DESCRIPTION = """
 Table evaluation metrics for assessing the matching degree between predicted and reference tables. It calculates the following metrics:
 _CITATION = """
+@article{scikit-learn,
+  title={Scikit-learn: Machine Learning in {P}ython},
+  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+         and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+         and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+         Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+  journal={Journal of Machine Learning Research},
+  volume={12},
+  pages={2825--2830},
+  year={2011}
+}
 """
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Accuracy(evaluate.Metric):
     def _info(self):
         return None
+    def _table_to_dict(self,table_str):  # Parse a flattened table string (rows joined by "||") into a dict of float cell values; returns None when only one row is left.
+        result_dict = {}
         table_str = table_str.lstrip("|").rstrip("|")  # drop the outer pipes so that split('||') yields bare rows
         parts = table_str.split('||')  # a double pipe marks the boundary between two rows
         parts = [part for part in parts if "--" not in part]  # discard the header separator row (any part containing "--")
         legends = parts[0].split("|")  # column headers come from the first remaining row
         rows = len(parts)
+        if rows == 2:  # header + one data row -> flat {header: value}
             nums = parts[1].split("|")
+            for i in range(len(nums)):  # NOTE(review): cells pair with headers by position and every cell goes through float(); a leading row-label cell would raise - confirm single-row inputs never carry one
+                result_dict[legends[i]]=float(nums[i])
+        elif rows >=3:  # header + several data rows -> nested {row_label: {header: value}}
+            for i in range(1,rows):
+                pre_row = parts[i]
+                pre_row = pre_row.split("|")
+                label = pre_row[0]  # first cell of a data row is used as the row label
+                result_dict[label] = {}
+                for j in range(1,len(pre_row)):
+                    result_dict[label][legends[j-1]] = float(pre_row[j])  # pre_row[j] pairs with legends[j-1]; NOTE(review): assumes the header row has one cell fewer than a data row (no corner cell) - confirm against _extract_markdown_table output
+        else:  # reached only when the header is the single remaining row
+            return None
+        return result_dict
+    def _markdown_to_dict(self,markdown_str):  # Extract the table from markdown_str and parse it with _table_to_dict.
         table_str = self._extract_markdown_table(markdown_str)
         if table_str:
+            return self._table_to_dict(table_str)
+        else:  # the extractor returned nothing usable, so there is no table to parse
+            return None
+    def _calculate_table_metrics(self,pred_table, true_table):
+        true_positives = 0
+        false_positives = 0
+        false_negatives = 0
+        # Iterate over every key/value pair of the predicted table
+        for key, pred_value in pred_table.items():
+            if key in true_table:
+                true_value = true_table[key]
+                if isinstance(pred_value, dict) and isinstance(true_value, dict):
+                    nested_metrics = self._calculate_table_metrics(pred_value, true_value)
+                    true_positives += nested_metrics['true_positives']
+                    false_positives += nested_metrics['false_positives']
+                    false_negatives += nested_metrics['false_negatives']
+                # The two values are equal
+                elif pred_value == true_value:
+                    true_positives += 1
+                else:
+                    false_positives += 1
+                    false_negatives += 1
+            else:
+                false_positives += 1
+        # Count reference entries that have no counterpart in the prediction
+        for key in true_table:
+            if key not in pred_table:
+                false_negatives += 1
+        # Compute the metrics
         precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
         recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
         f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
     def _compute(self, predictions, references):  # Score the predicted table against the reference table.
         predictions = "".join(predictions)  # concatenate the input pieces into one text before parsing
         references = "".join(references)
+        return self._calculate_table_metrics(self._markdown_to_dict(predictions), self._markdown_to_dict(references))  # NOTE(review): _markdown_to_dict can return None, and _calculate_table_metrics calls .items() on its first argument - confirm inputs always contain a table
 def main():
     accuracy_metric = Accuracy()
+    # Compute the metrics
+    results = accuracy_metric.compute(
         predictions=["""
+|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 5 | 8 | 7 | 5 | 9 || wage | 1 | 5 | 3 | 8 | 5 |
+"""],  # predicted table
         references=["""
+|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 1 | 6 | 7 | 5 | 9 || wage | 1 | 5 | 2 | 8 | 5 |
+"""],   # reference table
     )
+    print(results)  # print the result
 if __name__ == '__main__':
     main()