import re
import json
import evaluate
import datasets
import pprint
# Long-form description rendered on the metric card and injected into the
# class docstring via add_start_docstrings below.
_DESCRIPTION = """
Table evaluation metrics for assessing the matching degree between predicted and reference tables. It calculates the following metrics:
1. Precision: The ratio of correctly predicted cells to the total number of cells in the predicted table
2. Recall: The ratio of correctly predicted cells to the total number of cells in the reference table
3. F1 Score: The harmonic mean of precision and recall
These metrics help evaluate the accuracy of table data extraction or generation.
"""
# Argument/return documentation consumed by evaluate.MetricInfo
# (inputs_description) and by the generated docstring.
_KWARGS_DESCRIPTION = """
Args:
predictions (`str`): Predicted table in Markdown format.
references (`str`): Reference table in Markdown format.
Returns:
dict: A dictionary containing the following metrics:
- precision (`float`): Precision score, range [0,1]
- recall (`float`): Recall score, range [0,1]
- f1 (`float`): F1 score, range [0,1]
- true_positives (`int`): Number of correctly predicted cells
- false_positives (`int`): Number of incorrectly predicted cells
- false_negatives (`int`): Number of cells that were not predicted
Examples:
>>> accuracy_metric = evaluate.load("accuracy")
>>> results = accuracy_metric.compute(
... predictions="| | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 5 | 8 | 7 | 5 | 9 || wage | 1 | 5 | 3 | 8 | 5 |",
... references="| | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 1 | 6 | 7 | 5 | 9 || wage | 1 | 5 | 2 | 8 | 5 |"
... )
>>> print(results)
{'precision': 0.7, 'recall': 0.7, 'f1': 0.7, 'true_positives': 7, 'false_positives': 3, 'false_negatives': 3}
"""
# No published citation for this metric yet.
_CITATION = """
"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Accuracy(evaluate.Metric):
    """Cell-level precision/recall/F1 between two numeric Markdown tables.

    Each numeric cell is addressed by the set of its non-numeric row/column
    headers; a predicted cell matches a reference cell when the key is equal
    and the value is within 5% relative tolerance (absolute tolerance 0.05
    when the reference value is 0).
    """

    def _info(self):
        """Return the metric metadata consumed by the `evaluate` library."""
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html"],
        )

    def _extract_markdown_table(self, text):
        """Return the Markdown table rows found in *text* joined together, or None.

        Newlines and spaces are removed first so a multi-line table collapses
        into one '|a|b||c|d|' stream. A leading empty header cell ('||') is
        replaced by a synthetic 'header' label so that cell is not dropped.
        """
        text = text.replace('\n', '')
        text = text.replace(" ", "")
        if text.startswith("||"):
            text = "|header" + text[1:]
        # One or more '|'-delimited cells terminated by a final '|'.
        pattern = r'\|(?:[^|]+\|)+[^|]+\|'
        matches = re.findall(pattern, text)
        if matches:
            return ''.join(matches)
        return None

    def _table_parse(self, table_str):
        """Parse a collapsed table string into a list of [header-set, value] pairs.

        Every cell that converts to float is recorded together with the set of
        non-numeric headers that address it: the first cell of its row and the
        cell of the first row in the same column. Cells whose row AND column
        headers are both numeric (or absent) are skipped, as are non-numeric
        cells and ragged rows.
        """
        rows = [item.strip("|").split("|") for item in table_str.split("||")]
        parse_result = []
        for row in rows:
            for j, cell in enumerate(row):
                index = []
                # ValueError -> the cell itself is not numeric;
                # IndexError -> ragged row with no matching column header.
                try:
                    value = float(cell)
                    try:
                        float(row[0])
                    except ValueError:
                        # Row label is non-numeric: part of the cell's key.
                        index.append(row[0])
                    try:
                        float(rows[0][j])
                    except ValueError:
                        # Column label is non-numeric: part of the cell's key.
                        index.append(rows[0][j])
                    if len(index) > 0:
                        parse_result.append([set(index), value])
                except (ValueError, IndexError):
                    continue
        return parse_result

    # Backward-compatible alias for the original (misspelled) method name.
    _table_prase = _table_parse

    def _markdown_to_table(self, markdown_str):
        """Extract and parse the Markdown table in *markdown_str*.

        Returns a list of [header-set, value] pairs, or None when no table
        could be found.
        """
        table_str = self._extract_markdown_table(markdown_str)
        if table_str:
            return self._table_parse(table_str)
        return None

    def _calculate_table_metrics(self, pred_table, true_table):
        """Compute precision/recall/F1 between two parsed tables.

        Either argument may be None (no table was extracted); it is then
        treated as an empty table instead of raising TypeError.
        """
        pred_table = pred_table or []
        true_table = true_table or []
        true_positives = 0
        false_positives = 0
        false_negatives = 0
        # Key each cell by its sorted header tuple for O(1) lookups.
        pred_dict = {tuple(sorted(item[0])): item[1] for item in pred_table}
        true_dict = {tuple(sorted(item[0])): item[1] for item in true_table}
        for key, pred_value in pred_dict.items():
            if key in true_dict:
                true_value = true_dict[key]
                if true_value == 0 and abs(pred_value) < 0.05:
                    true_positives += 1
                elif true_value != 0 and abs((pred_value - true_value) / true_value) < 0.05:
                    true_positives += 1
                else:
                    # Existing cell with a wrong value counts against both sides.
                    false_positives += 1
                    false_negatives += 1
            else:
                false_positives += 1
        # Reference cells that were never predicted at all.
        for key in true_dict:
            if key not in pred_dict:
                false_negatives += 1
        precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
        recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        return {
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'true_positives': true_positives,
            'false_positives': false_positives,
            'false_negatives': false_negatives
        }

    def _compute(self, predictions, references):
        """Join the input sequences into single strings and score them."""
        predictions = "".join(predictions)
        references = "".join(references)
        return self._calculate_table_metrics(
            self._markdown_to_table(predictions),
            self._markdown_to_table(references),
        )
def main():
    """Demo: score a prediction table against a reference table and print the result.

    The two tables share no (row, column) header pair, so every parsed cell
    is either a false positive or a false negative.
    """
    accuracy_metric = Accuracy()
    # Compute the metrics on a sample prediction/reference pair.
    results = accuracy_metric.compute(
        predictions=["""
    ||view|denial|finger|check|
    |--|--|--|--|--|
    |tour|9|8|4|7|
    |wall|4|9|5|7|
    |sex|2|6|3|1|
    """],  # predicted table
        references=["""
    |check|lap|
    |--|--|
    |80|40|
    """],  # reference table
    )
    print(results)  # print the resulting metrics dict
# Script entry point: run the demo when executed directly.
if __name__ == '__main__':
    main()