File size: 4,276 Bytes
9bf4bd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Copyright (c) OpenMMLab. All rights reserved.
import json
from typing import List

from mmocr.registry import DATA_PACKERS
from .base import BasePacker


@DATA_PACKERS.register_module()
class WildReceiptPacker(BasePacker):
    """Pack the wildreceipt annotation to MMOCR format.

    Args:
        merge_bg_others (bool): If True, give the same label to "background"
                class and "others" class. Defaults to True.
        ignore_idx (int): Index for ``ignore`` class. Defaults to 0.
        others_idx (int): Index for ``others`` class. Defaults to 25.
    """

    def __init__(self,
                 merge_bg_others: bool = False,
                 ignore_idx: int = 0,
                 others_idx: int = 25,
                 **kwargs) -> None:
        super().__init__(**kwargs)

        self.ignore_idx = ignore_idx
        self.others_idx = others_idx
        self.merge_bg_others = merge_bg_others

    def add_meta(self, samples: List) -> List:
        """No meta info is required for the wildreceipt dataset."""
        return samples

    def pack_instance(self, sample: str):
        """Pack line-json str of close set to line-json str of open set.

        Args:
            sample (str): The string to be deserialized to
                the close set dictionary object.
            split (str): The split of the instance.
        """
        # Two labels at the same index of the following two lists
        # make up a key-value pair. For example, in wildreceipt,
        # closeset_key_inds[0] maps to "Store_name_key"
        # and closeset_value_inds[0] maps to "Store_addr_value".
        closeset_key_inds = list(range(2, self.others_idx, 2))
        closeset_value_inds = list(range(1, self.others_idx, 2))

        openset_node_label_mapping = {
            'bg': 0,
            'key': 1,
            'value': 2,
            'others': 3
        }
        if self.merge_bg_others:
            openset_node_label_mapping['others'] = openset_node_label_mapping[
                'bg']

        closeset_obj = json.loads(sample)
        openset_obj = {
            'file_name':
            closeset_obj['file_name'].replace(self.data_root + '/', ''),
            'height':
            closeset_obj['height'],
            'width':
            closeset_obj['width'],
            'annotations': []
        }

        edge_idx = 1
        label_to_edge = {}
        for anno in closeset_obj['annotations']:
            label = anno['label']
            if label == self.ignore_idx:
                anno['label'] = openset_node_label_mapping['bg']
                anno['edge'] = edge_idx
                edge_idx += 1
            elif label == self.others_idx:
                anno['label'] = openset_node_label_mapping['others']
                anno['edge'] = edge_idx
                edge_idx += 1
            else:
                edge = label_to_edge.get(label, None)
                if edge is not None:
                    anno['edge'] = edge
                    if label in closeset_key_inds:
                        anno['label'] = openset_node_label_mapping['key']
                    elif label in closeset_value_inds:
                        anno['label'] = openset_node_label_mapping['value']
                else:
                    tmp_key = 'key'
                    if label in closeset_key_inds:
                        label_with_same_edge = closeset_value_inds[
                            closeset_key_inds.index(label)]
                    elif label in closeset_value_inds:
                        label_with_same_edge = closeset_key_inds[
                            closeset_value_inds.index(label)]
                        tmp_key = 'value'
                    edge_counterpart = label_to_edge.get(
                        label_with_same_edge, None)
                    if edge_counterpart is not None:
                        anno['edge'] = edge_counterpart
                    else:
                        anno['edge'] = edge_idx
                        edge_idx += 1
                    anno['label'] = openset_node_label_mapping[tmp_key]
                    label_to_edge[label] = anno['edge']

        openset_obj['annotations'] = closeset_obj['annotations']

        return json.dumps(openset_obj, ensure_ascii=False)