Spaces:
Sleeping
Sleeping
# Copyright (c) OpenMMLab. All rights reserved. | |
import json | |
from typing import List | |
from mmocr.registry import DATA_PACKERS | |
from .base import BasePacker | |
# NOTE(review): this module imports DATA_PACKERS but no registration decorator
# is visible on this class -- confirm whether one was dropped.
class WildReceiptPacker(BasePacker):
    """Pack the wildreceipt annotation to MMOCR format.

    Each close-set line-json sample is converted into an open-set line-json
    sample in which every annotation carries one of the node labels
    ``bg``/``key``/``value``/``others`` plus an ``edge`` id that is shared
    by a key class and its matching value class.

    Args:
        merge_bg_others (bool): If True, give the same label to "background"
            class and "others" class. Defaults to False.
        ignore_idx (int): Index for ``ignore`` class. Defaults to 0.
        others_idx (int): Index for ``others`` class. Defaults to 25.
        **kwargs: Forwarded unchanged to ``BasePacker.__init__``.
    """

    def __init__(self,
                 merge_bg_others: bool = False,
                 ignore_idx: int = 0,
                 others_idx: int = 25,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.ignore_idx = ignore_idx
        self.others_idx = others_idx
        self.merge_bg_others = merge_bg_others

    def add_meta(self, samples: List) -> List:
        """Return ``samples`` unchanged.

        No meta info is required for the wildreceipt dataset.

        Args:
            samples (List): The packed samples.

        Returns:
            List: The very same ``samples`` object.
        """
        return samples

    def pack_instance(self, sample: str) -> str:
        """Pack line-json str of close set to line-json str of open set.

        Args:
            sample (str): The string to be deserialized to
                the close set dictionary object. It must provide the keys
                ``file_name``, ``height``, ``width`` and ``annotations``;
                every annotation must provide an integer ``label``.

        Returns:
            str: The serialized open set dictionary. Every annotation keeps
            its original fields, has ``label`` rewritten to the open set
            node label and gains an ``edge`` id.

        Raises:
            ValueError: If an annotation label is neither ``ignore_idx`` nor
                ``others_idx`` and does not belong to a key/value class pair
                below ``others_idx``.
        """
        # Close-set classes below ``others_idx`` come in pairs: the i-th odd
        # index (1, 3, ...) is a value class and the i-th even index
        # (2, 4, ...) is the key class it belongs to. ``zip`` drops a
        # trailing value index that has no key partner (even ``others_idx``).
        class_pairs = list(
            zip(
                range(2, self.others_idx, 2), range(1, self.others_idx, 2)))
        value_of_key = {key: value for key, value in class_pairs}
        key_of_value = {value: key for key, value in class_pairs}

        openset_node_label_mapping = {
            'bg': 0,
            'key': 1,
            'value': 2,
            'others': 3
        }
        if self.merge_bg_others:
            openset_node_label_mapping['others'] = openset_node_label_mapping[
                'bg']

        closeset_obj = json.loads(sample)
        openset_obj = {
            # ``data_root`` is not set in this class; presumably it is
            # provided by ``BasePacker`` -- TODO confirm.
            'file_name':
            closeset_obj['file_name'].replace(self.data_root + '/', ''),
            'height':
            closeset_obj['height'],
            'width':
            closeset_obj['width'],
            'annotations': []
        }

        # Edge ids start at 1, so ``None`` can safely mean "no edge yet".
        edge_idx = 1
        # Maps an already seen close-set key/value label to its edge id.
        label_to_edge = {}
        for anno in closeset_obj['annotations']:
            label = anno['label']

            # Background and "others" nodes never share an edge: each one
            # receives a fresh id.
            if label == self.ignore_idx:
                anno['label'] = openset_node_label_mapping['bg']
                anno['edge'] = edge_idx
                edge_idx += 1
                continue
            if label == self.others_idx:
                anno['label'] = openset_node_label_mapping['others']
                anno['edge'] = edge_idx
                edge_idx += 1
                continue

            if label in value_of_key:
                node_type = 'key'
                label_with_same_edge = value_of_key[label]
            elif label in key_of_value:
                node_type = 'value'
                label_with_same_edge = key_of_value[label]
            else:
                # Fail loudly instead of reusing the counterpart left over
                # from a previous annotation (or hitting an unbound local).
                raise ValueError(
                    f'Unexpected close-set label {label!r}: it is neither '
                    f'ignore_idx ({self.ignore_idx}), others_idx '
                    f'({self.others_idx}) nor part of a key/value class '
                    f'pair below others_idx.')

            edge = label_to_edge.get(label)
            if edge is None:
                # First occurrence of this class: join the edge of its
                # key/value counterpart if that one was seen, otherwise
                # open a new edge.
                edge = label_to_edge.get(label_with_same_edge)
                if edge is None:
                    edge = edge_idx
                    edge_idx += 1
                label_to_edge[label] = edge

            anno['edge'] = edge
            anno['label'] = openset_node_label_mapping[node_type]

        openset_obj['annotations'] = closeset_obj['annotations']
        return json.dumps(openset_obj, ensure_ascii=False)