# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
from typing import Dict, List, Tuple

from mmocr.registry import DATA_PARSERS
from mmocr.utils import list_from_file

from .base import BaseParser
# NOTE(review): DATA_PARSERS is imported at the top of this file, but no
# registration of this class is visible here -- confirm whether it is meant
# to be decorated with ``@DATA_PARSERS.register_module()``.
class WildreceiptTextDetAnnParser(BaseParser):
    """Wildreceipt Text Detection Parser.

    The original annotation format of this dataset is stored in txt files,
    where every line is one JSON object of the following form:

        {"file_name": "xxx/xxx/xx/xxxx.jpeg",
         "height": 1200,
         "width": 1600,
         "annotations": [
             {"box": [x1, y1, x2, y2, x3, y3, x4, y4],
              "text": "xxx",
              "label": 25},
             ...
         ]}

    Args:
        ignore (int): The label to be ignored. Defaults to 0.
        **kwargs: Forwarded unchanged to ``BaseParser.__init__``. Presumably
            this is where ``data_root`` (root path of the dataset) and
            ``nproc`` (number of parsing processes) are accepted -- verify
            against ``BaseParser``.
    """

    def __init__(self, ignore: int = 0, **kwargs) -> None:
        # Set before delegating, preserving the original initialisation
        # order in case the base initialiser relies on it.
        self.ignore = ignore
        super().__init__(**kwargs)

    def parse_files(self, img_dir: str,
                    ann_path: str) -> List[Tuple[str, List[Dict]]]:
        """Convert one annotation file into text detection samples.

        Args:
            img_dir (str): Directory that is joined with the base name of
                each record's ``file_name`` to build the image path.
            ann_path (str): Path of the JSON-lines annotation file.

        Returns:
            List[Tuple[str, List[Dict]]]: One ``(img_file, instances)``
            tuple per non-blank annotation line. Every instance is a dict
            with the keys ``poly``, ``text`` and ``ignore``; ``ignore`` is
            ``True`` when the annotation label equals ``self.ignore``.
        """
        samples = []
        for line in list_from_file(ann_path):
            # A blank line (e.g. a trailing newline at the end of the file)
            # is not a JSON document; json.loads would raise on it.
            if not line.strip():
                continue
            record = json.loads(line)
            # Only the base name is kept, so images are looked up directly
            # inside ``img_dir`` regardless of the recorded sub-directories.
            img_file = osp.join(img_dir, osp.basename(record['file_name']))
            instances = [
                dict(
                    poly=anno['box'],
                    text=anno['text'],
                    ignore=anno['label'] == self.ignore)
                for anno in record['annotations']
            ]
            samples.append((img_file, instances))
        return samples
# NOTE(review): DATA_PARSERS is imported at the top of this file, but no
# registration of this class is visible here -- confirm whether it is meant
# to be decorated with ``@DATA_PARSERS.register_module()``.
class WildreceiptKIEAnnParser(BaseParser):
    """Wildreceipt KIE Parser.

    The original annotation format of this dataset is stored in txt files,
    where every line is one JSON object of the following form:

        {"file_name": "xxx/xxx/xx/xxxx.jpeg",
         "height": 1200,
         "width": 1600,
         "annotations": [
             {"box": [x1, y1, x2, y2, x3, y3, x4, y4],
              "text": "xxx",
              "label": 25},
             ...
         ]}

    Args:
        ignore (int): The label to be ignored. Defaults to 0. It is stored
            on the instance but not read by ``parse_files`` of this class.
        **kwargs: Forwarded unchanged to ``BaseParser.__init__``. Presumably
            this is where ``nproc`` (number of parsing processes) is
            accepted -- verify against ``BaseParser``.
    """

    def __init__(self, ignore: int = 0, **kwargs) -> None:
        # Set before delegating, preserving the original initialisation
        # order in case the base initialiser relies on it.
        self.ignore = ignore
        super().__init__(**kwargs)

    def parse_files(self, img_dir: str, ann_path: str) -> List[str]:
        """Rewrite the image path of every record in one annotation file.

        Args:
            img_dir (str): Directory that is joined with the base name of
                each record's ``file_name`` to build the new image path.
            ann_path (str): Path of the JSON-lines annotation file.

        Returns:
            List[str]: One re-serialised JSON string per non-blank
            annotation line, identical to the parsed input record except
            that ``file_name`` now points inside ``img_dir``.
        """
        samples = []
        for line in list_from_file(ann_path):
            # A blank line (e.g. a trailing newline at the end of the file)
            # is not a JSON document; json.loads would raise on it.
            if not line.strip():
                continue
            record = json.loads(line)
            # Only the base name is kept, so images are looked up directly
            # inside ``img_dir`` regardless of the recorded sub-directories.
            record['file_name'] = osp.join(
                img_dir, osp.basename(record['file_name']))
            samples.append(json.dumps(record))
        return samples