Spaces:
Sleeping
Sleeping
File size: 2,984 Bytes
9bf4bd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
from typing import Dict, List, Tuple

from mmocr.registry import DATA_PARSERS
from mmocr.utils import list_from_file

from .base import BaseParser
@DATA_PARSERS.register_module()
class WildreceiptTextDetAnnParser(BaseParser):
    """Wildreceipt Text Detection Parser.

    The original annotation format of this dataset is stored in txt files,
    in which every line is one JSON object of the following form:

        {"file_name": "xxx/xxx/xx/xxxx.jpeg",
         "height": 1200,
         "width": 1600,
         "annotations": [
             {"box": [x1, y1, x2, y2, x3, y3, x4, y4],
              "text": "xxx",
              "label": 25},
             ...
         ]}

    Args:
        ignore (int): The label to be ignored. Defaults to 0.
        **kwargs: Remaining keyword arguments, forwarded unchanged to the
            ``BaseParser`` constructor (presumably options such as
            ``data_root`` / ``nproc`` -- confirm against ``BaseParser``).
    """

    def __init__(self, ignore: int = 0, **kwargs) -> None:
        # ``ignore`` is stored before the base constructor runs, keeping the
        # original initialisation order.
        self.ignore = ignore
        super().__init__(**kwargs)

    def parse_files(self, img_dir: str,
                    ann_path: str) -> List[Tuple[str, List[Dict]]]:
        """Convert one annotation file into text detection samples.

        Args:
            img_dir (str): Directory joined with the base name of every
                ``file_name`` found in the annotation file.
            ann_path (str): Path of the json-line annotation file.

        Returns:
            list[tuple[str, list[dict]]]: One ``(img_file, instances)``
            tuple per annotation line. Every instance is a dict with the
            keys ``poly`` (the annotated ``box``), ``text`` and ``ignore``
            (True when the annotation label equals ``self.ignore``).
        """
        samples = []
        for raw_line in list_from_file(ann_path):
            # A blank line carries no record and json.loads would raise on
            # it, so it is skipped instead of aborting the whole file.
            if not raw_line.strip():
                continue
            record = json.loads(raw_line)
            # Only the base name is kept; the directory part stored in the
            # annotation is replaced by ``img_dir``.
            img_file = osp.join(img_dir, osp.basename(record['file_name']))
            instances = [
                dict(
                    poly=anno['box'],
                    text=anno['text'],
                    ignore=anno['label'] == self.ignore)
                for anno in record['annotations']
            ]
            samples.append((img_file, instances))
        return samples
@DATA_PARSERS.register_module()
class WildreceiptKIEAnnParser(BaseParser):
    """Wildreceipt KIE Parser.

    The original annotation format of this dataset is stored in txt files,
    in which every line is one JSON object of the following form:

        {"file_name": "xxx/xxx/xx/xxxx.jpeg",
         "height": 1200,
         "width": 1600,
         "annotations": [
             {"box": [x1, y1, x2, y2, x3, y3, x4, y4],
              "text": "xxx",
              "label": 25},
             ...
         ]}

    Args:
        ignore (int): The label to be ignored. Defaults to 0.
        **kwargs: Remaining keyword arguments, forwarded unchanged to the
            ``BaseParser`` constructor (presumably options such as
            ``nproc`` -- confirm against ``BaseParser``).
    """

    def __init__(self, ignore: int = 0, **kwargs) -> None:
        # ``ignore`` is stored before the base constructor runs, keeping the
        # original initialisation order. It is not read by ``parse_files``.
        self.ignore = ignore
        super().__init__(**kwargs)

    def parse_files(self, img_dir: str, ann_path: str) -> List[str]:
        """Rewrite the image path of every record in one annotation file.

        Args:
            img_dir (str): Directory joined with the base name of every
                ``file_name`` found in the annotation file.
            ann_path (str): Path of the json-line annotation file.

        Returns:
            list[str]: One JSON string per annotation line, identical to
            the parsed input record except that ``file_name`` is replaced
            by ``img_dir`` joined with the original base name.
        """
        samples = []
        for raw_line in list_from_file(ann_path):
            # A blank line carries no record and json.loads would raise on
            # it, so it is skipped instead of aborting the whole file.
            if not raw_line.strip():
                continue
            record = json.loads(raw_line)
            record['file_name'] = osp.join(
                img_dir, osp.basename(record['file_name']))
            # Each record is serialised back to a JSON string.
            samples.append(json.dumps(record))
        return samples
|