Spaces:
Sleeping
Sleeping
# Copyright (c) OpenMMLab. All rights reserved. | |
import os.path as osp | |
import re | |
from typing import List, Optional, Tuple | |
from mmocr.registry import DATA_GATHERERS | |
from mmocr.utils import list_files | |
from .base import BaseGatherer | |
class PairGatherer(BaseGatherer):
    r"""Gather the dataset files. Specifically for the paired annotations.

    That is to say, each image has a corresponding annotation file. For
    example,

        img_1.jpg <---> gt_img_1.txt
        img_2.jpg <---> gt_img_2.txt
        img_3.jpg <---> gt_img_3.txt

    Args:
        img_suffixes (List[str], optional): File suffixes used when searching
            ``img_dir`` for images. Passed to ``list_files`` unchanged.
        rule (List[str], optional): The rule for pairing the files. The first
            element is a regular expression matched against the image file
            name (anchored at its start); the second element is the
            replacement template used to build the annotation file name,
            which may contain back-references such as ``\1``. For example,
            to map the image name img_1.jpg to the annotation name
            gt_img_1.txt, the rule is
            ``[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt']``.
            Although it defaults to None, a rule is required by the time the
            gatherer is called.
        **kwargs: Forwarded to ``BaseGatherer.__init__``.

    Note: PairGatherer assumes that each split annotation file is in the
    corresponding split directory. For example, all the train annotation
    files are in {ann_dir}/train.
    """

    def __init__(self,
                 img_suffixes: Optional[List[str]] = None,
                 rule: Optional[List[str]] = None,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.rule = rule
        self.img_suffixes = img_suffixes
        # ann_dir = {ann_root}/{ann_dir}/{split}
        # (self.ann_dir and self.split are expected to have been set by
        # BaseGatherer.__init__.)
        self.ann_dir = osp.join(self.ann_dir, self.split)

    def __call__(self) -> Tuple[List[str], List[str]]:
        """Pair every matching image with its annotation file path.

        Images whose file name does not match ``rule[0]`` are skipped. The
        annotation path is only derived from the image name; its existence
        on disk is not checked here.

        Returns:
            tuple(list, list): The list of image paths and the list of
            annotation paths, aligned index by index.

        Raises:
            TypeError: If no ``rule`` was provided.
        """
        if self.rule is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object is not subscriptable".
            raise TypeError(
                'PairGatherer requires `rule` to be a sequence of '
                '[pattern, replacement], but got None.')

        # Compile once instead of re-resolving the pattern for every file.
        pattern = re.compile(self.rule[0])
        replacement = self.rule[1]

        img_list = []
        ann_list = []
        for img_path in list_files(self.img_dir, self.img_suffixes):
            img_name = osp.basename(img_path)
            if not pattern.match(img_name):
                continue
            ann_name = pattern.sub(replacement, img_name)
            img_list.append(img_path)
            ann_list.append(osp.join(self.ann_dir, ann_name))

        return img_list, ann_list