Spaces:
Sleeping
Sleeping
# Copyright (c) OpenMMLab. All rights reserved.
import json | |
import os | |
import os.path as osp | |
import shutil | |
from typing import List, Tuple | |
from mmocr.registry import DATA_GATHERERS | |
from .base import BaseGatherer | |
class NAFGatherer(BaseGatherer):
    """Gather the dataset file from NAF dataset. Specifically for the case that
    there is a split file that contains the names of different splits. For
    example,

        img_001.jpg                           train: img_001.jpg
        img_002.jpg ---> split_file --->      test: img_002.jpg
        img_003.jpg                           val: img_003.jpg

    Args:
        split_file (str, optional): The name of the split file. Defaults to
            "data_split.json".
        temp_dir (str, optional): The directory of the temporary images.
            Defaults to "temp_images".
    """

    def __init__(self,
                 split_file: str = 'data_split.json',
                 temp_dir: str = 'temp_images',
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.temp_dir = temp_dir
        self.split_file = split_file

    def __call__(self) -> Tuple[List[str], List[str]]:
        """Move the images of the current split into ``self.img_dir`` and
        pair each of them with an annotation path.

        The split file (JSON, located under ``self.data_root``) is read, the
        entry for ``self.split`` is selected, and every image name listed in
        any of its groups is moved from ``<data_root>/<temp_dir>`` to
        ``self.img_dir``. Images that are missing from the temporary
        directory are skipped with a warning.

        NOTE(review): ``data_root``, ``img_dir``, ``ann_dir`` and ``split``
        are not set in this class; presumably they are provided by
        ``BaseGatherer`` -- confirm against the base class.

        Returns:
            tuple(list[str], list[str]): The list of image paths and the list
            of annotation paths.

        Raises:
            KeyError: If the split file has no entry for ``self.split``.
        """
        # Imported locally so this block does not depend on a file-level
        # import that the module does not currently have.
        import warnings

        split_file = osp.join(self.data_root, self.split_file)
        with open(split_file, 'r') as f:
            split_data = json.load(f)

        # Rename the key: the split file calls the validation split "valid".
        # Guarded so that a file which already uses "val" (or has no
        # validation split at all) does not fail when another split is
        # requested.
        if 'valid' in split_data:
            split_data['val'] = split_data.pop('valid')

        os.makedirs(self.img_dir, exist_ok=True)

        img_list = []
        ann_list = []
        # The selected split maps a group name to the list of image names
        # belonging to that group; the group name itself is not needed.
        for img_names in split_data[self.split].values():
            for img_name in img_names:
                src_img = osp.join(self.data_root, self.temp_dir, img_name)
                dst_img = osp.join(self.img_dir, img_name)
                if not osp.exists(src_img):
                    # Actually emit the warning; merely constructing a
                    # ``Warning`` instance reports nothing to the user.
                    warnings.warn(f'{src_img} does not exist!')
                    continue
                # move the image to the new path
                shutil.move(src_img, dst_img)
                ann = osp.join(self.ann_dir,
                               img_name.replace('.jpg', '.json'))
                img_list.append(dst_img)
                ann_list.append(ann)
        return img_list, ann_list