Spaces:
Sleeping
Sleeping
# Copyright (c) OpenMMLab. All rights reserved. | |
import argparse | |
import os | |
import os.path as osp | |
import shutil | |
import xml.etree.ElementTree as ET | |
import zipfile | |
from xml.etree.ElementTree import ParseError | |
def extract(root_path):
    """Unpack all KAIST archives and collect valid annotation/image pairs.

    For every ``<root_path>/KAIST/<language>/<camera>`` folder, each archive
    is unpacked into ``<root_path>/tmp/<archive name>``. Every ``.xml``
    annotation that parses cleanly and has a sibling ``.jpg``/``.JPG`` image
    is moved to ``<root_path>/annotations/<idx>.xml`` and its image to
    ``<root_path>/imgs/<idx>.jpg``, where ``idx`` is a running counter
    zero-padded to five digits and shared across all archives.

    Archives and their members are visited in sorted order so that the
    numbering does not depend on the filesystem's listing order.

    Args:
        root_path (str): Directory that contains the ``KAIST`` folder.

    Raises:
        FileNotFoundError: If one of the expected language/camera folders
            does not exist (raised by ``os.listdir``).
    """
    ann_dir = osp.join(root_path, 'annotations')
    img_dir = osp.join(root_path, 'imgs')
    # shutil.move does not create missing parent directories.
    os.makedirs(ann_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)

    idx = 0
    for language in ['English', 'Korean', 'Mixed']:
        for camera in ['Digital_Camera', 'Mobile_Phone']:
            crt_path = osp.join(root_path, 'KAIST', language, camera)
            for zip_name in sorted(os.listdir(crt_path)):
                extracted_path = osp.join(root_path, 'tmp', zip_name)
                extract_zipfile(osp.join(crt_path, zip_name), extracted_path)
                for file in sorted(os.listdir(extracted_path)):
                    # Split off the extension instead of replacing the
                    # substring 'xml', which would also rewrite any 'xml'
                    # occurring inside the file stem.
                    stem, ext = osp.splitext(file)
                    if ext != '.xml':
                        continue
                    src_ann = osp.join(extracted_path, file)
                    # Filtering broken annotations
                    try:
                        ET.parse(src_ann)
                    except ParseError:
                        continue
                    src_img = _find_image(extracted_path, stem)
                    if src_img is None:
                        continue
                    out_name = str(idx).zfill(5)
                    shutil.move(src_ann, osp.join(ann_dir, out_name + '.xml'))
                    shutil.move(src_img, osp.join(img_dir, out_name + '.jpg'))
                    idx += 1


def _find_image(directory, stem):
    """Return the path of the image named ``stem`` in ``directory``, or None.

    ``.JPG`` is checked before ``.jpg`` so that the upper-case file is chosen
    when both spellings exist.
    """
    for suffix in ('JPG', 'jpg'):
        candidate = osp.join(directory, stem + '.' + suffix)
        if osp.exists(candidate):
            return candidate
    return None
def extract_zipfile(zip_path, dst_dir, delete=True):
    """Unpack every member of a zip archive into ``dst_dir``.

    Args:
        zip_path (str): Path of the archive to unpack.
        dst_dir (str): Directory that receives the archive's members.
        delete (bool): Whether to remove the archive after it has been
            unpacked. Defaults to True.
    """
    # Close the archive before removing it: an unclosed handle leaks a file
    # descriptor per archive and keeps the file open during os.remove.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(dst_dir)
    if delete:
        os.remove(zip_path)
def parse_args():
    """Parse the command-line arguments of the KAIST extraction script.

    Returns:
        argparse.Namespace: Parsed arguments; ``root_path`` holds the
        single positional argument.
    """
    arg_parser = argparse.ArgumentParser(description='Extract KAIST zips')
    arg_parser.add_argument('root_path', help='Root path of KAIST')
    return arg_parser.parse_args()
def main():
    """Extract the KAIST archives under the given root, then clean up.

    Reads ``root_path`` from the command line, runs ``extract`` on it and
    afterwards removes the ``tmp`` working directory and the original
    ``KAIST`` folder beneath it.

    Raises:
        FileNotFoundError: If ``root_path`` does not exist.
    """
    args = parse_args()
    root_path = args.root_path
    # Raise explicitly instead of asserting: asserts are stripped under -O.
    if not osp.exists(root_path):
        raise FileNotFoundError(
            'Root path does not exist: {}'.format(root_path))
    extract(root_path)
    # 'tmp' only exists when at least one archive was unpacked.
    tmp_dir = osp.join(root_path, 'tmp')
    if osp.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    shutil.rmtree(osp.join(root_path, 'KAIST'))
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()