# Mountchicken's picture
# Upload 704 files
# 9bf4bd7
# raw
# history blame
# 2.63 kB
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import shutil
import xml.etree.ElementTree as ET
import zipfile
from xml.etree.ElementTree import ParseError
def extract(root_path):
    """Unpack every KAIST archive and collect valid image/annotation pairs.

    For each language / camera sub-folder under ``<root_path>/KAIST`` every
    entry is extracted into ``<root_path>/tmp/<entry name>`` with
    ``extract_zipfile``. Each ``.xml`` annotation that parses cleanly and has
    a sibling ``.jpg`` / ``.JPG`` image is moved to
    ``<root_path>/annotations/<idx>.xml`` and ``<root_path>/imgs/<idx>.jpg``,
    where ``<idx>`` is a zero-padded, 5-digit running counter shared by all
    archives. Annotations that fail to parse or have no image are left in
    the temporary folder.

    Args:
        root_path (str): Directory that contains the ``KAIST`` folder.
    """
    ann_dir = osp.join(root_path, 'annotations')
    img_dir = osp.join(root_path, 'imgs')
    # shutil.move fails if the destination folder is missing, so make sure
    # both output folders exist before any file is moved.
    os.makedirs(ann_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)

    idx = 0
    for language in ['English', 'Korean', 'Mixed']:
        for camera in ['Digital_Camera', 'Mobile_Phone']:
            crt_path = osp.join(root_path, 'KAIST', language, camera)
            # Sorted so that index assignment is reproducible across runs
            # and platforms (os.listdir order is arbitrary).
            for zip_name in sorted(os.listdir(crt_path)):
                extracted_path = osp.join(root_path, 'tmp', zip_name)
                extract_zipfile(osp.join(crt_path, zip_name), extracted_path)
                for file_name in sorted(os.listdir(extracted_path)):
                    # Split off the extension instead of str.replace so a
                    # stem that itself contains "xml" is not corrupted.
                    stem, ext = osp.splitext(file_name)
                    if ext != '.xml':
                        continue
                    src_ann = osp.join(extracted_path, file_name)
                    # Filtering broken annotations
                    try:
                        ET.parse(src_ann)
                    except ParseError:
                        continue
                    src_img = None
                    # No early break: when both spellings exist the later
                    # one ('.JPG') wins.
                    for suffix in ('.jpg', '.JPG'):
                        img_path = osp.join(extracted_path, stem + suffix)
                        if osp.exists(img_path):
                            src_img = img_path
                    if src_img is None:
                        continue
                    new_name = str(idx).zfill(5)
                    shutil.move(src_ann, osp.join(ann_dir, new_name + '.xml'))
                    shutil.move(src_img, osp.join(img_dir, new_name + '.jpg'))
                    idx += 1
def extract_zipfile(zip_path, dst_dir, delete=True):
    """Extract all members of a zip archive and optionally delete it.

    Args:
        zip_path (str): Path of the zip archive to unpack.
        dst_dir (str): Directory the archive members are extracted into.
        delete (bool): Whether to remove ``zip_path`` after extraction.
            Defaults to True.
    """
    # The context manager closes the archive handle before the file is
    # removed; an open handle leaks and makes os.remove fail on Windows.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(dst_dir)
    if delete:
        os.remove(zip_path)
def parse_args():
    """Parse the command-line arguments of the extraction script.

    Returns:
        argparse.Namespace: Parsed arguments holding the positional
        ``root_path`` value.
    """
    arg_parser = argparse.ArgumentParser(description='Extract KAIST zips')
    arg_parser.add_argument('root_path', help='Root path of KAIST')
    return arg_parser.parse_args()
def main():
    """Run the extraction and remove the intermediate folders afterwards.

    Reads ``root_path`` from the command line, calls ``extract`` on it and
    then deletes ``<root_path>/tmp`` and ``<root_path>/KAIST``.

    Raises:
        FileNotFoundError: If ``root_path`` does not exist.
    """
    args = parse_args()
    root_path = args.root_path
    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would let a bad path through.
    if not osp.exists(root_path):
        raise FileNotFoundError('Root path does not exist: ' + root_path)
    extract(root_path)
    # ``tmp`` is only created while extracting an archive, so it may be
    # missing when no archive was found.
    tmp_dir = osp.join(root_path, 'tmp')
    if osp.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    shutil.rmtree(osp.join(root_path, 'KAIST'))


if __name__ == '__main__':
    main()