File size: 2,626 Bytes
9bf4bd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import shutil
import xml.etree.ElementTree as ET
import zipfile
from xml.etree.ElementTree import ParseError


def extract(root_path):
    """Unpack all KAIST archives and collect valid image/annotation pairs.

    For every language in (English, Korean, Mixed) and every camera in
    (Digital_Camera, Mobile_Phone), each archive found in
    ``<root_path>/KAIST/<language>/<camera>`` is unpacked into
    ``<root_path>/tmp/<archive name>`` via ``extract_zipfile``. Every
    ``.xml`` annotation at the top level of the unpacked directory that
    parses successfully and has a sibling ``.jpg`` / ``.JPG`` image with
    the same stem is moved, together with that image, to
    ``<root_path>/annotations/<idx>.xml`` and ``<root_path>/imgs/<idx>.jpg``,
    where ``<idx>`` is a running counter zero-padded to 5 digits.

    Annotations that fail to parse, or that have no matching image, are
    left behind in the ``tmp`` directory.

    Args:
        root_path (str): Directory that contains the ``KAIST`` folder.
    """
    ann_dir = osp.join(root_path, 'annotations')
    img_dir = osp.join(root_path, 'imgs')
    # shutil.move does not create missing parent directories.
    os.makedirs(ann_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)

    idx = 0
    for language in ['English', 'Korean', 'Mixed']:
        for camera in ['Digital_Camera', 'Mobile_Phone']:
            crt_path = osp.join(root_path, 'KAIST', language, camera)
            # Sorted so the resulting numbering is reproducible; the order
            # returned by os.listdir is arbitrary.
            for zip_name in sorted(os.listdir(crt_path)):
                extracted_path = osp.join(root_path, 'tmp', zip_name)
                extract_zipfile(osp.join(crt_path, zip_name), extracted_path)
                for file_name in sorted(os.listdir(extracted_path)):
                    # Split off the real extension instead of replacing the
                    # substring 'xml', which could also occur in the stem.
                    stem, ext = osp.splitext(file_name)
                    if ext != '.xml':
                        continue
                    src_ann = osp.join(extracted_path, file_name)
                    # Filtering broken annotations
                    try:
                        ET.parse(src_ann)
                    except ParseError:
                        continue
                    src_img = None
                    # 'JPG' is probed first so that it wins when both
                    # spellings exist, as it did before.
                    for suffix in ('JPG', 'jpg'):
                        candidate = osp.join(extracted_path,
                                             stem + '.' + suffix)
                        if osp.exists(candidate):
                            src_img = candidate
                            break
                    if src_img is None:
                        continue
                    new_name = str(idx).zfill(5)
                    shutil.move(src_ann, osp.join(ann_dir, new_name + '.xml'))
                    shutil.move(src_img, osp.join(img_dir, new_name + '.jpg'))
                    idx += 1


def extract_zipfile(zip_path, dst_dir, delete=True):
    """Unpack a zip archive into a directory.

    Args:
        zip_path (str): Path of the zip archive to unpack.
        dst_dir (str): Directory that receives the archive members.
        delete (bool): Whether to remove the archive file after it has been
            unpacked. Defaults to True.
    """
    # The context manager closes the archive handle before the file is
    # removed; deleting a file that is still open fails on Windows.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(dst_dir)
    if delete:
        os.remove(zip_path)


def parse_args():
    """Read the command-line options of this script.

    Returns:
        argparse.Namespace: Parsed options; ``root_path`` holds the single
        positional argument.
    """
    cli = argparse.ArgumentParser(description='Extract KAIST zips')
    cli.add_argument('root_path', help='Root path of KAIST')
    return cli.parse_args()


def main():
    """Script entry point: extract the dataset, then clean up.

    Parses the command line, runs ``extract`` on the given root path and
    afterwards deletes the ``tmp`` scratch directory and the original
    ``KAIST`` directory below that root.

    Raises:
        FileNotFoundError: If the given root path does not exist.
    """
    args = parse_args()
    root_path = args.root_path
    # Explicit check instead of ``assert`` so the validation is not
    # stripped when Python runs with ``-O``.
    if not osp.exists(root_path):
        raise FileNotFoundError(
            'KAIST root path does not exist: {}'.format(root_path))
    extract(root_path)
    tmp_dir = osp.join(root_path, 'tmp')
    # ``tmp`` may be missing when no archive was unpacked.
    if osp.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    shutil.rmtree(osp.join(root_path, 'KAIST'))


# Run the extraction only when this file is executed as a script.
if __name__ == '__main__':
    main()