glenn-jocher committed on
Commit
1bc0ef8
·
unverified ·
2 Parent(s): 9485f0b 72a1746

Merge pull request #202 from ChristopherSTAN/master

Browse files
Files changed (2) hide show
  1. data/get_voc.sh +205 -0
  2. data/voc.yaml +18 -0
data/get_voc.sh ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Download and unpack the PASCAL VOC2007 trainval and test archives.
# Usage: bash get_voc.sh [download_dir]
#   download_dir  optional existing directory to download into;
#                 when omitted, ../data is created and used instead.
start=$(date +%s)

# handle optional download dir
if [ -z "$1" ]
then
  # navigate to ../data
  echo "navigating to ../data/ ..."
  mkdir -p ../data
  cd ../data/ || exit 1
else
  # check if is valid directory (quoted so paths containing spaces work)
  if [ ! -d "$1" ]; then
    echo "$1" "is not a valid directory"
    # report the failure with a non-zero status so callers can detect it
    exit 1
  fi
  echo "navigating to" "$1" "..."
  cd "$1" || exit 1
fi

echo "Downloading VOC2007 trainval ..."
# Download the data.
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
echo "Downloading VOC2007 test data ..."
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
echo "Done downloading."

# Extract data
echo "Extracting trainval ..."
tar -xvf VOCtrainval_06-Nov-2007.tar
echo "Extracting test ..."
tar -xvf VOCtest_06-Nov-2007.tar
echo "removing tars ..."
rm VOCtrainval_06-Nov-2007.tar
rm VOCtest_06-Nov-2007.tar

end=$(date +%s)
runtime=$((end-start))

echo "Completed in" "$runtime" "seconds"
40
+
41
# Download and unpack the PASCAL VOC2012 trainval archive into the current
# directory.
#
# No directory handling is done here: the VOC2007 step earlier in this script
# has already changed into the download directory. Repeating the
# "[ -d $1 ] ... cd $1" sequence from inside that directory fails for a
# relative path argument and used to abort the script before this download.
start=$(date +%s)

echo "Downloading VOC2012 trainval ..."
# Download the data.
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
echo "Done downloading."


# Extract data
echo "Extracting trainval ..."
tar -xvf VOCtrainval_11-May-2012.tar
echo "removing tar ..."
rm VOCtrainval_11-May-2012.tar

end=$(date +%s)
runtime=$((end-start))

echo "Completed in" "$runtime" "seconds"
76
+
77
+ cd ../data
78
+ echo "Spliting dataset..."
79
+ python3 - "$@" <<END
80
+ import xml.etree.ElementTree as ET
81
+ import pickle
82
+ import os
83
+ from os import listdir, getcwd
84
+ from os.path import join
85
+
86
+ sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
87
+
88
+ classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
89
+
90
+
91
def convert(size, box):
    """Scale a pixel box to a normalised (x, y, w, h) tuple.

    size is (image_width, image_height) and box is (xmin, xmax, ymin, ymax).
    x and y are the box centre, moved by one pixel towards the origin, and
    w and h are the box extents; horizontal values are divided by the image
    width and vertical values by the image height.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    center_x = (xmin + xmax) / 2.0 - 1
    center_y = (ymin + ymax) / 2.0 - 1
    span_x = xmax - xmin
    span_y = ymax - ymin

    return (center_x * scale_x, center_y * scale_y, span_x * scale_x, span_y * scale_y)
103
+
104
+ def convert_annotation(year, image_id):
105
+ in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
106
+ out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
107
+ tree=ET.parse(in_file)
108
+ root = tree.getroot()
109
+ size = root.find('size')
110
+ w = int(size.find('width').text)
111
+ h = int(size.find('height').text)
112
+
113
+ for obj in root.iter('object'):
114
+ difficult = obj.find('difficult').text
115
+ cls = obj.find('name').text
116
+ if cls not in classes or int(difficult)==1:
117
+ continue
118
+ cls_id = classes.index(cls)
119
+ xmlbox = obj.find('bndbox')
120
+ b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
121
+ bb = convert((w,h), b)
122
+ out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
123
+
124
# Absolute path of the working directory; every image path written to the
# list files below is prefixed with it.
wd = getcwd()

# For each (year, image_set) pair: create the labels directory, write
# <year>_<image_set>.txt with one image path per line, and convert the
# annotation of every listed image.
for year, image_set in sets:
    # exist_ok avoids the separate (racy) existence check
    os.makedirs('VOCdevkit/VOC%s/labels/' % (year), exist_ok=True)

    # read the image ids inside a with-block so the handle is closed
    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()

    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
            convert_annotation(year, image_id)
135
+
136
+ END
137
+
138
+ cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt
139
+ cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt
140
+
141
+ python3 - "$@" <<END
142
+
143
# Copy the images and labels named in the list files into the
# ../VOC/{images,labels}/{train,val} layout.
import os
import shutil

VOC_ROOT = '../VOC'


def _copy_split(list_file, split):
    """Copy every image named in list_file, and its label, into a split.

    list_file holds one absolute image path per line. The label of
    <root>/JPEGImages/<name>.jpg is looked up at <root>/labels/<name>.txt.
    Files are copied to ../VOC/images/<split> and ../VOC/labels/<split>;
    entries whose source file does not exist are skipped.
    """
    image_dir = os.path.join(VOC_ROOT, 'images', split)
    label_dir = os.path.join(VOC_ROOT, 'labels', split)
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    # Read the list once and close it; blank lines are ignored.
    with open(list_file, 'r') as f:
        image_paths = [line.strip() for line in f if line.strip()]

    for image_path in image_paths:
        # The listed paths are absolute, so they are used as-is rather than
        # rebuilt relative to the parent directory (which only worked when
        # the data directory sat exactly two levels below the root).
        if os.path.exists(image_path):
            shutil.copy(image_path, image_dir)

        # Derive the label path from path components, so that a 'jpg' or
        # 'JPEGImages' substring elsewhere in the path is left untouched.
        year_dir = os.path.dirname(os.path.dirname(image_path))
        stem = os.path.splitext(os.path.basename(image_path))[0]
        label_path = os.path.join(year_dir, 'labels', stem + '.txt')
        if os.path.exists(label_path):
            shutil.copy(label_path, label_dir)


_copy_split('../data/train.txt', 'train')
_copy_split('../data/2007_test.txt', 'val')
202
+
203
+ END
204
+
205
+ rm -rf ../data
data/voc.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
2
+ # Download command: bash yolov5/data/get_voc.sh
3
+ # Train command: python train.py --data voc.yaml
4
+ # Dataset should be placed next to yolov5 folder:
5
+ # /parent_folder
6
+ # /VOC
7
+ # /yolov5
8
+
9
+ # train and val datasets (image directory or *.txt file with image paths)
10
+ train: ../VOC/images/train/
11
+ val: ../VOC/images/val/
12
+
13
+ # number of classes
14
+ nc: 20
15
+
16
+ # class names
17
+ names: ['aeroplane', 'bicycle','bird','boat','bottle','bus','car','cat','chair','cow','diningtable','dog','horse',
18
+ 'motorbike','person','pottedplant','sheep','sofa','train','tvmonitor']