Feng Wang
committed on
Commit
·
462e84f
1
Parent(s):
1e0e26e
fix(data): revert mixup refactor (#170)
Browse files- yolox/data/data_augment.py +43 -11
yolox/data/data_augment.py
CHANGED
@@ -4,6 +4,9 @@
|
|
4 |
"""
|
5 |
Data augmentation functionality. Passed as callable transformations to
|
6 |
Dataset classes.
|
|
|
|
|
|
|
7 |
"""
|
8 |
|
9 |
import math
|
@@ -14,8 +17,6 @@ import numpy as np
|
|
14 |
|
15 |
import torch
|
16 |
|
17 |
-
from yolox.utils import xyxy2cxcywh
|
18 |
-
|
19 |
|
20 |
def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
|
21 |
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
|
@@ -196,11 +197,20 @@ class TrainTransform:
|
|
196 |
def __call__(self, image, targets, input_dim):
|
197 |
boxes = targets[:, :4].copy()
|
198 |
labels = targets[:, 4].copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
if len(boxes) == 0:
|
200 |
-
targets = np.zeros((self.max_labels,
|
201 |
image, r_o = preproc(image, input_dim, self.means, self.std)
|
202 |
image = np.ascontiguousarray(image, dtype=np.float32)
|
203 |
-
return
|
204 |
|
205 |
image_o = image.copy()
|
206 |
targets_o = targets.copy()
|
@@ -208,36 +218,58 @@ class TrainTransform:
|
|
208 |
boxes_o = targets_o[:, :4]
|
209 |
labels_o = targets_o[:, 4]
|
210 |
# bbox_o: [xyxy] to [c_x,c_y,w,h]
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
image_t = _distort(image)
|
214 |
image_t, boxes = _mirror(image_t, boxes)
|
215 |
height, width, _ = image_t.shape
|
216 |
image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
|
217 |
-
boxes =
|
218 |
# boxes [xyxy] 2 [cx,cy,w,h]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
boxes *= r_
|
221 |
|
222 |
mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 8
|
223 |
boxes_t = boxes[mask_b]
|
224 |
-
labels_t = labels[mask_b]
|
|
|
|
|
225 |
|
226 |
if len(boxes_t) == 0:
|
227 |
image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
|
228 |
boxes_o *= r_o
|
229 |
boxes_t = boxes_o
|
230 |
labels_t = labels_o
|
|
|
231 |
|
232 |
labels_t = np.expand_dims(labels_t, 1)
|
233 |
-
|
234 |
-
|
|
|
|
|
|
|
|
|
235 |
padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
|
236 |
: self.max_labels
|
237 |
]
|
238 |
padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
|
239 |
image_t = np.ascontiguousarray(image_t, dtype=np.float32)
|
240 |
-
return
|
241 |
|
242 |
|
243 |
class ValTransform:
|
@@ -266,4 +298,4 @@ class ValTransform:
|
|
266 |
# assume input is cv2 img for now
|
267 |
def __call__(self, img, res, input_size):
|
268 |
img, _ = preproc(img, input_size, self.means, self.std, self.swap)
|
269 |
-
return torch.
|
|
|
4 |
"""
|
5 |
Data augmentation functionality. Passed as callable transformations to
|
6 |
Dataset classes.
|
7 |
+
|
8 |
+
The data augmentation procedures were interpreted from @weiliu89's SSD paper
|
9 |
+
http://arxiv.org/abs/1512.02325
|
10 |
"""
|
11 |
|
12 |
import math
|
|
|
17 |
|
18 |
import torch
|
19 |
|
|
|
|
|
20 |
|
21 |
def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
|
22 |
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
|
|
|
197 |
def __call__(self, image, targets, input_dim):
|
198 |
boxes = targets[:, :4].copy()
|
199 |
labels = targets[:, 4].copy()
|
200 |
+
if targets.shape[1] > 5:
|
201 |
+
mixup = True
|
202 |
+
ratios = targets[:, -1].copy()
|
203 |
+
ratios_o = targets[:, -1].copy()
|
204 |
+
else:
|
205 |
+
mixup = False
|
206 |
+
ratios = None
|
207 |
+
ratios_o = None
|
208 |
+
lshape = 6 if mixup else 5
|
209 |
if len(boxes) == 0:
|
210 |
+
targets = np.zeros((self.max_labels, lshape), dtype=np.float32)
|
211 |
image, r_o = preproc(image, input_dim, self.means, self.std)
|
212 |
image = np.ascontiguousarray(image, dtype=np.float32)
|
213 |
+
return image, targets
|
214 |
|
215 |
image_o = image.copy()
|
216 |
targets_o = targets.copy()
|
|
|
218 |
boxes_o = targets_o[:, :4]
|
219 |
labels_o = targets_o[:, 4]
|
220 |
# bbox_o: [xyxy] to [c_x,c_y,w,h]
|
221 |
+
b_x_o = (boxes_o[:, 2] + boxes_o[:, 0]) * 0.5
|
222 |
+
b_y_o = (boxes_o[:, 3] + boxes_o[:, 1]) * 0.5
|
223 |
+
b_w_o = (boxes_o[:, 2] - boxes_o[:, 0]) * 1.0
|
224 |
+
b_h_o = (boxes_o[:, 3] - boxes_o[:, 1]) * 1.0
|
225 |
+
boxes_o[:, 0] = b_x_o
|
226 |
+
boxes_o[:, 1] = b_y_o
|
227 |
+
boxes_o[:, 2] = b_w_o
|
228 |
+
boxes_o[:, 3] = b_h_o
|
229 |
|
230 |
image_t = _distort(image)
|
231 |
image_t, boxes = _mirror(image_t, boxes)
|
232 |
height, width, _ = image_t.shape
|
233 |
image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
|
234 |
+
boxes = boxes.copy()
|
235 |
# boxes [xyxy] 2 [cx,cy,w,h]
|
236 |
+
b_x = (boxes[:, 2] + boxes[:, 0]) * 0.5
|
237 |
+
b_y = (boxes[:, 3] + boxes[:, 1]) * 0.5
|
238 |
+
b_w = (boxes[:, 2] - boxes[:, 0]) * 1.0
|
239 |
+
b_h = (boxes[:, 3] - boxes[:, 1]) * 1.0
|
240 |
+
boxes[:, 0] = b_x
|
241 |
+
boxes[:, 1] = b_y
|
242 |
+
boxes[:, 2] = b_w
|
243 |
+
boxes[:, 3] = b_h
|
244 |
|
245 |
boxes *= r_
|
246 |
|
247 |
mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 8
|
248 |
boxes_t = boxes[mask_b]
|
249 |
+
labels_t = labels[mask_b].copy()
|
250 |
+
if mixup:
|
251 |
+
ratios_t = ratios[mask_b].copy()
|
252 |
|
253 |
if len(boxes_t) == 0:
|
254 |
image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
|
255 |
boxes_o *= r_o
|
256 |
boxes_t = boxes_o
|
257 |
labels_t = labels_o
|
258 |
+
ratios_t = ratios_o
|
259 |
|
260 |
labels_t = np.expand_dims(labels_t, 1)
|
261 |
+
if mixup:
|
262 |
+
ratios_t = np.expand_dims(ratios_t, 1)
|
263 |
+
targets_t = np.hstack((labels_t, boxes_t, ratios_t))
|
264 |
+
else:
|
265 |
+
targets_t = np.hstack((labels_t, boxes_t))
|
266 |
+
padded_labels = np.zeros((self.max_labels, lshape))
|
267 |
padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
|
268 |
: self.max_labels
|
269 |
]
|
270 |
padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
|
271 |
image_t = np.ascontiguousarray(image_t, dtype=np.float32)
|
272 |
+
return image_t, padded_labels
|
273 |
|
274 |
|
275 |
class ValTransform:
|
|
|
298 |
# assume input is cv2 img for now
|
299 |
def __call__(self, img, res, input_size):
|
300 |
img, _ = preproc(img, input_size, self.means, self.std, self.swap)
|
301 |
+
return torch.from_numpy(img), torch.zeros(1, 5)
|