Feng Wang committed on
Commit
462e84f
·
1 Parent(s): 1e0e26e

fix(data): revert mixup refactor (#170)

Browse files
Files changed (1) hide show
  1. yolox/data/data_augment.py +43 -11
yolox/data/data_augment.py CHANGED
@@ -4,6 +4,9 @@
4
  """
5
  Data augmentation functionality. Passed as callable transformations to
6
  Dataset classes.
 
 
 
7
  """
8
 
9
  import math
@@ -14,8 +17,6 @@ import numpy as np
14
 
15
  import torch
16
 
17
- from yolox.utils import xyxy2cxcywh
18
-
19
 
20
  def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
21
  r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
@@ -196,11 +197,20 @@ class TrainTransform:
196
  def __call__(self, image, targets, input_dim):
197
  boxes = targets[:, :4].copy()
198
  labels = targets[:, 4].copy()
 
 
 
 
 
 
 
 
 
199
  if len(boxes) == 0:
200
- targets = np.zeros((self.max_labels, 5), dtype=np.float32)
201
  image, r_o = preproc(image, input_dim, self.means, self.std)
202
  image = np.ascontiguousarray(image, dtype=np.float32)
203
- return torch.as_tensor(image), torch.as_tensor(targets)
204
 
205
  image_o = image.copy()
206
  targets_o = targets.copy()
@@ -208,36 +218,58 @@ class TrainTransform:
208
  boxes_o = targets_o[:, :4]
209
  labels_o = targets_o[:, 4]
210
  # bbox_o: [xyxy] to [c_x,c_y,w,h]
211
- boxes_o = xyxy2cxcywh(boxes_o)
 
 
 
 
 
 
 
212
 
213
  image_t = _distort(image)
214
  image_t, boxes = _mirror(image_t, boxes)
215
  height, width, _ = image_t.shape
216
  image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
217
- boxes = xyxy2cxcywh(boxes)
218
  # boxes [xyxy] 2 [cx,cy,w,h]
 
 
 
 
 
 
 
 
219
 
220
  boxes *= r_
221
 
222
  mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 8
223
  boxes_t = boxes[mask_b]
224
- labels_t = labels[mask_b]
 
 
225
 
226
  if len(boxes_t) == 0:
227
  image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
228
  boxes_o *= r_o
229
  boxes_t = boxes_o
230
  labels_t = labels_o
 
231
 
232
  labels_t = np.expand_dims(labels_t, 1)
233
- targets_t = np.hstack((labels_t, boxes_t))
234
- padded_labels = np.zeros((self.max_labels, 5))
 
 
 
 
235
  padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
236
  : self.max_labels
237
  ]
238
  padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
239
  image_t = np.ascontiguousarray(image_t, dtype=np.float32)
240
- return torch.as_tensor(image_t), torch.as_tensor(padded_labels)
241
 
242
 
243
  class ValTransform:
@@ -266,4 +298,4 @@ class ValTransform:
266
  # assume input is cv2 img for now
267
  def __call__(self, img, res, input_size):
268
  img, _ = preproc(img, input_size, self.means, self.std, self.swap)
269
- return torch.as_tensor(img), torch.zeros(1, 5)
 
4
  """
5
  Data augmentation functionality. Passed as callable transformations to
6
  Dataset classes.
7
+
8
+ The data augmentation procedures were interpreted from @weiliu89's SSD paper
9
+ http://arxiv.org/abs/1512.02325
10
  """
11
 
12
  import math
 
17
 
18
  import torch
19
 
 
 
20
 
21
  def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
22
  r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
 
197
  def __call__(self, image, targets, input_dim):
198
  boxes = targets[:, :4].copy()
199
  labels = targets[:, 4].copy()
200
+ if targets.shape[1] > 5:
201
+ mixup = True
202
+ ratios = targets[:, -1].copy()
203
+ ratios_o = targets[:, -1].copy()
204
+ else:
205
+ mixup = False
206
+ ratios = None
207
+ ratios_o = None
208
+ lshape = 6 if mixup else 5
209
  if len(boxes) == 0:
210
+ targets = np.zeros((self.max_labels, lshape), dtype=np.float32)
211
  image, r_o = preproc(image, input_dim, self.means, self.std)
212
  image = np.ascontiguousarray(image, dtype=np.float32)
213
+ return image, targets
214
 
215
  image_o = image.copy()
216
  targets_o = targets.copy()
 
218
  boxes_o = targets_o[:, :4]
219
  labels_o = targets_o[:, 4]
220
  # bbox_o: [xyxy] to [c_x,c_y,w,h]
221
+ b_x_o = (boxes_o[:, 2] + boxes_o[:, 0]) * 0.5
222
+ b_y_o = (boxes_o[:, 3] + boxes_o[:, 1]) * 0.5
223
+ b_w_o = (boxes_o[:, 2] - boxes_o[:, 0]) * 1.0
224
+ b_h_o = (boxes_o[:, 3] - boxes_o[:, 1]) * 1.0
225
+ boxes_o[:, 0] = b_x_o
226
+ boxes_o[:, 1] = b_y_o
227
+ boxes_o[:, 2] = b_w_o
228
+ boxes_o[:, 3] = b_h_o
229
 
230
  image_t = _distort(image)
231
  image_t, boxes = _mirror(image_t, boxes)
232
  height, width, _ = image_t.shape
233
  image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
234
+ boxes = boxes.copy()
235
  # boxes [xyxy] 2 [cx,cy,w,h]
236
+ b_x = (boxes[:, 2] + boxes[:, 0]) * 0.5
237
+ b_y = (boxes[:, 3] + boxes[:, 1]) * 0.5
238
+ b_w = (boxes[:, 2] - boxes[:, 0]) * 1.0
239
+ b_h = (boxes[:, 3] - boxes[:, 1]) * 1.0
240
+ boxes[:, 0] = b_x
241
+ boxes[:, 1] = b_y
242
+ boxes[:, 2] = b_w
243
+ boxes[:, 3] = b_h
244
 
245
  boxes *= r_
246
 
247
  mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 8
248
  boxes_t = boxes[mask_b]
249
+ labels_t = labels[mask_b].copy()
250
+ if mixup:
251
+ ratios_t = ratios[mask_b].copy()
252
 
253
  if len(boxes_t) == 0:
254
  image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
255
  boxes_o *= r_o
256
  boxes_t = boxes_o
257
  labels_t = labels_o
258
+ ratios_t = ratios_o
259
 
260
  labels_t = np.expand_dims(labels_t, 1)
261
+ if mixup:
262
+ ratios_t = np.expand_dims(ratios_t, 1)
263
+ targets_t = np.hstack((labels_t, boxes_t, ratios_t))
264
+ else:
265
+ targets_t = np.hstack((labels_t, boxes_t))
266
+ padded_labels = np.zeros((self.max_labels, lshape))
267
  padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
268
  : self.max_labels
269
  ]
270
  padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
271
  image_t = np.ascontiguousarray(image_t, dtype=np.float32)
272
+ return image_t, padded_labels
273
 
274
 
275
  class ValTransform:
 
298
  # assume input is cv2 img for now
299
  def __call__(self, img, res, input_size):
300
  img, _ = preproc(img, input_size, self.means, self.std, self.swap)
301
+ return torch.from_numpy(img), torch.zeros(1, 5)