LGD-Ti-fighting Feng Wang committed on
Commit
13be1ce
·
1 Parent(s): 49fa100

remove yolov5 codes with new hsv aug and new random affine transform (#781)

Browse files

remove yolov5 codes to make license legal

Co-authored-by: Feng Wang <[email protected]>

yolox/data/data_augment.py CHANGED
@@ -18,124 +18,118 @@ import numpy as np
18
  from yolox.utils import xyxy2cxcywh
19
 
20
 
21
def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
    """Scale hue, saturation and value of a BGR image by random gains, in place.

    Each gain is drawn uniformly from ``1 - gain`` to ``1 + gain``. The result
    is written back into ``img`` through ``dst=img``; nothing is returned.

    Args:
        img: BGR image array (the original comment notes uint8).
        hgain: maximum relative change of the hue channel.
        sgain: maximum relative change of the saturation channel.
        vgain: maximum relative change of the value channel.
    """
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    h_chan, s_chan, v_chan = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    out_dtype = img.dtype  # uint8

    # One lookup table per channel, indexed by the 256 possible input levels.
    levels = np.arange(0, 256, dtype=np.int16)
    hue_table = ((levels * gains[0]) % 180).astype(out_dtype)
    sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    remapped = (
        cv2.LUT(h_chan, hue_table),
        cv2.LUT(s_chan, sat_table),
        cv2.LUT(v_chan, val_table),
    )
    img_hsv = cv2.merge(remapped).astype(out_dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed
35
 
36
 
37
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2):
    """Return a boolean mask of augmented boxes that are still worth keeping.

    Args:
        box1: boxes before augmentation, laid out as (4, n) rows x1, y1, x2, y2.
        box2: boxes after augmentation, same layout.
        wh_thr: minimum width and height (pixels) of the augmented box.
        ar_thr: maximum aspect ratio of the augmented box.
        area_thr: minimum ratio of augmented area to original area.

    Returns:
        Boolean mask with one entry per box; True means the box passes all
        four checks.
    """
    eps = 1e-16  # guards the divisions against zero-sized boxes

    w_before = box1[2] - box1[0]
    h_before = box1[3] - box1[1]
    w_after = box2[2] - box2[0]
    h_after = box2[3] - box2[1]

    aspect_ratio = np.maximum(w_after / (h_after + eps), h_after / (w_after + eps))
    area_ratio = w_after * h_after / (w_before * h_before + eps)

    wide_enough = w_after > wh_thr
    tall_enough = h_after > wh_thr
    kept_area = area_ratio > area_thr
    not_too_thin = aspect_ratio < ar_thr

    return wide_enough & tall_enough & kept_area & not_too_thin  # candidates
50
 
51
 
52
def random_perspective(
    img,
    targets=(),
    degrees=10,
    translate=0.1,
    scale=0.1,
    shear=10,
    perspective=0.0,
    border=(0, 0),
):
    """Apply a random rotation/scale/shear/translation to an image and its boxes.

    Args:
        img: image array of shape (h, w, c).
        targets: array whose first four columns are x1, y1, x2, y2; any further
            columns are carried through unchanged. May be empty.
        degrees: rotation angle is drawn from ``[-degrees, degrees]``.
        translate: translation is drawn from ``0.5 +/- translate`` of the
            output width/height.
        scale: either a ``(low, high)`` pair the scale factor is drawn from, or
            a single number meaning the range ``1 +/- scale``.
        shear: x and y shear angles are drawn from ``[-shear, shear]`` degrees.
        perspective: when truthy, ``cv2.warpPerspective`` is used instead of
            ``cv2.warpAffine``.
        border: (y, x) padding added on each side of the output canvas;
            negative values shrink it.

    Returns:
        ``(img, targets)``: the warped image and the boxes that survive
        ``box_candidates`` filtering, with coordinates clipped to the canvas.
    """
    # targets = [xyxy, ...] -- the box occupies the first four columns
    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    if isinstance(scale, (int, float)):
        # A scalar (including the default 0.1) cannot be indexed; treat it as a
        # symmetric range around 1.0.
        s = random.uniform(1 - scale, 1 + scale)
    else:
        s = random.uniform(scale[0], scale[1])
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = (
        random.uniform(0.5 - translate, 0.5 + translate) * width
    )  # x translation (pixels)
    T[1, 2] = (
        random.uniform(0.5 - translate, 0.5 + translate) * height
    )  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ C  # order of operations (right to left) is IMPORTANT

    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(
                img, M, dsize=(width, height), borderValue=(114, 114, 114)
            )
        else:  # affine
            img = cv2.warpAffine(
                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
            )

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp points
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
            n * 4, 2
        )  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # create new boxes (axis-aligned hull of the four warped corners)
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # clip boxes
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # filter candidates; the original boxes are scaled by s so the area
        # ratio is measured against the expected size after scaling
        i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, :4] = xy[i]

    return img, targets
141
 
 
18
  from yolox.utils import xyxy2cxcywh
19
 
20
 
21
def augment_hsv(img, hgain=5, sgain=30, vgain=30):
    """Add random offsets to hue, saturation and value of a BGR image, in place.

    For each channel an offset is drawn uniformly from ``[-gain, gain]`` and
    then independently kept or zeroed with equal probability. Hue wraps modulo
    180; saturation and value are clipped to ``[0, 255]``. The result is written
    back into ``img`` through ``dst=img``; nothing is returned.

    Args:
        img: BGR image array (the original comment notes uint8).
        hgain: maximum absolute hue offset.
        sgain: maximum absolute saturation offset.
        vgain: maximum absolute value offset.
    """
    hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain]  # random gains
    hsv_augs *= np.random.randint(0, 2, 3)  # random selection of h, s, v

    # Work in a signed 16-bit type: offsets can be negative and the sums can
    # leave the 0..255 range. Doing this in the image's unsigned dtype would wrap
    # around before the modulo / clip below could take effect.
    hsv_augs = hsv_augs.astype(np.int16)
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)

    img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
    img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
    img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)

    # Cast back to the image dtype only after values are in range.
    cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)  # no return needed
33
 
34
 
35
def get_aug_params(value, center=0):
    """Draw one random augmentation parameter.

    Args:
        value: either a single number, meaning the symmetric range
            ``[center - value, center + value]``, or a two-element sequence
            ``(low, high)`` giving the range directly.
        center: midpoint used when ``value`` is a single number.

    Returns:
        A float drawn uniformly from the selected range.

    Raises:
        ValueError: if ``value`` is a sequence whose length is not 2.
    """
    import numbers  # local import: only this function needs it

    # Accept any real number, not just float, so integer settings such as
    # degrees=10 work instead of falling through to len().
    if isinstance(value, numbers.Real):
        return random.uniform(center - value, center + value)
    if len(value) == 2:
        return random.uniform(value[0], value[1])
    raise ValueError(
        "Affine params should be either a sequence containing two values "
        "or a single number. Got {}".format(value)
    )
 
47
 
48
 
49
def get_affine_matrix(
    target_size,
    degrees=10.0,
    translate=0.1,
    scales=0.1,
    shear=10.0,
):
    """Build a random 2x3 affine matrix (rotation, scale, shear, translation).

    Each of ``degrees``, ``translate``, ``scales`` and ``shear`` is passed to
    ``get_aug_params``, so it may be a single value (symmetric range) or a
    ``(low, high)`` pair. The defaults are floats because ``get_aug_params``
    distinguishes the single-value case by type.

    Args:
        target_size: ``(width, height)`` of the output image; translation is
            expressed as a fraction of it.
        degrees: rotation range in degrees, centred on 0.
        translate: translation range as a fraction of the target size,
            centred on 0.
        scales: scale range, centred on 1.0 when given as a single value.
        shear: shear range in degrees, centred on 0.

    Returns:
        ``(M, scale)``: the 2x3 matrix and the sampled scale factor.

    Raises:
        ValueError: if the sampled scale is not positive.
    """
    twidth, theight = target_size

    # Rotation and Scale
    angle = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)

    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")

    R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)

    # Shear
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)

    # Every entry is assigned below, so the initial fill value is irrelevant.
    M = np.ones([2, 3])
    M[0] = R[0] + shear_y * R[1]
    M[1] = R[1] + shear_x * R[0]

    # Translation (overwrites the last column set by the shear step)
    translation_x = get_aug_params(translate) * twidth  # x translation (pixels)
    translation_y = get_aug_params(translate) * theight  # y translation (pixels)

    M[0, 2] = translation_x
    M[1, 2] = translation_y

    return M, scale
83
+
84
+
85
def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp boxes with a 2x3 affine matrix and clip them to the target image.

    The four corners of every box are transformed, and the axis-aligned box
    enclosing them becomes the new box. ``targets`` is updated in place and
    also returned.

    Args:
        targets: array whose first four columns are x1, y1, x2, y2.
        target_size: ``(width, height)`` used as the clipping bounds.
        M: 2x3 affine matrix.
        scale: accepted for interface compatibility; not used here.

    Returns:
        The same ``targets`` array with its first four columns replaced.
    """
    box_count = len(targets)
    max_x, max_y = target_size

    # Homogeneous corner coordinates, four rows per box:
    # x1y1, x2y2, x1y2, x2y1
    homogeneous = np.ones((4 * box_count, 3))
    homogeneous[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
        4 * box_count, 2
    )

    # apply affine transform, then regroup as one row of 8 values per box
    warped = (homogeneous @ M.T).reshape(box_count, 8)
    xs = warped[:, 0::2]
    ys = warped[:, 1::2]

    # enclosing axis-aligned box of the warped corners
    boxes = np.stack((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), axis=1)

    # clip boxes to the target image
    boxes[:, 0::2] = boxes[:, 0::2].clip(0, max_x)
    boxes[:, 1::2] = boxes[:, 1::2].clip(0, max_y)

    targets[:, :4] = boxes

    return targets
115
+
116
+
117
def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10.0,
    translate=0.1,
    scales=0.1,
    shear=10.0,
):
    """Warp an image and its boxes with one random affine transform.

    The matrix comes from ``get_affine_matrix``; the image is warped onto a
    canvas of ``target_size`` with grey (114) fill, and the boxes, if any, are
    transformed by ``apply_affine_to_bboxes``. The defaults are floats because
    the single-value form of each range is recognised by type downstream.

    Args:
        img: input image array.
        targets: array whose first four columns are x1, y1, x2, y2; may be
            empty.
        target_size: ``(width, height)`` of the output image.
        degrees: rotation range, forwarded to ``get_affine_matrix``.
        translate: translation range, forwarded to ``get_affine_matrix``.
        scales: scale range, forwarded to ``get_affine_matrix``.
        shear: shear range, forwarded to ``get_affine_matrix``.

    Returns:
        ``(img, targets)``: the warped image and the transformed targets.
    """
    M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)

    img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))

    # Transform label coordinates
    if len(targets) > 0:
        targets = apply_affine_to_bboxes(targets, target_size, M, scale)

    return img, targets
135
 
yolox/data/datasets/mosaicdetection.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
9
 
10
  from yolox.utils import adjust_box_anns, get_local_rank
11
 
12
- from ..data_augment import box_candidates, random_perspective
13
  from .datasets_wrapper import Dataset
14
 
15
 
@@ -40,8 +40,8 @@ class MosaicDetection(Dataset):
40
  def __init__(
41
  self, dataset, img_size, mosaic=True, preproc=None,
42
  degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
43
- mixup_scale=(0.5, 1.5), shear=2.0, perspective=0.0,
44
- enable_mixup=True, mosaic_prob=1.0, mixup_prob=1.0, *args
45
  ):
46
  """
47
 
@@ -55,7 +55,6 @@ class MosaicDetection(Dataset):
55
  mosaic_scale (tuple):
56
  mixup_scale (tuple):
57
  shear (float):
58
- perspective (float):
59
  enable_mixup (bool):
60
  *args(tuple) : Additional arguments for mixup random sampler.
61
  """
@@ -66,7 +65,6 @@ class MosaicDetection(Dataset):
66
  self.translate = translate
67
  self.scale = mosaic_scale
68
  self.shear = shear
69
- self.perspective = perspective
70
  self.mixup_scale = mixup_scale
71
  self.enable_mosaic = mosaic
72
  self.enable_mixup = enable_mixup
@@ -127,16 +125,15 @@ class MosaicDetection(Dataset):
127
  np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])
128
  np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])
129
 
130
- mosaic_img, mosaic_labels = random_perspective(
131
  mosaic_img,
132
  mosaic_labels,
 
133
  degrees=self.degrees,
134
  translate=self.translate,
135
- scale=self.scale,
136
  shear=self.shear,
137
- perspective=self.perspective,
138
- border=[-input_h // 2, -input_w // 2],
139
- ) # border to remove
140
 
141
  # -----------------------------------------------------------------
142
  # CopyPaste: https://arxiv.org/abs/2012.07177
@@ -226,14 +223,12 @@ class MosaicDetection(Dataset):
226
  cp_bboxes_transformed_np[:, 1::2] = np.clip(
227
  cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
228
  )
229
- keep_list = box_candidates(cp_bboxes_origin_np.T, cp_bboxes_transformed_np.T, 5)
230
-
231
- if keep_list.sum() >= 1.0:
232
- cls_labels = cp_labels[keep_list, 4:5].copy()
233
- box_labels = cp_bboxes_transformed_np[keep_list]
234
- labels = np.hstack((box_labels, cls_labels))
235
- origin_labels = np.vstack((origin_labels, labels))
236
- origin_img = origin_img.astype(np.float32)
237
- origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)
238
 
239
  return origin_img.astype(np.uint8), origin_labels
 
9
 
10
  from yolox.utils import adjust_box_anns, get_local_rank
11
 
12
+ from ..data_augment import random_affine
13
  from .datasets_wrapper import Dataset
14
 
15
 
 
40
  def __init__(
41
  self, dataset, img_size, mosaic=True, preproc=None,
42
  degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
43
+ mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,
44
+ mosaic_prob=1.0, mixup_prob=1.0, *args
45
  ):
46
  """
47
 
 
55
  mosaic_scale (tuple):
56
  mixup_scale (tuple):
57
  shear (float):
 
58
  enable_mixup (bool):
59
  *args(tuple) : Additional arguments for mixup random sampler.
60
  """
 
65
  self.translate = translate
66
  self.scale = mosaic_scale
67
  self.shear = shear
 
68
  self.mixup_scale = mixup_scale
69
  self.enable_mosaic = mosaic
70
  self.enable_mixup = enable_mixup
 
125
  np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])
126
  np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])
127
 
128
+ mosaic_img, mosaic_labels = random_affine(
129
  mosaic_img,
130
  mosaic_labels,
131
+ target_size=(input_w, input_h),
132
  degrees=self.degrees,
133
  translate=self.translate,
134
+ scales=self.scale,
135
  shear=self.shear,
136
+ )
 
 
137
 
138
  # -----------------------------------------------------------------
139
  # CopyPaste: https://arxiv.org/abs/2012.07177
 
223
  cp_bboxes_transformed_np[:, 1::2] = np.clip(
224
  cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
225
  )
226
+
227
+ cls_labels = cp_labels[:, 4:5].copy()
228
+ box_labels = cp_bboxes_transformed_np
229
+ labels = np.hstack((box_labels, cls_labels))
230
+ origin_labels = np.vstack((origin_labels, labels))
231
+ origin_img = origin_img.astype(np.float32)
232
+ origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)
 
 
233
 
234
  return origin_img.astype(np.uint8), origin_labels