Spaces:

tidalove
/

yolox

Sleeping

App Files Files Community

LGD-Ti-fighting Feng Wang committed on Oct 14, 2021

Commit

13be1ce

1 Parent(s): 49fa100

remove yolov5 codes with new hsv aug and new random affine transform (#781)

Browse files

remove yolov5 codes to make license legal

Co-authored-by: Feng Wang <[email protected]>

Files changed (2) hide show

yolox/data/data_augment.py +92 -98
yolox/data/datasets/mosaicdetection.py +14 -19

yolox/data/data_augment.py CHANGED Viewed

@@ -18,124 +18,118 @@ import numpy as np
 from yolox.utils import xyxy2cxcywh
-def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
-    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
-    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
     dtype = img.dtype  # uint8
-    x = np.arange(0, 256, dtype=np.int16)
-    lut_hue = ((x * r[0]) % 180).astype(dtype)
-    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
-    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
-    img_hsv = cv2.merge(
-        (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))
-    ).astype(dtype)
     cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed
-def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2):
-    # box1(4,n), box2(4,n)
-    # Compute candidate boxes which include follwing 5 things:
-    # box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
-    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
-    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
-    ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16))  # aspect ratio
-    return (
-        (w2 > wh_thr)
-        & (h2 > wh_thr)
-        & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr)
-        & (ar < ar_thr)
-    )  # candidates
-def random_perspective(
-    img,
-    targets=(),
     degrees=10,
     translate=0.1,
-    scale=0.1,
     shear=10,
-    perspective=0.0,
-    border=(0, 0),
 ):
-    # targets = [cls, xyxy]
-    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
-    width = img.shape[1] + border[1] * 2
-    # Center
-    C = np.eye(3)
-    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
-    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)
     # Rotation and Scale
-    R = np.eye(3)
-    a = random.uniform(-degrees, degrees)
-    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
-    s = random.uniform(scale[0], scale[1])
-    # s = 2 ** random.uniform(-scale, scale)
-    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
     # Shear
-    S = np.eye(3)
-    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
-    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
     # Translation
-    T = np.eye(3)
-    T[0, 2] = (
-        random.uniform(0.5 - translate, 0.5 + translate) * width
-    )  # x translation (pixels)
-    T[1, 2] = (
-        random.uniform(0.5 - translate, 0.5 + translate) * height
-    )  # y translation (pixels)
-    # Combined rotation matrix
-    M = T @ S @ R @ C  # order of operations (right to left) is IMPORTANT
-    ###########################
-    # For Aug out of Mosaic
-    # s = 1.
-    # M = np.eye(3)
-    ###########################
-    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
-        if perspective:
-            img = cv2.warpPerspective(
-                img, M, dsize=(width, height), borderValue=(114, 114, 114)
-            )
-        else:  # affine
-            img = cv2.warpAffine(
-                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
-            )
     # Transform label coordinates
-    n = len(targets)
-    if n:
-        # warp points
-        xy = np.ones((n * 4, 3))
-        xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
-            n * 4, 2
-        )  # x1y1, x2y2, x1y2, x2y1
-        xy = xy @ M.T  # transform
-        if perspective:
-            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
-        else:  # affine
-            xy = xy[:, :2].reshape(n, 8)
-        # create new boxes
-        x = xy[:, [0, 2, 4, 6]]
-        y = xy[:, [1, 3, 5, 7]]
-        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-        # clip boxes
-        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
-        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
-        # filter candidates
-        i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
-        targets = targets[i]
-        targets[:, :4] = xy[i]
     return img, targets

 from yolox.utils import xyxy2cxcywh
def augment_hsv(img, hgain=5, sgain=30, vgain=30):
    """Randomly shift the hue, saturation and value of a BGR image in place.

    An additive offset is drawn uniformly from ``[-gain, gain]`` for each of
    the three HSV channels; each offset is then independently kept or zeroed
    (multiplied by a random 0/1), so any subset of channels may be changed.

    Args:
        img: BGR image array. It is overwritten with the augmented result.
            NOTE(review): presumably uint8, as produced by cv2.imread - confirm.
        hgain: maximum absolute hue offset.
        sgain: maximum absolute saturation offset.
        vgain: maximum absolute value (brightness) offset.

    Returns:
        None. The result is written back into ``img``.
    """
    hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain]  # random gains
    hsv_augs *= np.random.randint(0, 2, 3)  # random selection of h, s, v

    # Do the arithmetic in a signed type that is wider than the image dtype.
    # Casting the (possibly negative) offsets to the image's unsigned dtype
    # wraps them around, and adding them to unsigned pixels overflows before
    # the modulo / clip below can take effect.
    hsv_augs = hsv_augs.astype(np.int16)
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)

    img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180  # hue wraps around
    img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
    img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)

    # Convert back to the input dtype before writing into the caller's buffer.
    cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)  # no return needed
def get_aug_params(value, center=0):
    """Draw one random augmentation parameter.

    Args:
        value: either a single number ``v`` (int or float), meaning the
            symmetric range ``[center - v, center + v]``, or a sequence of
            exactly two numbers ``(low, high)`` giving the range explicitly.
        center: midpoint used only when ``value`` is a single number.

    Returns:
        A float sampled with ``random.uniform`` from the selected range.

    Raises:
        ValueError: if ``value`` is a sequence whose length is not 2.
    """
    # Accept ints as well as floats: the sibling helpers default to
    # ``degrees=10`` / ``shear=10``, which previously fell through to
    # ``len(10)`` and raised TypeError.
    if isinstance(value, (int, float)):
        return random.uniform(center - value, center + value)
    elif len(value) == 2:
        return random.uniform(value[0], value[1])
    else:
        raise ValueError(
            "Affine params should be either a sequence containing two values "
            "or single float values. Got {}".format(value)
        )
def get_affine_matrix(
    target_size,
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Sample a random 2x3 affine matrix (rotation, scale, shear, translation).

    Every random quantity is obtained through ``get_aug_params``.

    Args:
        target_size: ``(width, height)`` pair; the sampled translation
            fractions are multiplied by these two values.
        degrees: rotation range specification, in degrees.
        translate: translation range specification, as a fraction of the
            target width / height.
        scales: scale range specification (a single number is taken as a
            range centred on 1.0).
        shear: shear range specification, in degrees.

    Returns:
        ``(M, scale)`` where ``M`` is the 2x3 matrix and ``scale`` is the
        sampled scale factor.

    Raises:
        ValueError: if the sampled scale is not positive.
    """
    out_w, out_h = target_size

    # Rotation and scale about the origin.
    rot_deg = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)
    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")
    rot = cv2.getRotationMatrix2D(angle=rot_deg, center=(0, 0), scale=scale)

    # Shear: the two angles are drawn in degrees and converted to tangents.
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)
    M = np.stack((rot[0] + shear_y * rot[1], rot[1] + shear_x * rot[0]))

    # Translation in pixels overwrites the last column.
    shift_x = get_aug_params(translate) * out_w
    shift_y = get_aug_params(translate) * out_h
    M[0, 2] = shift_x
    M[1, 2] = shift_y

    return M, scale
def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp boxes with an affine matrix and clip them to the target size.

    The four corners of every box are transformed by ``M``; the new box is the
    axis-aligned rectangle enclosing the warped corners, clipped to
    ``[0, width]`` x ``[0, height]``.

    Args:
        targets: 2-D array whose first four columns are ``x1, y1, x2, y2``.
            Those columns are overwritten in place; other columns are kept.
        target_size: ``(width, height)`` used as the clipping limits.
        M: 2x3 affine matrix.
        scale: accepted for interface compatibility; not used here.

    Returns:
        The same ``targets`` array, with its first four columns updated.
    """
    box_count = len(targets)
    width_limit, height_limit = target_size

    # Homogeneous corner points, ordered x1y1, x2y2, x1y2, x2y1 for each box.
    corner_columns = [0, 1, 2, 3, 0, 3, 2, 1]
    homogeneous = np.ones((4 * box_count, 3))
    homogeneous[:, :2] = targets[:, corner_columns].reshape(4 * box_count, 2)
    warped = (homogeneous @ M.T).reshape(box_count, 8)

    # Axis-aligned rectangle enclosing the four warped corners.
    xs = warped[:, 0::2]
    ys = warped[:, 1::2]
    enclosing = np.stack((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), axis=1)

    # Keep the boxes inside the output canvas.
    enclosing[:, 0::2] = enclosing[:, 0::2].clip(0, width_limit)
    enclosing[:, 1::2] = enclosing[:, 1::2].clip(0, height_limit)

    targets[:, :4] = enclosing
    return targets
def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Apply one random affine transform to an image and its boxes.

    Args:
        img: image passed to ``cv2.warpAffine``.
        targets: box labels; when non-empty they are warped with the same
            matrix through ``apply_affine_to_bboxes``.
        target_size: ``(width, height)`` of the output image.
        degrees: rotation range specification forwarded to ``get_affine_matrix``.
        translate: translation range specification forwarded likewise.
        scales: scale range specification forwarded likewise.
        shear: shear range specification forwarded likewise.

    Returns:
        ``(img, targets)``: the warped image and the (possibly updated) labels.
    """
    matrix, scale_factor = get_affine_matrix(
        target_size, degrees, translate, scales, shear
    )

    # Pixels with no source are filled with the constant (114, 114, 114).
    warped = cv2.warpAffine(
        img, matrix, dsize=target_size, borderValue=(114, 114, 114)
    )

    # Nothing to transform when there are no labels.
    if len(targets) == 0:
        return warped, targets

    # Transform label coordinates with the same matrix.
    return warped, apply_affine_to_bboxes(targets, target_size, matrix, scale_factor)

yolox/data/datasets/mosaicdetection.py CHANGED Viewed

@@ -9,7 +9,7 @@ import numpy as np
 from yolox.utils import adjust_box_anns, get_local_rank
-from ..data_augment import box_candidates, random_perspective
 from .datasets_wrapper import Dataset
@@ -40,8 +40,8 @@ class MosaicDetection(Dataset):
     def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
         degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
-        mixup_scale=(0.5, 1.5), shear=2.0, perspective=0.0,
-        enable_mixup=True, mosaic_prob=1.0, mixup_prob=1.0, *args
     ):
         """
@@ -55,7 +55,6 @@ class MosaicDetection(Dataset):
             mosaic_scale (tuple):
             mixup_scale (tuple):
             shear (float):
-            perspective (float):
             enable_mixup (bool):
             *args(tuple) : Additional arguments for mixup random sampler.
         """
@@ -66,7 +65,6 @@ class MosaicDetection(Dataset):
         self.translate = translate
         self.scale = mosaic_scale
         self.shear = shear
-        self.perspective = perspective
         self.mixup_scale = mixup_scale
         self.enable_mosaic = mosaic
         self.enable_mixup = enable_mixup
@@ -127,16 +125,15 @@ class MosaicDetection(Dataset):
                 np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])
                 np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])
-            mosaic_img, mosaic_labels = random_perspective(
                 mosaic_img,
                 mosaic_labels,
                 degrees=self.degrees,
                 translate=self.translate,
-                scale=self.scale,
                 shear=self.shear,
-                perspective=self.perspective,
-                border=[-input_h // 2, -input_w // 2],
-            )  # border to remove
             # -----------------------------------------------------------------
             # CopyPaste: https://arxiv.org/abs/2012.07177
@@ -226,14 +223,12 @@ class MosaicDetection(Dataset):
         cp_bboxes_transformed_np[:, 1::2] = np.clip(
             cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
         )
-        keep_list = box_candidates(cp_bboxes_origin_np.T, cp_bboxes_transformed_np.T, 5)
-        if keep_list.sum() >= 1.0:
-            cls_labels = cp_labels[keep_list, 4:5].copy()
-            box_labels = cp_bboxes_transformed_np[keep_list]
-            labels = np.hstack((box_labels, cls_labels))
-            origin_labels = np.vstack((origin_labels, labels))
-            origin_img = origin_img.astype(np.float32)
-            origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)
         return origin_img.astype(np.uint8), origin_labels

 from yolox.utils import adjust_box_anns, get_local_rank
+from ..data_augment import random_affine
 from .datasets_wrapper import Dataset
     def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
         degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
+        mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,
+        mosaic_prob=1.0, mixup_prob=1.0, *args
     ):
         """
             mosaic_scale (tuple):
             mixup_scale (tuple):
             shear (float):
             enable_mixup (bool):
             *args(tuple) : Additional arguments for mixup random sampler.
         """
         self.translate = translate
         self.scale = mosaic_scale
         self.shear = shear
         self.mixup_scale = mixup_scale
         self.enable_mosaic = mosaic
         self.enable_mixup = enable_mixup
                 np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])
                 np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])
+            mosaic_img, mosaic_labels = random_affine(
                 mosaic_img,
                 mosaic_labels,
+                target_size=(input_w, input_h),
                 degrees=self.degrees,
                 translate=self.translate,
+                scales=self.scale,
                 shear=self.shear,
+            )
             # -----------------------------------------------------------------
             # CopyPaste: https://arxiv.org/abs/2012.07177
         cp_bboxes_transformed_np[:, 1::2] = np.clip(
             cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h
         )
+        cls_labels = cp_labels[:, 4:5].copy()
+        box_labels = cp_bboxes_transformed_np
+        labels = np.hstack((box_labels, cls_labels))
+        origin_labels = np.vstack((origin_labels, labels))
+        origin_img = origin_img.astype(np.float32)
+        origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)
         return origin_img.astype(np.uint8), origin_labels