#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
"""
Data augmentation functionality. Passed as callable transformations to
Dataset classes.

The data augmentation procedures were interpreted from @weiliu89's SSD paper
http://arxiv.org/abs/1512.02325
"""

import math
import random

import cv2
import numpy as np

from yolox.utils import xyxy2cxcywh


def augment_hsv(img, hgain=5, sgain=30, vgain=30):
    """Randomly jitter hue/saturation/value of a BGR uint8 image, in place.

    Each of the three HSV channels is independently either left untouched or
    shifted by a uniform random amount bounded by the corresponding gain.

    Args:
        img: BGR image array; modified in place (nothing is returned).
        hgain: max absolute hue shift.
        sgain: max absolute saturation shift.
        vgain: max absolute value shift.
    """
    hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain]  # random gains
    hsv_augs *= np.random.randint(0, 2, 3)  # random selection of h, s, v
    hsv_augs = hsv_augs.astype(np.int16)
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)

    # OpenCV stores hue in [0, 180) for 8-bit images, hence the modulo.
    img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
    img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
    img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)

    # Write the result straight back into the caller's buffer.
    cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)  # no return needed


def get_aug_params(value, center=0):
    """Sample a random augmentation parameter.

    Args:
        value: either a single number ``v`` — sample uniformly from
            ``[center - v, center + v]`` — or a two-element sequence
            ``(lo, hi)`` — sample uniformly from ``[lo, hi]``.
        center: midpoint used when ``value`` is a single number.

    Returns:
        A float drawn uniformly from the implied interval.

    Raises:
        ValueError: if ``value`` is neither a number nor a length-2 sequence.
    """
    # Accept ints as well as floats: callers pass e.g. degrees=10, shear=10
    # (the original float-only check made int inputs crash with TypeError
    # on len(value) below).
    if isinstance(value, (int, float)):
        return random.uniform(center - value, center + value)
    elif len(value) == 2:
        return random.uniform(value[0], value[1])
    else:
        raise ValueError(
            "Affine params should be either a sequence containing two values "
            "or single float values. Got {}".format(value)
        )


def get_affine_matrix(
    target_size,
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Build a random 2x3 affine matrix (rotation+scale, shear, translation).

    Args:
        target_size: (width, height) of the output image; translation is
            expressed as a fraction of these dimensions.
        degrees: rotation range (passed to :func:`get_aug_params`).
        translate: translation range as a fraction of target size.
        scales: scale range, sampled around 1.0.
        shear: shear angle range in degrees.

    Returns:
        (M, scale): the 2x3 affine matrix and the sampled scale factor.

    Raises:
        ValueError: if the sampled scale is not positive.
    """
    twidth, theight = target_size

    # Rotation and Scale
    angle = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)

    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")

    R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)

    M = np.ones([2, 3])
    # Shear
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)

    M[0] = R[0] + shear_y * R[1]
    M[1] = R[1] + shear_x * R[0]

    # Translation
    translation_x = get_aug_params(translate) * twidth  # x translation (pixels)
    translation_y = get_aug_params(translate) * theight  # y translation (pixels)

    M[0, 2] = translation_x
    M[1, 2] = translation_y

    return M, scale


def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp xyxy boxes through affine matrix M and re-axis-align them.

    The four corners of every box are transformed, then each new box is the
    axis-aligned hull of its warped corners, clipped to the target image.

    Args:
        targets: (N, >=4) array whose first 4 columns are x1, y1, x2, y2;
            modified in place and also returned.
        target_size: (width, height) used for clipping.
        M: 2x3 affine matrix.
        scale: unused; kept for interface compatibility with callers.

    Returns:
        ``targets`` with its first 4 columns replaced by the warped boxes.
    """
    num_gts = len(targets)

    # warp corner points
    twidth, theight = target_size
    corner_points = np.ones((4 * num_gts, 3))
    corner_points[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
        4 * num_gts, 2
    )  # x1y1, x2y2, x1y2, x2y1
    corner_points = corner_points @ M.T  # apply affine transform
    corner_points = corner_points.reshape(num_gts, 8)

    # create new boxes
    corner_xs = corner_points[:, 0::2]
    corner_ys = corner_points[:, 1::2]
    new_bboxes = (
        np.concatenate(
            (corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1))
        )
        .reshape(4, num_gts)
        .T
    )

    # clip boxes
    new_bboxes[:, 0::2] = new_bboxes[:, 0::2].clip(0, twidth)
    new_bboxes[:, 1::2] = new_bboxes[:, 1::2].clip(0, theight)

    targets[:, :4] = new_bboxes

    return targets


def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Apply a random affine transform to an image and its boxes.

    Args:
        img: input image (BGR).
        targets: optional (N, >=4) xyxy boxes, warped in step with the image.
        target_size: (width, height) of the output canvas.
        degrees, translate, scales, shear: ranges forwarded to
            :func:`get_affine_matrix`.

    Returns:
        (img, targets): the warped image (padded with gray value 114) and
        the correspondingly warped targets.
    """
    M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)

    img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))

    # Transform label coordinates
    if len(targets) > 0:
        targets = apply_affine_to_bboxes(targets, target_size, M, scale)

    return img, targets


def _mirror(image, boxes, prob=0.5):
    """Horizontally flip image and xyxy boxes with probability ``prob``.

    Args:
        image: HWC image array.
        boxes: (N, 4) xyxy boxes, mutated in place when the flip happens.
        prob: flip probability.

    Returns:
        (image, boxes) — possibly flipped.
    """
    _, width, _ = image.shape
    if random.random() < prob:
        image = image[:, ::-1]
        # boxes[:, 2::-2] selects (x2, x1); mirrored x1 = W - x2, x2 = W - x1.
        boxes[:, 0::2] = width - boxes[:, 2::-2]
    return image, boxes


def preproc(img, input_size, swap=(2, 0, 1)):
    """Resize with preserved aspect ratio onto a gray-padded canvas.

    Args:
        img: input image (HWC, or HW for single-channel).
        input_size: (height, width) of the output canvas.
        swap: axis permutation applied at the end (default HWC -> CHW).

    Returns:
        (padded_img, r): float32 contiguous output and the resize ratio ``r``
        actually applied to the input.
    """
    if len(img.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
    else:
        padded_img = np.ones(input_size, dtype=np.uint8) * 114

    # Ratio that fits the whole image inside the canvas without distortion.
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.uint8)
    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img

    padded_img = padded_img.transpose(swap)
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r


class TrainTransform:
    """Training-time transform: HSV jitter, random mirror, letterbox resize,
    xyxy -> cxcywh conversion, and padding of labels to a fixed count.
    """

    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
        # max_labels: fixed number of label rows emitted per image.
        self.max_labels = max_labels
        self.flip_prob = flip_prob
        self.hsv_prob = hsv_prob

    def __call__(self, image, targets, input_dim):
        """Augment ``image`` and return (image, padded labels).

        Args:
            image: HWC BGR image.
            targets: (N, 5) array of [x1, y1, x2, y2, class].
            input_dim: (height, width) network input size.

        Returns:
            (image_t, padded_labels): preprocessed CHW float32 image and a
            (max_labels, 5) float32 array of [class, cx, cy, w, h].
        """
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 5), dtype=np.float32)
            image, r_o = preproc(image, input_dim)
            return image, targets

        # Keep untouched copies so we can fall back if augmentation removes
        # every box (e.g. all boxes become degenerate after mirroring/resize).
        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        # bbox_o: [xyxy] to [c_x,c_y,w,h]
        boxes_o = xyxy2cxcywh(boxes_o)

        if random.random() < self.hsv_prob:
            augment_hsv(image)
        image_t, boxes = _mirror(image, boxes, self.flip_prob)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim)
        # boxes [xyxy] 2 [cx,cy,w,h]
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # Drop boxes whose smaller side shrank to <= 1 pixel.
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]

        if len(boxes_t) == 0:
            # Augmentation wiped out all boxes: use the original image/labels.
            image_t, r_o = preproc(image_o, input_dim)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o

        labels_t = np.expand_dims(labels_t, 1)

        targets_t = np.hstack((labels_t, boxes_t))
        padded_labels = np.zeros((self.max_labels, 5))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        return image_t, padded_labels


class ValTransform:
    """
    Defines the transformations that should be applied to test PIL image
    for input into the network

    dimension -> tensorize -> color adj

    Arguments:
        resize (int): input dimension to SSD
        rgb_means ((int,int,int)): average RGB of the dataset
            (104,117,123)
        swap ((int,int,int)): final order of channels

    Returns:
        transform (transform) : callable transform to be applied to test/val
        data
    """

    def __init__(self, swap=(2, 0, 1), legacy=False):
        self.swap = swap
        # legacy: apply the old BGR->RGB + ImageNet normalization pipeline.
        self.legacy = legacy

    # assume input is cv2 img for now
    def __call__(self, img, res, input_size):
        img, _ = preproc(img, input_size, self.swap)
        if self.legacy:
            img = img[::-1, :, :].copy()  # CHW: reverse channel axis (BGR -> RGB)
            img /= 255.0
            img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
            img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
        return img, np.zeros((1, 5))