|
|
|
|
|
|
|
""" |
|
Data augmentation functionality. Passed as callable transformations to |
|
Dataset classes. |
|
|
|
The data augmentation procedures were adapted from @weiliu89's SSD paper:
http://arxiv.org/abs/1512.02325
|
""" |
|
|
|
import math |
|
import random |
|
|
|
import cv2 |
|
import numpy as np |
|
|
|
|
|
|
|
|
|
def xyxy2cxcywh(bboxes):
    """Convert boxes from (x1, y1, x2, y2) to (cx, cy, w, h), in place.

    Args:
        bboxes: (N, >=4) array whose first four columns are corner coords.

    Returns:
        The same array, mutated so columns 0-3 hold center-x, center-y,
        width, height.
    """
    widths = bboxes[:, 2] - bboxes[:, 0]
    heights = bboxes[:, 3] - bboxes[:, 1]
    # Centers are the top-left corner shifted by half the extent.
    bboxes[:, 0] = bboxes[:, 0] + widths * 0.5
    bboxes[:, 1] = bboxes[:, 1] + heights * 0.5
    bboxes[:, 2] = widths
    bboxes[:, 3] = heights
    return bboxes
|
|
|
|
|
|
|
def augment_hsv(img, hgain=5, sgain=30, vgain=30):
    """Randomly jitter hue, saturation and value of a BGR image, in place.

    Args:
        img: HxWx3 uint8 BGR image; modified in place via cv2's ``dst``.
        hgain, sgain, vgain: maximum absolute shift per channel.
    """
    gains = [hgain, sgain, vgain] * np.random.uniform(-1, 1, 3)
    # Each channel's jitter is independently enabled or disabled at random.
    gains *= np.random.randint(0, 2, 3)
    gains = gains.astype(np.int16)

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)
    hsv[..., 0] = (hsv[..., 0] + gains[0]) % 180  # OpenCV hue range is [0, 180)
    hsv[..., 1] = np.clip(hsv[..., 1] + gains[1], 0, 255)
    hsv[..., 2] = np.clip(hsv[..., 2] + gains[2], 0, 255)

    # Write the result back into the caller's buffer.
    cv2.cvtColor(hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)
|
|
|
|
|
def get_aug_params(value, center=0):
    """Sample a random augmentation parameter.

    Args:
        value: either a single number ``v`` (sampled uniformly from
            ``[center - v, center + v]``) or a two-element sequence
            ``(lo, hi)`` (sampled uniformly from ``[lo, hi]``).
        center: midpoint used when ``value`` is a single number.

    Returns:
        A float drawn from the corresponding uniform range.

    Raises:
        ValueError: if ``value`` is neither a number nor a two-element
            sequence.
    """
    # Accept ints as well as floats: callers in this module default to int
    # values (e.g. degrees=10, shear=10), which previously fell through to
    # len(value) and raised TypeError instead of being sampled.
    if isinstance(value, (int, float)):
        return random.uniform(center - value, center + value)
    elif len(value) == 2:
        return random.uniform(value[0], value[1])
    else:
        raise ValueError(
            "Affine params should be either a sequence containing two values "
            "or a single number. Got {}".format(value)
        )
|
|
|
|
|
def get_affine_matrix(
    target_size,
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Build a random 2x3 affine matrix (rotation, scale, shear, translation).

    Args:
        target_size: (width, height) of the output image; translation is
            sampled as a fraction of these dimensions.
        degrees, translate, scales, shear: jitter ranges forwarded to
            ``get_aug_params``.

    Returns:
        Tuple of (2x3 float matrix M, sampled scale factor).

    Raises:
        ValueError: if the sampled scale is not positive.
    """
    twidth, theight = target_size

    # Rotation angle and isotropic scale (centered on 1.0).
    angle = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)
    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")
    R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)

    # Shear angles (degrees) converted to slopes.
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)

    # Fold the shear into the rotation/scale rows.
    M = np.empty((2, 3))
    M[0] = R[0] + shear_y * R[1]
    M[1] = R[1] + shear_x * R[0]

    # Translation sampled as a fraction of the output size.
    M[0, 2] = get_aug_params(translate) * twidth
    M[1, 2] = get_aug_params(translate) * theight

    return M, scale
|
|
|
|
|
def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp the xyxy boxes in ``targets[:, :4]`` by affine matrix ``M``.

    Each box is replaced by the axis-aligned hull of its four warped
    corners, clipped to the output image bounds.

    Args:
        targets: (N, >=4) array; first four columns are x1, y1, x2, y2.
        target_size: (width, height) used for clipping.
        M: 2x3 affine matrix.
        scale: sampled scale factor (kept for interface compatibility; unused).

    Returns:
        ``targets`` with its box columns updated in place.
    """
    twidth, theight = target_size
    num_gts = len(targets)

    # Homogeneous coordinates of all four corners of every box:
    # (x1,y1), (x2,y2), (x1,y2), (x2,y1).
    corners = np.ones((4 * num_gts, 3))
    corners[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(4 * num_gts, 2)
    warped = (corners @ M.T).reshape(num_gts, 8)

    # Re-box: axis-aligned bounding box of the warped corners.
    xs = warped[:, 0::2]
    ys = warped[:, 1::2]
    hull = np.stack((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), axis=1)

    # Keep boxes inside the output image.
    hull[:, 0::2] = hull[:, 0::2].clip(0, twidth)
    hull[:, 1::2] = hull[:, 1::2].clip(0, theight)

    targets[:, :4] = hull
    return targets
|
|
|
|
|
def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Apply a random affine warp to ``img`` and its ground-truth boxes.

    Args:
        img: input image.
        targets: (N, >=4) array of xyxy boxes (may be empty).
        target_size: (width, height) of the output image.
        degrees, translate, scales, shear: jitter ranges for the transform.

    Returns:
        Tuple of (warped image padded with gray 114, updated targets).
    """
    M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)

    warped = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))

    # Boxes only need updating when there are any ground truths.
    if len(targets) > 0:
        targets = apply_affine_to_bboxes(targets, target_size, M, scale)

    return warped, targets
|
|
|
|
|
def _mirror(image, boxes, prob=0.5): |
|
_, width, _ = image.shape |
|
if random.random() < prob: |
|
image = image[:, ::-1] |
|
boxes[:, 0::2] = width - boxes[:, 2::-2] |
|
return image, boxes |
|
|
|
|
|
def preproc(img, input_size, swap=(2, 0, 1)):
    """Letterbox-resize ``img`` onto a gray (114) canvas of ``input_size``.

    The image is scaled by the largest ratio that fits inside
    ``input_size`` while preserving aspect ratio, pasted at the top-left,
    then transposed (HWC -> CHW by default) and converted to float32.

    Args:
        img: HxW or HxWx3 source image.
        input_size: (height, width) of the output canvas.
        swap: axis order for the final transpose.

    Returns:
        Tuple of (contiguous float32 padded image, resize ratio).
    """
    if len(img.shape) == 3:
        canvas = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
    else:
        canvas = np.ones(input_size, dtype=np.uint8) * 114

    ratio = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_h = int(img.shape[0] * ratio)
    new_w = int(img.shape[1] * ratio)
    resized = cv2.resize(
        img,
        (new_w, new_h),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.uint8)
    canvas[:new_h, :new_w] = resized

    out = np.ascontiguousarray(canvas.transpose(swap), dtype=np.float32)
    return out, ratio
|
|
|
|
|
class TrainTransform:
    """Training-time transform: HSV jitter, random horizontal flip,
    letterbox resize, and conversion of boxes to (cx, cy, w, h) with
    fixed-size, zero-padded label output.

    Args:
        max_labels: number of label rows in the padded output.
        flip_prob: probability of a horizontal flip.
        hsv_prob: probability of HSV color jitter.
    """

    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
        self.max_labels = max_labels
        self.flip_prob = flip_prob
        self.hsv_prob = hsv_prob

    def __call__(self, image, targets, input_dim):
        """Augment ``image`` and its ``targets`` (N x 5: x1, y1, x2, y2, class).

        Returns:
            Tuple of the preprocessed CHW float32 image and a
            (max_labels, 5) float32 array of [class, cx, cy, w, h] rows,
            zero-padded (and truncated) to ``max_labels``.
        """
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        # No ground truth: just letterbox the image and return all-zero labels.
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 5), dtype=np.float32)
            image, r_o = preproc(image, input_dim)
            return image, targets

        # Keep untouched copies so we can fall back if augmentation leaves
        # no valid boxes.
        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        # NOTE: boxes_o/labels_o are views into targets_o, so xyxy2cxcywh
        # below mutates targets_o in place.
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]

        # Pre-convert the fallback boxes to (cx, cy, w, h).
        boxes_o = xyxy2cxcywh(boxes_o)

        if random.random() < self.hsv_prob:
            augment_hsv(image)
        image_t, boxes = _mirror(image, boxes, self.flip_prob)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim)

        # Convert the augmented boxes and rescale to the letterboxed image.
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # Drop degenerate boxes (width or height <= 1 px after resizing).
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]

        # Every box was filtered out: discard the augmentation and reuse
        # the original image/boxes instead.
        if len(boxes_t) == 0:
            image_t, r_o = preproc(image_o, input_dim)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o

        labels_t = np.expand_dims(labels_t, 1)

        # Assemble [class, cx, cy, w, h] rows and pad/truncate to max_labels.
        targets_t = np.hstack((labels_t, boxes_t))
        padded_labels = np.zeros((self.max_labels, 5))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        return image_t, padded_labels
|
|
|
|
|
class ValTransform:
    """Validation/test-time transform.

    Letterbox-resizes the image via ``preproc`` and pads the ground-truth
    annotations to a fixed number of rows so batches can be stacked.

    Args:
        swap: channel order for the output image (default HWC -> CHW).
        legacy: if True, additionally convert BGR -> RGB, scale to [0, 1]
            and apply the old ImageNet mean/std normalization.

    Returns (when called):
        Tuple of the preprocessed float32 image and an (80, 5) float32
        array of [class, x1, y1, x2, y2] rows, zero-padded.
    """

    def __init__(self, swap=(2, 0, 1), legacy=False):
        self.swap = swap
        self.legacy = legacy

    def __call__(self, img, res, input_size):
        max_labels = 80

        # Reorder annotations from [x1, y1, x2, y2, class] to
        # [class, x1, y1, x2, y2] and pad/truncate to max_labels rows.
        boxes = res[:, :4].copy()
        classes = np.expand_dims(res[:, 4].copy(), 1)
        reordered = np.hstack((classes, boxes))
        padded_labels = np.zeros((max_labels, 5))
        n_keep = min(len(reordered), max_labels)
        padded_labels[:n_keep] = reordered[:n_keep]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)

        img, _ = preproc(img, input_size, self.swap)
        if self.legacy:
            # BGR -> RGB, then normalize with ImageNet statistics.
            img = img[::-1, :, :].copy()
            img /= 255.0
            img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
            img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
        return img, padded_labels
|
|