chendl's picture
Add application file
0b7b08a
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
"""
Data augmentation functionality. Passed as callable transformations to
Dataset classes.
The data augmentation procedures were interpreted from @weiliu89's SSD paper
http://arxiv.org/abs/1512.02325
"""
import math
import random
import cv2
import numpy as np
from yolox.utils import xyxy2cxcywh
def augment_hsv(img, hgain=5, sgain=30, vgain=30):
    """Randomly shift hue, saturation and value of a BGR image, in place.

    For each of the three HSV channels an offset is drawn uniformly from
    ``[-gain, gain]`` and then multiplied by a random 0/1 switch, so every
    channel is independently either shifted or left alone. Offsets are
    truncated to ``int16`` before being applied.

    Args:
        img: BGR image. It is overwritten with the augmented result (it is
            passed as ``dst`` to the final colour conversion).
        hgain: Maximum absolute hue offset.
        sgain: Maximum absolute saturation offset.
        vgain: Maximum absolute value (brightness) offset.

    Returns:
        None. The result is written into ``img``.
    """
    max_shift = [hgain, sgain, vgain]
    shifts = np.random.uniform(-1, 1, 3) * max_shift  # random magnitude per channel
    shifts *= np.random.randint(0, 2, 3)  # randomly disable some channels
    hue_shift, sat_shift, val_shift = shifts.astype(np.int16)

    # Work in int16 so that adding an offset cannot wrap around.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)

    # Hue wraps around modulo 180; saturation and value saturate at 0 / 255.
    hsv[..., 0] = (hsv[..., 0] + hue_shift) % 180
    for channel, shift in ((1, sat_shift), (2, val_shift)):
        hsv[..., channel] = np.clip(hsv[..., channel] + shift, 0, 255)

    # Writing into dst=img makes the augmentation in-place; nothing is returned.
    cv2.cvtColor(hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)
def get_aug_params(value, center=0):
    """Sample a random augmentation parameter.

    Args:
        value: Either a single real number ``v``, in which case the sample is
            drawn uniformly from ``[center - v, center + v]``, or a sequence
            of exactly two numbers ``(low, high)`` giving the sampling range
            directly.
        center: Midpoint of the range when ``value`` is a single number.
            Ignored when ``value`` is a two-element sequence.

    Returns:
        The sampled value.

    Raises:
        ValueError: If ``value`` is neither a real number nor a sequence of
            exactly two values.
    """
    # Integers must be treated as scalars too: the affine helpers in this file
    # default to integer arguments (e.g. degrees=10, shear=10), which a
    # float-only check would send to len() and crash with TypeError.
    if isinstance(value, (int, float, np.integer, np.floating)):
        return random.uniform(center - value, center + value)

    try:
        is_pair = len(value) == 2
    except TypeError:
        # Unsized objects (e.g. None) are reported as invalid parameters below.
        is_pair = False

    if is_pair:
        return random.uniform(value[0], value[1])

    raise ValueError(
        "Affine params should be either a sequence containing two values "
        "or single float values. Got {}".format(value)
    )
def get_affine_matrix(
    target_size,
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Build a random 2x3 affine matrix (rotation, scale, shear, translation).

    Every ``degrees`` / ``translate`` / ``scales`` / ``shear`` argument is
    forwarded to ``get_aug_params`` to draw the actual random value.

    Args:
        target_size: ``(width, height)`` of the output; translation is drawn
            as a fraction and multiplied by these dimensions.
        degrees: Rotation range specification, in degrees.
        translate: Translation range specification, as a fraction of the
            target width / height.
        scales: Scale range specification, sampled around 1.0.
        shear: Shear angle range specification, in degrees.

    Returns:
        tuple: ``(M, scale)`` where ``M`` is the 2x3 affine matrix and
        ``scale`` is the sampled scale factor.

    Raises:
        ValueError: If the sampled scale is not positive.
    """
    out_w, out_h = target_size

    # Rotation and scale about the origin.
    rotation_deg = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)
    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")
    rot = cv2.getRotationMatrix2D(angle=rotation_deg, center=(0, 0), scale=scale)

    # Shear factors are the tangents of two independently sampled angles.
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)

    # Each output row is the matching rotation row plus a sheared copy of the other.
    matrix = np.stack((rot[0] + shear_y * rot[1], rot[1] + shear_x * rot[0]))

    # The translation column is overwritten last, in pixels (x first, then y).
    matrix[0, 2] = get_aug_params(translate) * out_w
    matrix[1, 2] = get_aug_params(translate) * out_h

    return matrix, scale
def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp axis-aligned boxes with an affine matrix and re-fit them.

    The four corners of every box are transformed by ``M``; the new box is
    the axis-aligned bounding rectangle of the warped corners, clipped to
    the target canvas.

    Args:
        targets: 2-D array whose first four columns are ``x1, y1, x2, y2``.
            Those columns are overwritten in place; other columns are kept.
        target_size: ``(width, height)`` used as the upper clipping bounds.
        M: 2x3 affine matrix.
        scale: Unused by this function; kept for interface compatibility.

    Returns:
        The same ``targets`` array, with its box columns updated.
    """
    box_count = len(targets)
    max_x, max_y = target_size

    # Homogeneous coordinates of the four corners of each box,
    # ordered x1y1, x2y2, x1y2, x2y1.
    corner_columns = [0, 1, 2, 3, 0, 3, 2, 1]
    corners = np.ones((4 * box_count, 3))
    corners[:, :2] = targets[:, corner_columns].reshape(4 * box_count, 2)

    # Apply the affine transform, then regroup as one row of 8 values per box.
    warped = (corners @ M.T).reshape(box_count, 8)
    xs = warped[:, 0::2]
    ys = warped[:, 1::2]

    # Axis-aligned bounding rectangle of the warped corners.
    warped_boxes = np.stack((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), axis=1)

    # Keep the boxes inside the target canvas.
    warped_boxes[:, 0::2] = np.clip(warped_boxes[:, 0::2], 0, max_x)
    warped_boxes[:, 1::2] = np.clip(warped_boxes[:, 1::2], 0, max_y)

    targets[:, :4] = warped_boxes
    return targets
def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Apply one random affine warp to an image and its box labels.

    Args:
        img: Input image, warped with ``cv2.warpAffine``.
        targets: Box labels; when non-empty they are passed to
            ``apply_affine_to_bboxes`` together with the same matrix.
        target_size: ``(width, height)`` of the warped output image.
        degrees: Rotation range specification (see ``get_affine_matrix``).
        translate: Translation range specification.
        scales: Scale range specification.
        shear: Shear range specification.

    Returns:
        tuple: ``(warped_image, targets)``. Empty ``targets`` are returned
        unchanged.
    """
    matrix, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)

    # Areas uncovered by the warp are filled with grey (114, 114, 114).
    warped = cv2.warpAffine(img, matrix, dsize=target_size, borderValue=(114, 114, 114))

    # Nothing to transform when there are no labels.
    if len(targets) == 0:
        return warped, targets

    return warped, apply_affine_to_bboxes(targets, target_size, matrix, scale)
def _mirror(image, boxes, prob=0.5):
_, width, _ = image.shape
if random.random() < prob:
image = image[:, ::-1]
boxes[:, 0::2] = width - boxes[:, 2::-2]
return image, boxes
def preproc(img, input_size, swap=(2, 0, 1)):
    """Letterbox an image onto a grey canvas and reorder its axes.

    The image is resized with its aspect ratio preserved so that it fits
    inside ``input_size``, pasted at the top-left corner of a canvas filled
    with the value 114, transposed according to ``swap`` and returned as a
    contiguous float32 array.

    Args:
        img: Input image array.
        input_size: ``(height, width)`` of the output canvas.
        swap: Axis permutation applied to the padded canvas.

    Returns:
        tuple: ``(padded_img, r)`` where ``r`` is the resize ratio that was
        applied to the input image.
    """
    # NOTE(review): for a 2-D image the canvas is 2-D as well, so the default
    # three-axis ``swap`` cannot be applied to it -- confirm whether 2-D input
    # is actually expected by callers.
    if len(img.shape) == 3:
        canvas_shape = (input_size[0], input_size[1], 3)
    else:
        canvas_shape = input_size
    canvas = np.full(canvas_shape, 114, dtype=np.uint8)

    src_h, src_w = img.shape[0], img.shape[1]
    # The tighter of the two ratios keeps the whole image inside the canvas.
    r = min(input_size[0] / src_h, input_size[1] / src_w)
    new_h = int(src_h * r)
    new_w = int(src_w * r)

    # cv2.resize takes its size as (width, height).
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    canvas[:new_h, :new_w] = resized.astype(np.uint8)

    canvas = canvas.transpose(swap)
    return np.ascontiguousarray(canvas, dtype=np.float32), r
class TrainTransform:
    """Training-time transform: HSV jitter, random flip, letterbox, label padding.

    Calling an instance returns the preprocessed image together with a
    fixed-size ``(max_labels, 5)`` float32 label array. Each row holds the
    label followed by the four box values produced by ``xyxy2cxcywh``; unused
    rows are zero.

    Args:
        max_labels: Number of rows in the padded label array; extra targets
            are dropped.
        flip_prob: Probability of a horizontal flip.
        hsv_prob: Probability of applying HSV augmentation.
    """

    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
        self.max_labels = max_labels
        self.flip_prob = flip_prob
        self.hsv_prob = hsv_prob

    def __call__(self, image, targets, input_dim):
        """Augment one sample.

        Args:
            image: Input image; may be modified in place by HSV augmentation.
            targets: 2-D array with box coordinates (xyxy) in columns 0-3 and
                the label in column 4.
            input_dim: Output size handed to ``preproc``.

        Returns:
            tuple: ``(image, padded_labels)``.
        """
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()

        # No annotations: only letterbox the image and return all-zero labels.
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 5), dtype=np.float32)
            image, _ratio = preproc(image, input_dim)
            return image, targets

        # Pristine copies, used only if augmentation leaves no usable box.
        backup_image = image.copy()
        backup_targets = targets.copy()
        # Values unused; the three-way unpack fails early on non 3-D images.
        _h, _w, _c = backup_image.shape
        backup_labels = backup_targets[:, 4]
        # boxes: [xyxy] to [c_x, c_y, w, h]
        backup_boxes = xyxy2cxcywh(backup_targets[:, :4])

        if random.random() < self.hsv_prob:
            augment_hsv(image)
        image_t, boxes = _mirror(image, boxes, self.flip_prob)
        image_t, ratio = preproc(image_t, input_dim)

        # boxes: [xyxy] to [c_x, c_y, w, h], then into the resized image's scale
        boxes = xyxy2cxcywh(boxes)
        boxes *= ratio

        # Keep only boxes whose width and height both exceed one pixel.
        keep = (boxes[:, 2] > 1) & (boxes[:, 3] > 1)
        boxes_t = boxes[keep]
        labels_t = labels[keep]

        # Everything was filtered out: fall back to the un-augmented sample.
        if len(boxes_t) == 0:
            image_t, backup_ratio = preproc(backup_image, input_dim)
            backup_boxes *= backup_ratio
            boxes_t = backup_boxes
            labels_t = backup_labels

        # Rows of [label, box...], truncated / zero-padded to max_labels rows.
        targets_t = np.hstack((labels_t[:, None], boxes_t))
        rows = range(len(targets_t))[: self.max_labels]
        padded_labels = np.zeros((self.max_labels, 5))
        padded_labels[rows] = targets_t[: self.max_labels]

        return image_t, np.ascontiguousarray(padded_labels, dtype=np.float32)
class ValTransform:
    """
    Evaluation-time transform applied to test/val images.

    The image is letterboxed and axis-swapped by ``preproc``. In legacy mode
    it is additionally reversed along its first axis (the channel axis under
    the default ``swap``), scaled to [0, 1] and normalised with fixed
    per-channel mean / std constants.

    Arguments:
        swap ((int,int,int)): final order of the image axes
        legacy (bool): enable the extra reversal and normalisation step

    Returns:
        transform (transform) : callable transform to be applied to test/val
        data
    """

    def __init__(self, swap=(2, 0, 1), legacy=False):
        self.swap = swap
        self.legacy = legacy

    # assume input is cv2 img for now
    def __call__(self, img, res, input_size):
        """Preprocess ``img``; ``res`` is ignored.

        Returns:
            tuple: ``(img, labels)`` where ``labels`` is always a ``(1, 5)``
            array of zeros.
        """
        img, _ = preproc(img, input_size, self.swap)
        if not self.legacy:
            return img, np.zeros((1, 5))

        channel_mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
        channel_std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)

        # Reverse the first axis, then normalise in place on the float32 copy.
        img = img[::-1, :, :].copy()
        img /= 255.0
        img -= channel_mean
        img /= channel_std
        return img, np.zeros((1, 5))