# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import math
import numpy as np
import cv2
def clip_boxes_to_image(boxes, height, width):
"""
    Clip the boxes so that they lie within an image of the given height and width.
Args:
        boxes (ndarray): bounding boxes to clip. The dimension is
`num boxes` x 4.
height (int): the height of the image.
width (int): the width of the image.
Returns:
        boxes (ndarray): clipped bounding boxes.
"""
boxes[:, [0, 2]] = np.minimum(
width - 1.0, np.maximum(0.0, boxes[:, [0, 2]])
)
boxes[:, [1, 3]] = np.minimum(
height - 1.0, np.maximum(0.0, boxes[:, [1, 3]])
)
return boxes
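# Hypothetical usage sketch (the box coordinates below are made up for
# illustration): clip two [x1, y1, x2, y2] boxes to a 240 x 320 image.
#   boxes = np.array([[-5.0, 10.0, 330.0, 100.0], [20.0, -3.0, 50.0, 260.0]])
#   boxes = clip_boxes_to_image(boxes, height=240, width=320)
#   # x coordinates now lie in [0, 319], y coordinates in [0, 239].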
def random_short_side_scale_jitter_list(images, min_size, max_size, boxes=None):
"""
    Perform spatial short-side scale jitter on the given images and the
    corresponding boxes.
Args:
images (list): list of images to perform scale jitter. Dimension is
`height` x `width` x `channel`.
min_size (int): the minimal size to scale the frames.
max_size (int): the maximal size to scale the frames.
boxes (list): optional. Corresponding boxes to images. Dimension is
`num boxes` x 4.
Returns:
(list): the list of scaled images with dimension of
`new height` x `new width` x `channel`.
        (list or None): the scaled boxes with dimension of
`num boxes` x 4.
"""
size = int(round(1.0 / np.random.uniform(1.0 / max_size, 1.0 / min_size)))
height = images[0].shape[0]
width = images[0].shape[1]
if (width <= height and width == size) or (
height <= width and height == size
):
return images, boxes
new_width = size
new_height = size
if width < height:
new_height = int(math.floor((float(height) / width) * size))
if boxes is not None:
boxes = [
proposal * float(new_height) / height for proposal in boxes
]
else:
new_width = int(math.floor((float(width) / height) * size))
if boxes is not None:
boxes = [proposal * float(new_width) / width for proposal in boxes]
return (
[
cv2.resize(
image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
).astype(np.float32)
for image in images
],
boxes,
)
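# Hypothetical usage sketch (frame and box values are illustrative): jitter a
# clip of HWC frames so the short side lands in [256, 320], keeping boxes aligned.
#   frames = [np.random.rand(240, 320, 3).astype(np.float32) for _ in range(8)]
#   boxes = [np.array([[10.0, 20.0, 100.0, 200.0]]) for _ in range(8)]
#   frames, boxes = random_short_side_scale_jitter_list(
#       frames, min_size=256, max_size=320, boxes=boxes
#   )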
def scale(size, image):
"""
Scale the short side of the image to size.
Args:
size (int): size to scale the image.
image (array): image to perform short side scale. Dimension is
`height` x `width` x `channel`.
Returns:
        (ndarray): the scaled image with dimension of
            `new height` x `new width` x `channel`.
"""
height = image.shape[0]
width = image.shape[1]
if (width <= height and width == size) or (
height <= width and height == size
):
return image
new_width = size
new_height = size
if width < height:
new_height = int(math.floor((float(height) / width) * size))
else:
new_width = int(math.floor((float(width) / height) * size))
img = cv2.resize(
image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
)
return img.astype(np.float32)
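# Hypothetical usage sketch (input shape is illustrative): resize an HWC frame
# so its short side becomes 256 while keeping the aspect ratio.
#   frame = np.random.rand(240, 320, 3).astype(np.float32)
#   frame = scale(256, frame)  # -> shape (256, 341, 3)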
def scale_boxes(size, boxes, height, width):
"""
    Scale the boxes to match an image whose short side is scaled to `size`.
Args:
size (int): size to scale the image.
        boxes (ndarray): bounding boxes to scale. The dimension is
`num boxes` x 4.
height (int): the height of the image.
width (int): the width of the image.
Returns:
boxes (ndarray): scaled bounding boxes.
"""
if (width <= height and width == size) or (
height <= width and height == size
):
return boxes
new_width = size
new_height = size
if width < height:
new_height = int(math.floor((float(height) / width) * size))
boxes *= float(new_height) / height
else:
new_width = int(math.floor((float(width) / height) * size))
boxes *= float(new_width) / width
return boxes
def horizontal_flip_list(prob, images, order="CHW", boxes=None):
"""
Horizontally flip the list of image and optional boxes.
Args:
prob (float): probability to flip.
        images (list): list of images to flip. Dimension is
            `height` x `width` x `channel` or `channel` x `height` x `width`.
order (str): order of the `height`, `channel` and `width`.
boxes (list): optional. Corresponding boxes to images.
Dimension is `num boxes` x 4.
Returns:
        (list): the list of (possibly) flipped images, in the same layout as
            the input.
(list): optional. Corresponding boxes to images. Dimension is
`num boxes` x 4.
"""
_, width, _ = images[0].shape
if np.random.uniform() < prob:
if boxes is not None:
boxes = [flip_boxes(proposal, width) for proposal in boxes]
if order == "CHW":
out_images = []
for image in images:
image = np.asarray(image).swapaxes(2, 0)
image = image[::-1]
out_images.append(image.swapaxes(0, 2))
return out_images, boxes
elif order == "HWC":
return [cv2.flip(image, 1) for image in images], boxes
return images, boxes
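# Hypothetical usage sketch (shapes are illustrative): flip a list of HWC frames
# and their boxes with probability 0.5.
#   frames = [np.random.rand(224, 224, 3).astype(np.float32) for _ in range(8)]
#   boxes = [np.array([[10.0, 20.0, 100.0, 200.0]]) for _ in range(8)]
#   frames, boxes = horizontal_flip_list(0.5, frames, order="HWC", boxes=boxes)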
def spatial_shift_crop_list(size, images, spatial_shift_pos, boxes=None):
"""
Perform left, center, or right crop of the given list of images.
Args:
size (int): size to crop.
        images (list): list of images to crop. Dimension is
            `height` x `width` x `channel`.
spatial_shift_pos (int): option includes 0 (left), 1 (middle), and
2 (right) crop.
boxes (list): optional. Corresponding boxes to images.
Dimension is `num boxes` x 4.
Returns:
        cropped (list): the cropped list of images with dimension of
`height` x `width` x `channel`.
boxes (list): optional. Corresponding boxes to images. Dimension is
`num boxes` x 4.
"""
assert spatial_shift_pos in [0, 1, 2]
height = images[0].shape[0]
width = images[0].shape[1]
y_offset = int(math.ceil((height - size) / 2))
x_offset = int(math.ceil((width - size) / 2))
if height > width:
if spatial_shift_pos == 0:
y_offset = 0
elif spatial_shift_pos == 2:
y_offset = height - size
else:
if spatial_shift_pos == 0:
x_offset = 0
elif spatial_shift_pos == 2:
x_offset = width - size
cropped = [
image[y_offset : y_offset + size, x_offset : x_offset + size, :]
for image in images
]
assert cropped[0].shape[0] == size, "Image height not cropped properly"
assert cropped[0].shape[1] == size, "Image width not cropped properly"
if boxes is not None:
for i in range(len(boxes)):
boxes[i][:, [0, 2]] -= x_offset
boxes[i][:, [1, 3]] -= y_offset
return cropped, boxes
def CHW2HWC(image):
"""
Transpose the dimension from `channel` x `height` x `width` to
`height` x `width` x `channel`.
Args:
image (array): image to transpose.
    Returns:
(array): transposed image.
"""
return image.transpose([1, 2, 0])
def HWC2CHW(image):
"""
Transpose the dimension from `height` x `width` x `channel` to
`channel` x `height` x `width`.
Args:
image (array): image to transpose.
    Returns:
(array): transposed image.
"""
return image.transpose([2, 0, 1])
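# Hypothetical usage sketch: the two transpose helpers above are inverses.
#   img_hwc = np.random.rand(224, 224, 3)
#   img_chw = HWC2CHW(img_hwc)  # shape (3, 224, 224)
#   assert CHW2HWC(img_chw).shape == img_hwc.shape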
def color_jitter_list(
images, img_brightness=0, img_contrast=0, img_saturation=0
):
"""
Perform color jitter on the list of images.
Args:
images (list): list of images to perform color jitter.
img_brightness (float): jitter ratio for brightness.
img_contrast (float): jitter ratio for contrast.
img_saturation (float): jitter ratio for saturation.
Returns:
images (list): the jittered list of images.
"""
jitter = []
if img_brightness != 0:
jitter.append("brightness")
if img_contrast != 0:
jitter.append("contrast")
if img_saturation != 0:
jitter.append("saturation")
if len(jitter) > 0:
order = np.random.permutation(np.arange(len(jitter)))
for idx in range(0, len(jitter)):
if jitter[order[idx]] == "brightness":
images = brightness_list(img_brightness, images)
elif jitter[order[idx]] == "contrast":
images = contrast_list(img_contrast, images)
elif jitter[order[idx]] == "saturation":
images = saturation_list(img_saturation, images)
return images
def lighting_list(imgs, alphastd, eigval, eigvec, alpha=None):
"""
Perform AlexNet-style PCA jitter on the given list of images.
Args:
        imgs (list): list of images to perform lighting jitter.
alphastd (float): jitter ratio for PCA jitter.
eigval (list): eigenvalues for PCA jitter.
eigvec (list[list]): eigenvectors for PCA jitter.
Returns:
out_images (list): the list of jittered images.
"""
if alphastd == 0:
return imgs
# generate alpha1, alpha2, alpha3
alpha = np.random.normal(0, alphastd, size=(1, 3))
eig_vec = np.array(eigvec)
eig_val = np.reshape(eigval, (1, 3))
rgb = np.sum(
eig_vec * np.repeat(alpha, 3, axis=0) * np.repeat(eig_val, 3, axis=0),
axis=1,
)
out_images = []
for img in imgs:
for idx in range(img.shape[0]):
img[idx] = img[idx] + rgb[2 - idx]
out_images.append(img)
return out_images
def color_normalization(image, mean, stddev):
"""
Perform color normalization on the image with the given mean and stddev.
Args:
image (array): image to perform color normalization.
        mean (list): per-channel mean values to subtract.
        stddev (list): per-channel stddev values to divide by.
    Returns:
        image (array): the color-normalized image.
    """
    # Input image should be in the format of CHW.
assert len(mean) == image.shape[0], "channel mean not computed properly"
assert len(stddev) == image.shape[0], "channel stddev not computed properly"
for idx in range(image.shape[0]):
image[idx] = image[idx] - mean[idx]
image[idx] = image[idx] / stddev[idx]
return image
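# Hypothetical usage sketch (the statistics below are illustrative, not dataset
# values): normalize a CHW image with per-channel mean and stddev.
#   img = np.random.rand(3, 224, 224).astype(np.float32)
#   img = color_normalization(img, [0.45, 0.45, 0.45], [0.225, 0.225, 0.225])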
def pad_image(image, pad_size, order="CHW"):
"""
Pad the given image with the size of pad_size.
Args:
image (array): image to pad.
pad_size (int): size to pad.
order (str): order of the `height`, `channel` and `width`.
Returns:
img (array): padded image.
"""
if order == "CHW":
img = np.pad(
image,
((0, 0), (pad_size, pad_size), (pad_size, pad_size)),
mode=str("constant"),
)
elif order == "HWC":
img = np.pad(
image,
((pad_size, pad_size), (pad_size, pad_size), (0, 0)),
mode=str("constant"),
)
return img
def horizontal_flip(prob, image, order="CHW"):
"""
Horizontally flip the image.
Args:
prob (float): probability to flip.
        image (array): image to flip.
order (str): order of the `height`, `channel` and `width`.
Returns:
img (array): flipped image.
"""
assert order in ["CHW", "HWC"], "order {} is not supported".format(order)
if np.random.uniform() < prob:
if order == "CHW":
image = image[:, :, ::-1]
elif order == "HWC":
image = image[:, ::-1, :]
else:
raise NotImplementedError("Unknown order {}".format(order))
return image
def flip_boxes(boxes, im_width):
"""
Horizontally flip the boxes.
Args:
boxes (array): box to flip.
im_width (int): width of the image.
Returns:
boxes_flipped (array): flipped box.
"""
boxes_flipped = boxes.copy()
boxes_flipped[:, 0::4] = im_width - boxes[:, 2::4] - 1
boxes_flipped[:, 2::4] = im_width - boxes[:, 0::4] - 1
return boxes_flipped
def crop_boxes(boxes, x_offset, y_offset):
"""
Crop the boxes given the offsets.
Args:
boxes (array): boxes to crop.
x_offset (int): offset on x.
        y_offset (int): offset on y.
    Returns:
        boxes (array): cropped boxes.
    """
boxes[:, [0, 2]] = boxes[:, [0, 2]] - x_offset
boxes[:, [1, 3]] = boxes[:, [1, 3]] - y_offset
return boxes
def random_crop_list(images, size, pad_size=0, order="CHW", boxes=None):
"""
Perform random crop on a list of images.
Args:
images (list): list of images to perform random crop.
size (int): size to crop.
pad_size (int): padding size.
order (str): order of the `height`, `channel` and `width`.
boxes (list): optional. Corresponding boxes to images.
Dimension is `num boxes` x 4.
Returns:
        cropped (list): the cropped list of images with dimension of
`height` x `width` x `channel`.
boxes (list): optional. Corresponding boxes to images. Dimension is
`num boxes` x 4.
"""
    # Explicitly handle each channel order to avoid transposing the images.
if pad_size > 0:
images = [
pad_image(pad_size=pad_size, image=image, order=order)
for image in images
]
# image format should be CHW.
if order == "CHW":
if images[0].shape[1] == size and images[0].shape[2] == size:
return images, boxes
height = images[0].shape[1]
width = images[0].shape[2]
y_offset = 0
if height > size:
y_offset = int(np.random.randint(0, height - size))
x_offset = 0
if width > size:
x_offset = int(np.random.randint(0, width - size))
cropped = [
image[:, y_offset : y_offset + size, x_offset : x_offset + size]
for image in images
]
assert cropped[0].shape[1] == size, "Image not cropped properly"
assert cropped[0].shape[2] == size, "Image not cropped properly"
elif order == "HWC":
if images[0].shape[0] == size and images[0].shape[1] == size:
return images, boxes
height = images[0].shape[0]
width = images[0].shape[1]
y_offset = 0
if height > size:
y_offset = int(np.random.randint(0, height - size))
x_offset = 0
if width > size:
x_offset = int(np.random.randint(0, width - size))
cropped = [
image[y_offset : y_offset + size, x_offset : x_offset + size, :]
for image in images
]
assert cropped[0].shape[0] == size, "Image not cropped properly"
assert cropped[0].shape[1] == size, "Image not cropped properly"
if boxes is not None:
boxes = [crop_boxes(proposal, x_offset, y_offset) for proposal in boxes]
return cropped, boxes
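# Hypothetical usage sketch (shapes are illustrative): pad CHW frames by 4 pixels
# and take a shared random 224 x 224 crop across the clip.
#   frames = [np.random.rand(3, 256, 256).astype(np.float32) for _ in range(8)]
#   frames, _ = random_crop_list(frames, size=224, pad_size=4, order="CHW")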
def center_crop(size, image):
"""
Perform center crop on input images.
Args:
size (int): size of the cropped height and width.
        image (array): the image to perform center crop on.
    Returns:
        cropped (ndarray): the center-cropped image.
    """
height = image.shape[0]
width = image.shape[1]
y_offset = int(math.ceil((height - size) / 2))
x_offset = int(math.ceil((width - size) / 2))
cropped = image[y_offset : y_offset + size, x_offset : x_offset + size, :]
assert cropped.shape[0] == size, "Image height not cropped properly"
assert cropped.shape[1] == size, "Image width not cropped properly"
return cropped
# ResNet style scale jittering: randomly select the scale from
# [1/max_size, 1/min_size]
def random_scale_jitter(image, min_size, max_size):
"""
Perform ResNet style random scale jittering: randomly select the scale from
[1/max_size, 1/min_size].
Args:
image (array): image to perform random scale.
min_size (int): min size to scale.
        max_size (int): max size to scale.
Returns:
image (array): scaled image.
"""
img_scale = int(
round(1.0 / np.random.uniform(1.0 / max_size, 1.0 / min_size))
)
image = scale(img_scale, image)
return image
def random_scale_jitter_list(images, min_size, max_size):
"""
    Perform ResNet style random scale jittering on a list of images: randomly
    select the scale from [1/max_size, 1/min_size]. Note that all the images
    will share the same scale.
Args:
images (list): list of images to perform random scale.
min_size (int): min size to scale.
        max_size (int): max size to scale.
Returns:
        images (list): list of scaled images.
"""
img_scale = int(
round(1.0 / np.random.uniform(1.0 / max_size, 1.0 / min_size))
)
return [scale(img_scale, image) for image in images]
def random_sized_crop(image, size, area_frac=0.08):
"""
Perform random sized cropping on the given image. Random crop with size
8% - 100% image area and aspect ratio in [3/4, 4/3].
Args:
image (array): image to crop.
size (int): size to crop.
        area_frac (float): minimal fraction of the image area to crop.
Returns:
(array): cropped image.
"""
for _ in range(0, 10):
height = image.shape[0]
width = image.shape[1]
area = height * width
target_area = np.random.uniform(area_frac, 1.0) * area
aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0)
w = int(round(math.sqrt(float(target_area) * aspect_ratio)))
h = int(round(math.sqrt(float(target_area) / aspect_ratio)))
if np.random.uniform() < 0.5:
w, h = h, w
if h <= height and w <= width:
if height == h:
y_offset = 0
else:
y_offset = np.random.randint(0, height - h)
if width == w:
x_offset = 0
else:
x_offset = np.random.randint(0, width - w)
y_offset = int(y_offset)
x_offset = int(x_offset)
cropped = image[y_offset : y_offset + h, x_offset : x_offset + w, :]
assert (
cropped.shape[0] == h and cropped.shape[1] == w
), "Wrong crop size"
cropped = cv2.resize(
cropped, (size, size), interpolation=cv2.INTER_LINEAR
)
return cropped.astype(np.float32)
return center_crop(size, scale(size, image))
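# Hypothetical usage sketch (input shape is illustrative): Inception-style random
# sized crop of an HWC frame down to 224 x 224, falling back to a center crop
# after 10 failed attempts.
#   frame = np.random.rand(256, 340, 3).astype(np.float32)
#   frame = random_sized_crop(frame, size=224)  # -> shape (224, 224, 3)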
def lighting(img, alphastd, eigval, eigvec):
"""
Perform AlexNet-style PCA jitter on the given image.
Args:
        img (array): the image to perform lighting jitter on.
alphastd (float): jitter ratio for PCA jitter.
eigval (array): eigenvalues for PCA jitter.
eigvec (list): eigenvectors for PCA jitter.
Returns:
        img (array): the jittered image.
"""
if alphastd == 0:
return img
# generate alpha1, alpha2, alpha3.
alpha = np.random.normal(0, alphastd, size=(1, 3))
eig_vec = np.array(eigvec)
eig_val = np.reshape(eigval, (1, 3))
rgb = np.sum(
eig_vec * np.repeat(alpha, 3, axis=0) * np.repeat(eig_val, 3, axis=0),
axis=1,
)
for idx in range(img.shape[0]):
img[idx] = img[idx] + rgb[2 - idx]
return img
def random_sized_crop_list(images, size, crop_area_fraction=0.08):
"""
Perform random sized cropping on the given list of images. Random crop with
size 8% - 100% image area and aspect ratio in [3/4, 4/3].
Args:
        images (list): list of images to crop.
        size (int): size to crop.
        crop_area_fraction (float): minimal fraction of the image area to crop.
Returns:
        (list): list of cropped images.
"""
for _ in range(0, 10):
height = images[0].shape[0]
width = images[0].shape[1]
area = height * width
target_area = np.random.uniform(crop_area_fraction, 1.0) * area
aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0)
w = int(round(math.sqrt(float(target_area) * aspect_ratio)))
h = int(round(math.sqrt(float(target_area) / aspect_ratio)))
if np.random.uniform() < 0.5:
w, h = h, w
if h <= height and w <= width:
if height == h:
y_offset = 0
else:
y_offset = np.random.randint(0, height - h)
if width == w:
x_offset = 0
else:
x_offset = np.random.randint(0, width - w)
y_offset = int(y_offset)
x_offset = int(x_offset)
            cropped_images = []
for image in images:
cropped = image[
y_offset : y_offset + h, x_offset : x_offset + w, :
]
assert (
cropped.shape[0] == h and cropped.shape[1] == w
), "Wrong crop size"
cropped = cv2.resize(
cropped, (size, size), interpolation=cv2.INTER_LINEAR
)
                cropped_images.append(cropped.astype(np.float32))
            return cropped_images
return [center_crop(size, scale(size, image)) for image in images]
def blend(image1, image2, alpha):
    """
    Blend two images as `image1 * alpha + image2 * (1 - alpha)`.
    """
    return image1 * alpha + image2 * (1 - alpha)
def grayscale(image):
"""
Convert the image to gray scale.
Args:
        image (array): image to convert to gray scale. Dimension is
            `channel` x `height` x `width`.
    Returns:
        img_gray (array): image in gray scale.
"""
    # Weights assume BGR channel order: R -> 0.299, G -> 0.587, B -> 0.114.
img_gray = np.copy(image)
gray_channel = 0.299 * image[2] + 0.587 * image[1] + 0.114 * image[0]
img_gray[0] = gray_channel
img_gray[1] = gray_channel
img_gray[2] = gray_channel
return img_gray
def saturation(var, image):
"""
Perform color saturation on the given image.
Args:
var (float): variance.
image (array): image to perform color saturation.
Returns:
        (array): image with color saturation applied.
"""
img_gray = grayscale(image)
alpha = 1.0 + np.random.uniform(-var, var)
return blend(image, img_gray, alpha)
def brightness(var, image):
"""
Perform color brightness on the given image.
Args:
var (float): variance.
image (array): image to perform color brightness.
Returns:
        (array): image with color brightness applied.
"""
img_bright = np.zeros(image.shape).astype(image.dtype)
alpha = 1.0 + np.random.uniform(-var, var)
return blend(image, img_bright, alpha)
def contrast(var, image):
"""
Perform color contrast on the given image.
Args:
var (float): variance.
image (array): image to perform color contrast.
Returns:
        (array): image with color contrast applied.
"""
img_gray = grayscale(image)
img_gray.fill(np.mean(img_gray[0]))
alpha = 1.0 + np.random.uniform(-var, var)
return blend(image, img_gray, alpha)
def saturation_list(var, images):
"""
Perform color saturation on the list of given images.
Args:
var (float): variance.
images (list): list of images to perform color saturation.
Returns:
        (list): list of images with color saturation applied.
"""
alpha = 1.0 + np.random.uniform(-var, var)
out_images = []
for image in images:
img_gray = grayscale(image)
out_images.append(blend(image, img_gray, alpha))
return out_images
def brightness_list(var, images):
"""
Perform color brightness on the given list of images.
Args:
var (float): variance.
images (list): list of images to perform color brightness.
Returns:
        (list): list of images with color brightness applied.
"""
alpha = 1.0 + np.random.uniform(-var, var)
out_images = []
for image in images:
img_bright = np.zeros(image.shape).astype(image.dtype)
out_images.append(blend(image, img_bright, alpha))
return out_images
def contrast_list(var, images):
"""
Perform color contrast on the given list of images.
Args:
var (float): variance.
images (list): list of images to perform color contrast.
Returns:
        (list): list of images with color contrast applied.
"""
alpha = 1.0 + np.random.uniform(-var, var)
out_images = []
for image in images:
img_gray = grayscale(image)
img_gray.fill(np.mean(img_gray[0]))
out_images.append(blend(image, img_gray, alpha))
return out_images
def color_jitter(image, img_brightness=0, img_contrast=0, img_saturation=0):
"""
Perform color jitter on the given image.
Args:
image (array): image to perform color jitter.
img_brightness (float): jitter ratio for brightness.
img_contrast (float): jitter ratio for contrast.
img_saturation (float): jitter ratio for saturation.
Returns:
image (array): the jittered image.
"""
jitter = []
if img_brightness != 0:
jitter.append("brightness")
if img_contrast != 0:
jitter.append("contrast")
if img_saturation != 0:
jitter.append("saturation")
if len(jitter) > 0:
order = np.random.permutation(np.arange(len(jitter)))
for idx in range(0, len(jitter)):
if jitter[order[idx]] == "brightness":
image = brightness(img_brightness, image)
elif jitter[order[idx]] == "contrast":
image = contrast(img_contrast, image)
elif jitter[order[idx]] == "saturation":
image = saturation(img_saturation, image)
return image
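# Hypothetical usage sketch (jitter ratios are illustrative): apply brightness,
# contrast, and saturation jitter in a random order to a CHW image.
#   img = np.random.rand(3, 224, 224).astype(np.float32)
#   img = color_jitter(img, img_brightness=0.4, img_contrast=0.4, img_saturation=0.4)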
def revert_scaled_boxes(size, boxes, img_height, img_width):
"""
Revert scaled input boxes to match the original image size.
Args:
size (int): size of the cropped image.
boxes (array): shape (num_boxes, 4).
img_height (int): height of original image.
img_width (int): width of original image.
Returns:
reverted_boxes (array): boxes scaled back to the original image size.
"""
scaled_aspect = np.min([img_height, img_width])
scale_ratio = scaled_aspect / size
reverted_boxes = boxes * scale_ratio
return reverted_boxes
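if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the original API): run a typical
    # train-time pipeline on random HWC frames and print the resulting shapes.
    demo_frames = [np.random.rand(240, 320, 3).astype(np.float32) for _ in range(4)]
    demo_frames, _ = random_short_side_scale_jitter_list(
        demo_frames, min_size=256, max_size=320
    )
    demo_frames, _ = horizontal_flip_list(0.5, demo_frames, order="HWC")
    demo_frames = [HWC2CHW(frame) for frame in demo_frames]
    demo_frames, _ = random_crop_list(demo_frames, size=224, order="CHW")
    demo_frames = [
        color_normalization(frame, [0.45] * 3, [0.225] * 3) for frame in demo_frames
    ]
    print([frame.shape for frame in demo_frames])  # expect four (3, 224, 224) shapes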