Spaces:

mshukor
/

eP-ALM

Runtime error

File size: 26,453 Bytes

3eb682b

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import math
import numpy as np
import cv2


def clip_boxes_to_image(boxes, height, width):
    """
    Clip the boxes with the height and width of the image size.
    Args:
        boxes (ndarray): bounding boxes to peform crop. The dimension is
        `num boxes` x 4.
        height (int): the height of the image.
        width (int): the width of the image.
    Returns:
        boxes (ndarray): cropped bounding boxes.
    """
    boxes[:, [0, 2]] = np.minimum(
        width - 1.0, np.maximum(0.0, boxes[:, [0, 2]])
    )
    boxes[:, [1, 3]] = np.minimum(
        height - 1.0, np.maximum(0.0, boxes[:, [1, 3]])
    )
    return boxes


def random_short_side_scale_jitter_list(images, min_size, max_size, boxes=None):
    """
    Perform a spatial short scale jittering on the given images and
    corresponding boxes.
    Args:
        images (list): list of images to perform scale jitter. Dimension is
            `height` x `width` x `channel`.
        min_size (int): the minimal size to scale the frames.
        max_size (int): the maximal size to scale the frames.
        boxes (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    Returns:
        (list): the list of scaled images with dimension of
            `new height` x `new width` x `channel`.
        (ndarray or None): the scaled boxes with dimension of
            `num boxes` x 4.
    """
    size = int(round(1.0 / np.random.uniform(1.0 / max_size, 1.0 / min_size)))

    height = images[0].shape[0]
    width = images[0].shape[1]
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return images, boxes
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
        if boxes is not None:
            boxes = [
                proposal * float(new_height) / height for proposal in boxes
            ]
    else:
        new_width = int(math.floor((float(width) / height) * size))
        if boxes is not None:
            boxes = [proposal * float(new_width) / width for proposal in boxes]
    return (
        [
            cv2.resize(
                image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
            ).astype(np.float32)
            for image in images
        ],
        boxes,
    )


def scale(size, image):
    """
    Scale the short side of the image to size.
    Args:
        size (int): size to scale the image.
        image (array): image to perform short side scale. Dimension is
            `height` x `width` x `channel`.
    Returns:
        (ndarray): the scaled image with dimension of
            `height` x `width` x `channel`.
    """
    height = image.shape[0]
    width = image.shape[1]
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return image
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
    else:
        new_width = int(math.floor((float(width) / height) * size))
    img = cv2.resize(
        image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
    )
    return img.astype(np.float32)


def scale_boxes(size, boxes, height, width):
    """
    Scale the short side of the box to size.
    Args:
        size (int): size to scale the image.
        boxes (ndarray): bounding boxes to peform scale. The dimension is
        `num boxes` x 4.
        height (int): the height of the image.
        width (int): the width of the image.
    Returns:
        boxes (ndarray): scaled bounding boxes.
    """
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return boxes

    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
        boxes *= float(new_height) / height
    else:
        new_width = int(math.floor((float(width) / height) * size))
        boxes *= float(new_width) / width
    return boxes


def horizontal_flip_list(prob, images, order="CHW", boxes=None):
    """
    Horizontally flip the list of image and optional boxes.
    Args:
        prob (float): probability to flip.
        image (list): ilist of images to perform short side scale. Dimension is
            `height` x `width` x `channel` or `channel` x `height` x `width`.
        order (str): order of the `height`, `channel` and `width`.
        boxes (list): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        (ndarray): the scaled image with dimension of
            `height` x `width` x `channel`.
        (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    """
    _, width, _ = images[0].shape
    if np.random.uniform() < prob:
        if boxes is not None:
            boxes = [flip_boxes(proposal, width) for proposal in boxes]
        if order == "CHW":
            out_images = []
            for image in images:
                image = np.asarray(image).swapaxes(2, 0)
                image = image[::-1]
                out_images.append(image.swapaxes(0, 2))
            return out_images, boxes
        elif order == "HWC":
            return [cv2.flip(image, 1) for image in images], boxes
    return images, boxes


def spatial_shift_crop_list(size, images, spatial_shift_pos, boxes=None):
    """
    Perform left, center, or right crop of the given list of images.
    Args:
        size (int): size to crop.
        image (list): ilist of images to perform short side scale. Dimension is
            `height` x `width` x `channel` or `channel` x `height` x `width`.
        spatial_shift_pos (int): option includes 0 (left), 1 (middle), and
            2 (right) crop.
        boxes (list): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        cropped (ndarray): the cropped list of images with dimension of
            `height` x `width` x `channel`.
        boxes (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    """

    assert spatial_shift_pos in [0, 1, 2]

    height = images[0].shape[0]
    width = images[0].shape[1]
    y_offset = int(math.ceil((height - size) / 2))
    x_offset = int(math.ceil((width - size) / 2))

    if height > width:
        if spatial_shift_pos == 0:
            y_offset = 0
        elif spatial_shift_pos == 2:
            y_offset = height - size
    else:
        if spatial_shift_pos == 0:
            x_offset = 0
        elif spatial_shift_pos == 2:
            x_offset = width - size

    cropped = [
        image[y_offset : y_offset + size, x_offset : x_offset + size, :]
        for image in images
    ]
    assert cropped[0].shape[0] == size, "Image height not cropped properly"
    assert cropped[0].shape[1] == size, "Image width not cropped properly"

    if boxes is not None:
        for i in range(len(boxes)):
            boxes[i][:, [0, 2]] -= x_offset
            boxes[i][:, [1, 3]] -= y_offset
    return cropped, boxes


def CHW2HWC(image):
    """
    Transpose the dimension from `channel` x `height` x `width` to
        `height` x `width` x `channel`.
    Args:
        image (array): image to transpose.
    Returns
        (array): transposed image.
    """
    return image.transpose([1, 2, 0])


def HWC2CHW(image):
    """
    Transpose the dimension from `height` x `width` x `channel` to
        `channel` x `height` x `width`.
    Args:
        image (array): image to transpose.
    Returns
        (array): transposed image.
    """
    return image.transpose([2, 0, 1])


def color_jitter_list(
    images, img_brightness=0, img_contrast=0, img_saturation=0
):
    """
    Perform color jitter on the list of images.
    Args:
        images (list): list of images to perform color jitter.
        img_brightness (float): jitter ratio for brightness.
        img_contrast (float): jitter ratio for contrast.
        img_saturation (float): jitter ratio for saturation.
    Returns:
        images (list): the jittered list of images.
    """
    jitter = []
    if img_brightness != 0:
        jitter.append("brightness")
    if img_contrast != 0:
        jitter.append("contrast")
    if img_saturation != 0:
        jitter.append("saturation")

    if len(jitter) > 0:
        order = np.random.permutation(np.arange(len(jitter)))
        for idx in range(0, len(jitter)):
            if jitter[order[idx]] == "brightness":
                images = brightness_list(img_brightness, images)
            elif jitter[order[idx]] == "contrast":
                images = contrast_list(img_contrast, images)
            elif jitter[order[idx]] == "saturation":
                images = saturation_list(img_saturation, images)
    return images


def lighting_list(imgs, alphastd, eigval, eigvec, alpha=None):
    """
    Perform AlexNet-style PCA jitter on the given list of images.
    Args:
        images (list): list of images to perform lighting jitter.
        alphastd (float): jitter ratio for PCA jitter.
        eigval (list): eigenvalues for PCA jitter.
        eigvec (list[list]): eigenvectors for PCA jitter.
    Returns:
        out_images (list): the list of jittered images.
    """
    if alphastd == 0:
        return imgs
    # generate alpha1, alpha2, alpha3
    alpha = np.random.normal(0, alphastd, size=(1, 3))
    eig_vec = np.array(eigvec)
    eig_val = np.reshape(eigval, (1, 3))
    rgb = np.sum(
        eig_vec * np.repeat(alpha, 3, axis=0) * np.repeat(eig_val, 3, axis=0),
        axis=1,
    )
    out_images = []
    for img in imgs:
        for idx in range(img.shape[0]):
            img[idx] = img[idx] + rgb[2 - idx]
        out_images.append(img)
    return out_images


def color_normalization(image, mean, stddev):
    """
    Perform color normalization on the image with the given mean and stddev.
    Args:
        image (array): image to perform color normalization.
        mean (float): mean value to subtract.
        stddev (float): stddev to devide.
    """
    # Input image should in format of CHW
    assert len(mean) == image.shape[0], "channel mean not computed properly"
    assert len(stddev) == image.shape[0], "channel stddev not computed properly"
    for idx in range(image.shape[0]):
        image[idx] = image[idx] - mean[idx]
        image[idx] = image[idx] / stddev[idx]
    return image


def pad_image(image, pad_size, order="CHW"):
    """
    Pad the given image with the size of pad_size.
    Args:
        image (array): image to pad.
        pad_size (int): size to pad.
        order (str): order of the `height`, `channel` and `width`.
    Returns:
        img (array): padded image.
    """
    if order == "CHW":
        img = np.pad(
            image,
            ((0, 0), (pad_size, pad_size), (pad_size, pad_size)),
            mode=str("constant"),
        )
    elif order == "HWC":
        img = np.pad(
            image,
            ((pad_size, pad_size), (pad_size, pad_size), (0, 0)),
            mode=str("constant"),
        )
    return img


def horizontal_flip(prob, image, order="CHW"):
    """
    Horizontally flip the image.
    Args:
        prob (float): probability to flip.
        image (array): image to pad.
        order (str): order of the `height`, `channel` and `width`.
    Returns:
        img (array): flipped image.
    """
    assert order in ["CHW", "HWC"], "order {} is not supported".format(order)
    if np.random.uniform() < prob:
        if order == "CHW":
            image = image[:, :, ::-1]
        elif order == "HWC":
            image = image[:, ::-1, :]
        else:
            raise NotImplementedError("Unknown order {}".format(order))
    return image


def flip_boxes(boxes, im_width):
    """
    Horizontally flip the boxes.
    Args:
        boxes (array): box to flip.
        im_width (int): width of the image.
    Returns:
        boxes_flipped (array): flipped box.
    """

    boxes_flipped = boxes.copy()
    boxes_flipped[:, 0::4] = im_width - boxes[:, 2::4] - 1
    boxes_flipped[:, 2::4] = im_width - boxes[:, 0::4] - 1
    return boxes_flipped


def crop_boxes(boxes, x_offset, y_offset):
    """
    Crop the boxes given the offsets.
    Args:
        boxes (array): boxes to crop.
        x_offset (int): offset on x.
        y_offset (int): offset on y.
    """
    boxes[:, [0, 2]] = boxes[:, [0, 2]] - x_offset
    boxes[:, [1, 3]] = boxes[:, [1, 3]] - y_offset
    return boxes


def random_crop_list(images, size, pad_size=0, order="CHW", boxes=None):
    """
    Perform random crop on a list of images.
    Args:
        images (list): list of images to perform random crop.
        size (int): size to crop.
        pad_size (int): padding size.
        order (str): order of the `height`, `channel` and `width`.
        boxes (list): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        cropped (ndarray): the cropped list of images with dimension of
            `height` x `width` x `channel`.
        boxes (list): optional. Corresponding boxes to images. Dimension is
            `num boxes` x 4.
    """
    # explicitly dealing processing per image order to avoid flipping images.
    if pad_size > 0:
        images = [
            pad_image(pad_size=pad_size, image=image, order=order)
            for image in images
        ]

    # image format should be CHW.
    if order == "CHW":
        if images[0].shape[1] == size and images[0].shape[2] == size:
            return images, boxes
        height = images[0].shape[1]
        width = images[0].shape[2]
        y_offset = 0
        if height > size:
            y_offset = int(np.random.randint(0, height - size))
        x_offset = 0
        if width > size:
            x_offset = int(np.random.randint(0, width - size))
        cropped = [
            image[:, y_offset : y_offset + size, x_offset : x_offset + size]
            for image in images
        ]
        assert cropped[0].shape[1] == size, "Image not cropped properly"
        assert cropped[0].shape[2] == size, "Image not cropped properly"
    elif order == "HWC":
        if images[0].shape[0] == size and images[0].shape[1] == size:
            return images, boxes
        height = images[0].shape[0]
        width = images[0].shape[1]
        y_offset = 0
        if height > size:
            y_offset = int(np.random.randint(0, height - size))
        x_offset = 0
        if width > size:
            x_offset = int(np.random.randint(0, width - size))
        cropped = [
            image[y_offset : y_offset + size, x_offset : x_offset + size, :]
            for image in images
        ]
        assert cropped[0].shape[0] == size, "Image not cropped properly"
        assert cropped[0].shape[1] == size, "Image not cropped properly"

    if boxes is not None:
        boxes = [crop_boxes(proposal, x_offset, y_offset) for proposal in boxes]
    return cropped, boxes


def center_crop(size, image):
    """
    Perform center crop on input images.
    Args:
        size (int): size of the cropped height and width.
        image (array): the image to perform center crop.
    """
    height = image.shape[0]
    width = image.shape[1]
    y_offset = int(math.ceil((height - size) / 2))
    x_offset = int(math.ceil((width - size) / 2))
    cropped = image[y_offset : y_offset + size, x_offset : x_offset + size, :]
    assert cropped.shape[0] == size, "Image height not cropped properly"
    assert cropped.shape[1] == size, "Image width not cropped properly"
    return cropped


# ResNet style scale jittering: randomly select the scale from
# [1/max_size, 1/min_size]
def random_scale_jitter(image, min_size, max_size):
    """
    Perform ResNet style random scale jittering: randomly select the scale from
        [1/max_size, 1/min_size].
    Args:
        image (array): image to perform random scale.
        min_size (int): min size to scale.
        max_size (int) max size to scale.
    Returns:
        image (array): scaled image.
    """
    img_scale = int(
        round(1.0 / np.random.uniform(1.0 / max_size, 1.0 / min_size))
    )
    image = scale(img_scale, image)
    return image


def random_scale_jitter_list(images, min_size, max_size):
    """
    Perform ResNet style random scale jittering on a list of image: randomly
        select the scale from [1/max_size, 1/min_size]. Note that all the image
        will share the same scale.
    Args:
        images (list): list of images to perform random scale.
        min_size (int): min size to scale.
        max_size (int) max size to scale.
    Returns:
        images (list): list of scaled image.
    """
    img_scale = int(
        round(1.0 / np.random.uniform(1.0 / max_size, 1.0 / min_size))
    )
    return [scale(img_scale, image) for image in images]


def random_sized_crop(image, size, area_frac=0.08):
    """
    Perform random sized cropping on the given image. Random crop with size
        8% - 100% image area and aspect ratio in [3/4, 4/3].
    Args:
        image (array): image to crop.
        size (int): size to crop.
        area_frac (float): area of fraction.
    Returns:
        (array): cropped image.
    """
    for _ in range(0, 10):
        height = image.shape[0]
        width = image.shape[1]
        area = height * width
        target_area = np.random.uniform(area_frac, 1.0) * area
        aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0)
        w = int(round(math.sqrt(float(target_area) * aspect_ratio)))
        h = int(round(math.sqrt(float(target_area) / aspect_ratio)))
        if np.random.uniform() < 0.5:
            w, h = h, w
        if h <= height and w <= width:
            if height == h:
                y_offset = 0
            else:
                y_offset = np.random.randint(0, height - h)
            if width == w:
                x_offset = 0
            else:
                x_offset = np.random.randint(0, width - w)
            y_offset = int(y_offset)
            x_offset = int(x_offset)
            cropped = image[y_offset : y_offset + h, x_offset : x_offset + w, :]
            assert (
                cropped.shape[0] == h and cropped.shape[1] == w
            ), "Wrong crop size"
            cropped = cv2.resize(
                cropped, (size, size), interpolation=cv2.INTER_LINEAR
            )
            return cropped.astype(np.float32)
    return center_crop(size, scale(size, image))


def lighting(img, alphastd, eigval, eigvec):
    """
    Perform AlexNet-style PCA jitter on the given image.
    Args:
        image (array): list of images to perform lighting jitter.
        alphastd (float): jitter ratio for PCA jitter.
        eigval (array): eigenvalues for PCA jitter.
        eigvec (list): eigenvectors for PCA jitter.
    Returns:
        img (tensor): the jittered image.
    """
    if alphastd == 0:
        return img
    # generate alpha1, alpha2, alpha3.
    alpha = np.random.normal(0, alphastd, size=(1, 3))
    eig_vec = np.array(eigvec)
    eig_val = np.reshape(eigval, (1, 3))
    rgb = np.sum(
        eig_vec * np.repeat(alpha, 3, axis=0) * np.repeat(eig_val, 3, axis=0),
        axis=1,
    )
    for idx in range(img.shape[0]):
        img[idx] = img[idx] + rgb[2 - idx]
    return img


def random_sized_crop_list(images, size, crop_area_fraction=0.08):
    """
    Perform random sized cropping on the given list of images. Random crop with
        size 8% - 100% image area and aspect ratio in [3/4, 4/3].
    Args:
        images (list): image to crop.
        size (int): size to crop.
        area_frac (float): area of fraction.
    Returns:
        (list): list of cropped image.
    """
    for _ in range(0, 10):
        height = images[0].shape[0]
        width = images[0].shape[1]
        area = height * width
        target_area = np.random.uniform(crop_area_fraction, 1.0) * area
        aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0)
        w = int(round(math.sqrt(float(target_area) * aspect_ratio)))
        h = int(round(math.sqrt(float(target_area) / aspect_ratio)))
        if np.random.uniform() < 0.5:
            w, h = h, w
        if h <= height and w <= width:
            if height == h:
                y_offset = 0
            else:
                y_offset = np.random.randint(0, height - h)
            if width == w:
                x_offset = 0
            else:
                x_offset = np.random.randint(0, width - w)
            y_offset = int(y_offset)
            x_offset = int(x_offset)

            croppsed_images = []
            for image in images:
                cropped = image[
                    y_offset : y_offset + h, x_offset : x_offset + w, :
                ]
                assert (
                    cropped.shape[0] == h and cropped.shape[1] == w
                ), "Wrong crop size"
                cropped = cv2.resize(
                    cropped, (size, size), interpolation=cv2.INTER_LINEAR
                )
                croppsed_images.append(cropped.astype(np.float32))
            return croppsed_images

    return [center_crop(size, scale(size, image)) for image in images]


def blend(image1, image2, alpha):
    return image1 * alpha + image2 * (1 - alpha)


def grayscale(image):
    """
    Convert the image to gray scale.
    Args:
        image (tensor): image to convert to gray scale. Dimension is
            `channel` x `height` x `width`.
    Returns:
        img_gray (tensor): image in gray scale.
    """
    # R -> 0.299, G -> 0.587, B -> 0.114.
    img_gray = np.copy(image)
    gray_channel = 0.299 * image[2] + 0.587 * image[1] + 0.114 * image[0]
    img_gray[0] = gray_channel
    img_gray[1] = gray_channel
    img_gray[2] = gray_channel
    return img_gray


def saturation(var, image):
    """
    Perform color saturation on the given image.
    Args:
        var (float): variance.
        image (array): image to perform color saturation.
    Returns:
        (array): image that performed color saturation.
    """
    img_gray = grayscale(image)
    alpha = 1.0 + np.random.uniform(-var, var)
    return blend(image, img_gray, alpha)


def brightness(var, image):
    """
    Perform color brightness on the given image.
    Args:
        var (float): variance.
        image (array): image to perform color brightness.
    Returns:
        (array): image that performed color brightness.
    """
    img_bright = np.zeros(image.shape).astype(image.dtype)
    alpha = 1.0 + np.random.uniform(-var, var)
    return blend(image, img_bright, alpha)


def contrast(var, image):
    """
    Perform color contrast on the given image.
    Args:
        var (float): variance.
        image (array): image to perform color contrast.
    Returns:
        (array): image that performed color contrast.
    """
    img_gray = grayscale(image)
    img_gray.fill(np.mean(img_gray[0]))
    alpha = 1.0 + np.random.uniform(-var, var)
    return blend(image, img_gray, alpha)


def saturation_list(var, images):
    """
    Perform color saturation on the list of given images.
    Args:
        var (float): variance.
        images (list): list of images to perform color saturation.
    Returns:
        (list): list of images that performed color saturation.
    """
    alpha = 1.0 + np.random.uniform(-var, var)

    out_images = []
    for image in images:
        img_gray = grayscale(image)
        out_images.append(blend(image, img_gray, alpha))
    return out_images


def brightness_list(var, images):
    """
    Perform color brightness on the given list of images.
    Args:
        var (float): variance.
        images (list): list of images to perform color brightness.
    Returns:
        (array): list of images that performed color brightness.
    """
    alpha = 1.0 + np.random.uniform(-var, var)

    out_images = []
    for image in images:
        img_bright = np.zeros(image.shape).astype(image.dtype)
        out_images.append(blend(image, img_bright, alpha))
    return out_images


def contrast_list(var, images):
    """
    Perform color contrast on the given list of images.
    Args:
        var (float): variance.
        images (list): list of images to perform color contrast.
    Returns:
        (array): image that performed color contrast.
    """
    alpha = 1.0 + np.random.uniform(-var, var)

    out_images = []
    for image in images:
        img_gray = grayscale(image)
        img_gray.fill(np.mean(img_gray[0]))
        out_images.append(blend(image, img_gray, alpha))
    return out_images


def color_jitter(image, img_brightness=0, img_contrast=0, img_saturation=0):
    """
    Perform color jitter on the given image.
    Args:
        image (array): image to perform color jitter.
        img_brightness (float): jitter ratio for brightness.
        img_contrast (float): jitter ratio for contrast.
        img_saturation (float): jitter ratio for saturation.
    Returns:
        image (array): the jittered image.
    """
    jitter = []
    if img_brightness != 0:
        jitter.append("brightness")
    if img_contrast != 0:
        jitter.append("contrast")
    if img_saturation != 0:
        jitter.append("saturation")

    if len(jitter) > 0:
        order = np.random.permutation(np.arange(len(jitter)))
        for idx in range(0, len(jitter)):
            if jitter[order[idx]] == "brightness":
                image = brightness(img_brightness, image)
            elif jitter[order[idx]] == "contrast":
                image = contrast(img_contrast, image)
            elif jitter[order[idx]] == "saturation":
                image = saturation(img_saturation, image)
    return image


def revert_scaled_boxes(size, boxes, img_height, img_width):
    """
    Revert scaled input boxes to match the original image size.
    Args:
        size (int): size of the cropped image.
        boxes (array): shape (num_boxes, 4).
        img_height (int): height of original image.
        img_width (int): width of original image.
    Returns:
        reverted_boxes (array): boxes scaled back to the original image size.
    """
    scaled_aspect = np.min([img_height, img_width])
    scale_ratio = scaled_aspect / size
    reverted_boxes = boxes * scale_ratio
    return reverted_boxes