from typing import Union

import albumentations as A
import cv2
import numpy as np
import torch
from albumentations.pytorch.transforms import ToTensorV2
from omegaconf import OmegaConf

# flake8: noqa
# mypy: ignore-errors


class IdentityTransform(A.ImageOnlyTransform):
    """Transform that returns the input image unchanged."""

    def apply(self, img, **params):
        return img

    def get_transform_init_args_names(self):
        return ()


class RandomAdditiveShade(A.ImageOnlyTransform):
    """Overlay randomly placed, blurred elliptical shading patterns on the image."""

    def __init__(
        self,
        nb_ellipses=10,
        transparency_limit=[-0.5, 0.8],
        kernel_size_limit=[150, 350],
        always_apply=False,
        p=0.5,
    ):
        super().__init__(always_apply, p)
        self.nb_ellipses = nb_ellipses
        self.transparency_limit = transparency_limit
        self.kernel_size_limit = kernel_size_limit

    def apply(self, img, **params):
        if img.dtype == np.float32:
            shaded = self._py_additive_shade(img * 255.0)
            shaded /= 255.0
        elif img.dtype == np.uint8:
            shaded = self._py_additive_shade(img.astype(np.float32))
            shaded = shaded.astype(np.uint8)
        else:
            raise NotImplementedError(f"Data augmentation not available for type: {img.dtype}")
        return shaded

    def _py_additive_shade(self, img):
        grayscale = len(img.shape) == 2
        if grayscale:
            # add a trailing channel axis so the spatial dims stay first and the
            # mask broadcasting below works for both grayscale and color images
            img = img[..., np.newaxis]
        min_dim = min(img.shape[:2]) / 4
        mask = np.zeros(img.shape[:2], img.dtype)
        for i in range(self.nb_ellipses):
            ax = int(max(np.random.rand() * min_dim, min_dim / 5))
            ay = int(max(np.random.rand() * min_dim, min_dim / 5))
            max_rad = max(ax, ay)
            x = np.random.randint(max_rad, img.shape[1] - max_rad)  # center
            y = np.random.randint(max_rad, img.shape[0] - max_rad)
            angle = np.random.rand() * 90
            cv2.ellipse(mask, (x, y), (ax, ay), angle, 0, 360, 255, -1)

        transparency = np.random.uniform(*self.transparency_limit)
        ks = np.random.randint(*self.kernel_size_limit)
        if (ks % 2) == 0:  # kernel_size has to be odd
            ks += 1
        mask = cv2.GaussianBlur(mask.astype(np.float32), (ks, ks), 0)
        shaded = img * (1 - transparency * mask[..., np.newaxis] / 255.0)
        out = np.clip(shaded, 0, 255)
        if grayscale:
            out = out.squeeze(-1)
        return out

    def get_transform_init_args_names(self):
        return "transparency_limit", "kernel_size_limit", "nb_ellipses"


def kw(entry: Union[float, dict], n=None, **default):
    """Expand a probability or config-dict entry into keyword arguments over defaults."""
    if not isinstance(entry, dict):
        entry = {"p": entry}
    entry = OmegaConf.create(entry)
    if n is not None:
        # pick the per-transform sub-config named `n` if given, else use the entry itself
        entry = entry.get(n, entry)
    return OmegaConf.merge(default, entry)


def kwi(entry: Union[float, dict], n=None, **default):
    """Like `kw`, but only keep the keys present in the defaults (plus "p")."""
    conf = kw(entry, n=n, **default)
    return {k: conf[k] for k in set(default.keys()).union(set(["p"]))}
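
# Usage sketch (illustrative only): `kw` merges a probability or a config dict over default
# keyword arguments and returns an OmegaConf mapping; `kwi` additionally drops any keys not
# present in the defaults (plus "p") and returns a plain dict. The values below are made up.
#
#   kw(0.3, brightness_limit=(-0.4, 0.0))
#   # -> {'brightness_limit': [-0.4, 0.0], 'p': 0.3}
#   kwi({"p": 0.2, "blur_limit": (3, 9), "sigma": 1.0}, blur_limit=(3, 5))
#   # -> {'blur_limit': [3, 9], 'p': 0.2}   ('sigma' is discarded)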


def replay_str(transforms, s="Replay:\n", log_inactive=True):
    """Format an albumentations replay record into a human-readable string."""
    for t in transforms:
        if "transforms" in t.keys():
            s = replay_str(t["transforms"], s=s)
        elif t["applied"] or log_inactive:
            s += t["__class_fullname__"] + " " + str(t["applied"]) + "\n"
    return s


class BaseAugmentation(object):
    base_default_conf = {
        "name": "???",
        "shuffle": False,
        "p": 1.0,
        "verbose": False,
        "dtype": "uint8",  # (byte, float)
    }

    default_conf = {}

    def __init__(self, conf={}):
        """Perform some logic and call the _init method of the child model."""
        default_conf = OmegaConf.merge(
            OmegaConf.create(self.base_default_conf),
            OmegaConf.create(self.default_conf),
        )
        OmegaConf.set_struct(default_conf, True)
        if isinstance(conf, dict):
            conf = OmegaConf.create(conf)
        self.conf = OmegaConf.merge(default_conf, conf)
        OmegaConf.set_readonly(self.conf, True)
        self._init(self.conf)
        self.conf = OmegaConf.merge(self.conf, conf)

        if self.conf.verbose:
            self.compose = A.ReplayCompose
        else:
            self.compose = A.Compose

        if self.conf.dtype == "uint8":
            self.dtype = np.uint8
            self.preprocess = A.FromFloat(always_apply=True, dtype="uint8")
            self.postprocess = A.ToFloat(always_apply=True)
        elif self.conf.dtype == "float32":
            self.dtype = np.float32
            self.preprocess = A.ToFloat(always_apply=True)
            self.postprocess = IdentityTransform()
        else:
            raise ValueError(f"Unsupported dtype {self.conf.dtype}")
        self.to_tensor = ToTensorV2()

    def _init(self, conf):
        """Child class overwrites this, setting up a list of transforms"""
        self.transforms = []

    def __call__(self, image, return_tensor=False):
        """image as HW or HWC"""
        if isinstance(image, torch.Tensor):
            image = image.cpu().numpy()
        data = {"image": image}
        if image.dtype != self.dtype:
            data = self.preprocess(**data)
        transforms = self.transforms
        if self.conf.shuffle:
            order = [i for i, _ in enumerate(transforms)]
            np.random.shuffle(order)
            transforms = [transforms[i] for i in order]
        transformed = self.compose(transforms, p=self.conf.p)(**data)
        if self.conf.verbose:
            print(replay_str(transformed["replay"]["transforms"]))
        transformed = self.postprocess(**transformed)
        if return_tensor:
            return self.to_tensor(**transformed)["image"]
        else:
            return transformed["image"]
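
# Usage sketch (illustrative only): the intended subclassing pattern. A child class overrides
# `default_conf` and `_init`; `__call__` then handles dtype conversion, optional shuffling,
# and tensor conversion. The class name and parameters below are hypothetical.
#
#   class MyBlurAugmentation(BaseAugmentation):
#       default_conf = {"p": 0.9}
#
#       def _init(self, conf):
#           self.transforms = [A.GaussianBlur(blur_limit=(3, 7), p=0.5)]
#
#   aug = MyBlurAugmentation({"dtype": "uint8"})
#   image = np.random.rand(480, 640, 3).astype(np.float32)  # HWC in [0, 1]
#   out = aug(image, return_tensor=True)  # torch.Tensor of shape (3, 480, 640)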


class IdentityAugmentation(BaseAugmentation):
    default_conf = {}

    def _init(self, conf):
        self.transforms = [IdentityTransform(p=1.0)]


class DarkAugmentation(BaseAugmentation):
    default_conf = {"p": 0.75}

    def _init(self, conf):
        bright_contr = 0.5
        blur = 0.1
        random_gamma = 0.1
        hue = 0.1
        self.transforms = [
            A.RandomRain(p=0.2),
            A.RandomBrightnessContrast(
                **kw(
                    bright_contr,
                    brightness_limit=(-0.4, 0.0),
                    contrast_limit=(-0.3, 0.0),
                )
            ),
            A.OneOf(
                [
                    A.Blur(**kwi(blur, p=0.1, blur_limit=(3, 9), n="blur")),
                    A.MotionBlur(**kwi(blur, p=0.2, blur_limit=(3, 25), n="motion_blur")),
                    A.ISONoise(),
                    A.ImageCompression(),
                ],
                **kwi(blur, p=0.1),
            ),
            A.RandomGamma(**kw(random_gamma, gamma_limit=(15, 65))),
            A.OneOf(
                [
                    A.Equalize(),
                    A.CLAHE(p=0.2),
                    A.ToGray(),
                    A.ToSepia(p=0.1),
                    A.HueSaturationValue(**kw(hue, val_shift_limit=(-100, -40))),
                ],
                p=0.5,
            ),
        ]
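
# Usage sketch (illustrative only): running the dark augmentation with `verbose=True`, which
# switches to A.ReplayCompose and prints which transforms fired via `replay_str`. The input
# image is synthetic.
#
#   dark = DarkAugmentation({"p": 0.75, "verbose": True})
#   img = np.random.randint(0, 255, (240, 320, 3), dtype=np.uint8)
#   out = dark(img)  # float32 HWC in [0, 1]; the replay log goes to stdout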


class DefaultAugmentation(BaseAugmentation):
    default_conf = {"p": 1.0}

    def _init(self, conf):
        self.transforms = [
            A.RandomBrightnessContrast(p=0.2),
            A.HueSaturationValue(p=0.2),
            A.ToGray(p=0.2),
            A.ImageCompression(quality_lower=30, quality_upper=100, p=0.5),
            A.OneOf(
                [
                    A.MotionBlur(p=0.2),
                    A.MedianBlur(blur_limit=3, p=0.1),
                    A.Blur(blur_limit=3, p=0.1),
                ],
                p=0.2,
            ),
        ]


class PerspectiveAugmentation(BaseAugmentation):
    default_conf = {"p": 1.0}

    def _init(self, conf):
        self.transforms = [
            A.RandomBrightnessContrast(p=0.2),
            A.HueSaturationValue(p=0.2),
            A.ToGray(p=0.2),
            A.ImageCompression(quality_lower=30, quality_upper=100, p=0.5),
            A.OneOf(
                [
                    A.MotionBlur(p=0.2),
                    A.MedianBlur(blur_limit=3, p=0.1),
                    A.Blur(blur_limit=3, p=0.1),
                ],
                p=0.2,
            ),
        ]


class DeepCalibAugmentations(BaseAugmentation):
    default_conf = {"p": 1.0}

    def _init(self, conf):
        self.transforms = [
            A.RandomBrightnessContrast(p=0.5),
            A.GaussNoise(var_limit=(5.0, 112.0), mean=0, per_channel=True, p=0.75),
            A.Downscale(
                scale_min=0.5,
                scale_max=0.95,
                interpolation=dict(downscale=cv2.INTER_AREA, upscale=cv2.INTER_LINEAR),
                p=0.5,
            ),
            A.Downscale(scale_min=0.5, scale_max=0.95, interpolation=cv2.INTER_LINEAR, p=0.5),
            A.ImageCompression(quality_lower=20, quality_upper=85, p=1, always_apply=True),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.4),
            A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),
            A.ToGray(always_apply=False, p=0.2),
            A.GaussianBlur(blur_limit=(3, 5), sigma_limit=0, p=0.25),
            A.MotionBlur(blur_limit=5, allow_shifted=True, p=0.25),
            A.MultiplicativeNoise(multiplier=[0.85, 1.15], elementwise=True, p=0.5),
        ]
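
# Usage sketch (illustrative only): the `shuffle` flag inherited from BaseAugmentation applies
# the DeepCalib transforms in a random order on every call.
#
#   aug = DeepCalibAugmentations({"shuffle": True})
#   img = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
#   out = aug(img)  # float32 HWC in [0, 1]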


class GeoCalibAugmentations(BaseAugmentation):
    default_conf = {"p": 1.0}

    def _init(self, conf):
        self.color_transforms = [
            A.RandomGamma(gamma_limit=(80, 180), p=0.8),
            A.RandomToneCurve(scale=0.1, p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.4),
            A.OneOf([A.ToGray(p=0.1), A.ToSepia(p=0.1), IdentityTransform(p=0.8)], p=1),
        ]

        self.noise_transforms = [
            A.GaussNoise(var_limit=(5.0, 112.0), mean=0, per_channel=True, p=0.75),
            A.ImageCompression(quality_lower=20, quality_upper=100, p=1),
            A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.5),
            A.OneOrOther(
                first=A.Compose(
                    [
                        A.AdvancedBlur(
                            p=1,
                            blur_limit=(3, 7),
                            sigmaX_limit=(0.2, 1.0),
                            sigmaY_limit=(0.2, 1.0),
                            rotate_limit=(-90, 90),
                            beta_limit=(0.5, 8.0),
                            noise_limit=(0.9, 1.1),
                        ),
                        A.Sharpen(p=0.5, alpha=(0.2, 0.5), lightness=(0.5, 1.0)),
                    ]
                ),
                second=A.Compose(
                    [
                        A.Sharpen(p=0.5, alpha=(0.2, 0.5), lightness=(0.5, 1.0)),
                        A.AdvancedBlur(
                            p=1,
                            blur_limit=(3, 7),
                            sigmaX_limit=(0.2, 1.0),
                            sigmaY_limit=(0.2, 1.0),
                            rotate_limit=(-90, 90),
                            beta_limit=(0.5, 8.0),
                            noise_limit=(0.9, 1.1),
                        ),
                    ]
                ),
            ),
        ]

        self.image_transforms = [
            A.OneOf(
                [
                    A.Downscale(
                        scale_min=0.5,
                        scale_max=0.99,
                        interpolation=dict(downscale=down, upscale=up),
                        p=1,
                    )
                    for down, up in [
                        (cv2.INTER_AREA, cv2.INTER_LINEAR),
                        (cv2.INTER_LINEAR, cv2.INTER_CUBIC),
                        (cv2.INTER_CUBIC, cv2.INTER_LINEAR),
                        (cv2.INTER_LINEAR, cv2.INTER_AREA),
                    ]
                ],
                p=1,
            )
        ]

        self.transforms = [
            *self.color_transforms,
            *self.noise_transforms,
            *self.image_transforms,
        ]
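
# Usage sketch (illustrative only): the color/noise/image sub-lists are kept as attributes,
# so a subset can be recomposed independently of the full pipeline in `self.transforms`.
#
#   aug = GeoCalibAugmentations()
#   color_only = A.Compose(aug.color_transforms, p=1.0)
#   img = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
#   out = color_only(image=img)["image"]  # uint8, color transforms only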


augmentations = {
    "default": DefaultAugmentation,
    "dark": DarkAugmentation,
    "perspective": PerspectiveAugmentation,
    "deepcalib": DeepCalibAugmentations,
    "geocalib": GeoCalibAugmentations,
    "identity": IdentityAugmentation,
}
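
# Usage sketch (illustrative only): looking up a pipeline by name, as a dataset loader might
# do. The configuration values below are made up.
#
#   aug = augmentations["geocalib"]({"p": 1.0, "dtype": "uint8"})
#   img = np.random.randint(0, 255, (320, 320, 3), dtype=np.uint8)
#   tensor = aug(img, return_tensor=True)  # float32 torch.Tensor, shape (3, 320, 320)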