# GeoCalib — siclib/datasets/augmentations.py
# Uploaded to the Hugging Face Hub by veichta (revision 205a7af, 12.4 kB).
from typing import Union
import albumentations as A
import cv2
import numpy as np
import torch
from albumentations.pytorch.transforms import ToTensorV2
from omegaconf import OmegaConf
# flake8: noqa
# mypy: ignore-errors
class IdentityTransform(A.ImageOnlyTransform):
    """No-op image transform: returns the input image unchanged.

    Useful as a placeholder inside `A.OneOf` groups and as a do-nothing
    postprocess step.
    """

    def apply(self, img, **params):
        # Identity: nothing to do.
        return img

    def get_transform_init_args_names(self):
        # No constructor arguments to serialize for replay.
        return ()
class RandomAdditiveShade(A.ImageOnlyTransform):
    """Additively shade random elliptic regions of the image.

    Draws ``nb_ellipses`` filled ellipses into a mask, softens the mask with a
    large random Gaussian blur, and blends it into the image with a random
    transparency drawn from ``transparency_limit`` (negative values brighten,
    positive values darken).

    Args:
        nb_ellipses: number of ellipses drawn into the shade mask.
        transparency_limit: (low, high) range for the blend strength.
        kernel_size_limit: (low, high) range for the Gaussian kernel size;
            sampled values are forced to be odd as OpenCV requires.
        always_apply / p: standard albumentations probability controls.
    """

    def __init__(
        self,
        nb_ellipses=10,
        transparency_limit=(-0.5, 0.8),
        kernel_size_limit=(150, 350),
        always_apply=False,
        p=0.5,
    ):
        super().__init__(always_apply, p)
        self.nb_ellipses = nb_ellipses
        self.transparency_limit = transparency_limit
        self.kernel_size_limit = kernel_size_limit

    def apply(self, img, **params):
        """Shade ``img``; supports float32 (assumed in [0, 1]) and uint8."""
        if img.dtype == np.float32:
            shaded = self._py_additive_shade(img * 255.0)
            shaded /= 255.0
        elif img.dtype == np.uint8:
            shaded = self._py_additive_shade(img.astype(np.float32))
            shaded = shaded.astype(np.uint8)
        else:
            raise NotImplementedError(f"Data augmentation not available for type: {img.dtype}")
        return shaded

    def _py_additive_shade(self, img):
        grayscale = len(img.shape) == 2
        if grayscale:
            # Bug fix: append (not prepend) the channel axis.  The previous
            # `img = img[None]` produced a (1, H, W) array, so `img.shape[:2]`
            # became (1, H) and broke min_dim, the mask shape, the
            # ellipse-center sampling, and the final broadcast/squeeze.
            img = img[..., None]
        min_dim = min(img.shape[:2]) / 4  # cap ellipse radii at 1/4 of the short side
        mask = np.zeros(img.shape[:2], img.dtype)
        for _ in range(self.nb_ellipses):
            ax = int(max(np.random.rand() * min_dim, min_dim / 5))
            ay = int(max(np.random.rand() * min_dim, min_dim / 5))
            max_rad = max(ax, ay)
            x = np.random.randint(max_rad, img.shape[1] - max_rad)  # center
            y = np.random.randint(max_rad, img.shape[0] - max_rad)
            angle = np.random.rand() * 90
            cv2.ellipse(mask, (x, y), (ax, ay), angle, 0, 360, 255, -1)
        transparency = np.random.uniform(*self.transparency_limit)
        ks = np.random.randint(*self.kernel_size_limit)
        if (ks % 2) == 0:  # kernel_size has to be odd
            ks += 1
        mask = cv2.GaussianBlur(mask.astype(np.float32), (ks, ks), 0)
        shaded = img * (1 - transparency * mask[..., np.newaxis] / 255.0)
        out = np.clip(shaded, 0, 255)
        if grayscale:
            # Drop the channel axis we appended above.
            out = out.squeeze(-1)
        return out

    def get_transform_init_args_names(self):
        return "transparency_limit", "kernel_size_limit", "nb_ellipses"
def kw(entry: Union[float, dict], n=None, **default):
    """Normalize a config entry and merge it on top of keyword defaults.

    A bare number is shorthand for ``{"p": number}``.  If ``n`` is given and
    a default was registered under that name, that default replaces the entry
    entirely (presumably a per-transform override hook — behavior kept as-is).
    """
    normalized = entry if isinstance(entry, dict) else {"p": entry}
    normalized = OmegaConf.create(normalized)
    if n is not None:
        normalized = default.get(n, normalized)
    return OmegaConf.merge(default, normalized)
def kwi(entry: Union[float, dict], n=None, **default):
    """Like `kw`, but return a plain dict restricted to the default keys plus "p"."""
    merged = kw(entry, n=n, **default)
    wanted = set(default) | {"p"}
    return {key: merged[key] for key in wanted}
def replay_str(transforms, s="Replay:\n", log_inactive=True):
    """Format an albumentations replay record as human-readable text.

    Args:
        transforms: list of replay dicts; composite entries nest their
            children under a "transforms" key.
        s: accumulator string (used by the recursion).
        log_inactive: if True, also list transforms that were not applied.

    Returns:
        ``s`` with one line per transform: "<class fullname> <applied>".
    """
    for t in transforms:
        if "transforms" in t.keys():
            # Bug fix: propagate log_inactive into nested composites; the
            # recursion previously reset it to the default True.
            s = replay_str(t["transforms"], s=s, log_inactive=log_inactive)
        elif t["applied"] or log_inactive:
            s += t["__class_fullname__"] + " " + str(t["applied"]) + "\n"
    return s
class BaseAugmentation(object):
    """Configurable albumentations pipeline base class.

    Child classes override `_init` to populate ``self.transforms``; calling
    the instance runs: dtype preprocess -> (optionally shuffled) transforms
    -> postprocess, and optionally converts the result to a torch tensor.
    """

    # Options shared by every augmentation; merged under the child's
    # ``default_conf`` and then under the user-supplied conf.
    base_default_conf = {
        "name": "???",
        "shuffle": False,
        "p": 1.0,
        "verbose": False,
        "dtype": "uint8",  # (byte, float)
    }

    # Child classes override this with their own defaults.
    default_conf = {}

    def __init__(self, conf=None):
        """Perform some logic and call the _init method of the child model.

        Args:
            conf: dict or OmegaConf overriding the defaults.  The default was
                a mutable ``{}``; ``None`` avoids the shared-mutable-default
                pitfall with identical behavior.
        """
        conf = {} if conf is None else conf
        default_conf = OmegaConf.merge(
            OmegaConf.create(self.base_default_conf),
            OmegaConf.create(self.default_conf),
        )
        # Struct mode: unknown keys in `conf` raise instead of being ignored.
        OmegaConf.set_struct(default_conf, True)
        if isinstance(conf, dict):
            conf = OmegaConf.create(conf)
        self.conf = OmegaConf.merge(default_conf, conf)
        OmegaConf.set_readonly(self.conf, True)
        self._init(self.conf)
        # NOTE(review): re-merging the user conf after _init looks intended to
        # re-apply user overrides on top of anything _init touched — confirm.
        self.conf = OmegaConf.merge(self.conf, conf)
        # ReplayCompose records which transforms fired (printed when verbose).
        self.compose = A.ReplayCompose if self.conf.verbose else A.Compose

        if self.conf.dtype == "uint8":
            self.dtype = np.uint8
            self.preprocess = A.FromFloat(always_apply=True, dtype="uint8")
            self.postprocess = A.ToFloat(always_apply=True)
        elif self.conf.dtype == "float32":
            self.dtype = np.float32
            self.preprocess = A.ToFloat(always_apply=True)
            self.postprocess = IdentityTransform()
        else:
            raise ValueError(f"Unsupported dtype {self.conf.dtype}")
        self.to_tensor = ToTensorV2()

    def _init(self, conf):
        """Child class overwrites this, setting up a list of transforms."""
        self.transforms = []

    def __call__(self, image, return_tensor=False):
        """Augment ``image`` (HW or HWC; numpy array or torch tensor).

        Returns a numpy image, or a torch tensor if ``return_tensor``.
        """
        if isinstance(image, torch.Tensor):
            image = image.cpu().numpy()
        data = {"image": image}
        if image.dtype != self.dtype:
            data = self.preprocess(**data)
        transforms = self.transforms
        if self.conf.shuffle:
            # Apply the transforms in a fresh random order each call.
            order = list(range(len(transforms)))
            np.random.shuffle(order)
            transforms = [transforms[i] for i in order]
        transformed = self.compose(transforms, p=self.conf.p)(**data)
        if self.conf.verbose:
            print(replay_str(transformed["replay"]["transforms"]))
        transformed = self.postprocess(**transformed)
        if return_tensor:
            return self.to_tensor(**transformed)["image"]
        return transformed["image"]
class IdentityAugmentation(BaseAugmentation):
    """Pipeline that leaves images unchanged — a no-op baseline."""

    default_conf = {}

    def _init(self, conf):
        # One always-on identity transform so the pipeline is never empty.
        self.transforms = [IdentityTransform(p=1.0)]
class DarkAugmentation(BaseAugmentation):
    """Simulate low-light images: darken, blur, add noise, and shift colors."""

    # Run the whole pipeline on 75% of the images by default.
    default_conf = {"p": 0.75}

    def _init(self, conf):
        """Build the low-light transform list (probabilities merged via kw/kwi)."""
        # Per-group application probabilities, fed through the kw/kwi helpers.
        bright_contr = 0.5
        blur = 0.1
        random_gamma = 0.1
        hue = 0.1
        self.transforms = [
            A.RandomRain(p=0.2),
            A.RandomBrightnessContrast(
                **kw(
                    bright_contr,
                    # Negative-only limits: only darken / reduce contrast.
                    brightness_limit=(-0.4, 0.0),
                    contrast_limit=(-0.3, 0.0),
                )
            ),
            # Exactly one degradation from this group (blur, noise, or JPEG).
            A.OneOf(
                [
                    A.Blur(**kwi(blur, p=0.1, blur_limit=(3, 9), n="blur")),
                    A.MotionBlur(**kwi(blur, p=0.2, blur_limit=(3, 25), n="motion_blur")),
                    A.ISONoise(),
                    A.ImageCompression(),
                ],
                **kwi(blur, p=0.1),
            ),
            # NOTE(review): gamma_limit appears to be in percent (gamma/100),
            # so (15, 65) strongly darkens — confirm against the
            # albumentations version in use.
            A.RandomGamma(**kw(random_gamma, gamma_limit=(15, 65))),
            # Exactly one tone/color alteration.
            A.OneOf(
                [
                    A.Equalize(),
                    A.CLAHE(p=0.2),
                    A.ToGray(),
                    A.ToSepia(p=0.1),
                    # Large negative value shift further darkens the image.
                    A.HueSaturationValue(**kw(hue, val_shift_limit=(-100, -40))),
                ],
                p=0.5,
            ),
        ]
class DefaultAugmentation(BaseAugmentation):
    """Standard photometric augmentation: color jitter, compression, mild blur."""

    default_conf = {"p": 1.0}

    def _init(self, conf):
        """Populate the transform list."""
        # At most one blur variant is picked per image.
        blur_variants = A.OneOf(
            [
                A.MotionBlur(p=0.2),
                A.MedianBlur(blur_limit=3, p=0.1),
                A.Blur(blur_limit=3, p=0.1),
            ],
            p=0.2,
        )
        self.transforms = [
            A.RandomBrightnessContrast(p=0.2),
            A.HueSaturationValue(p=0.2),
            A.ToGray(p=0.2),
            A.ImageCompression(quality_lower=30, quality_upper=100, p=0.5),
            blur_variants,
        ]
class PerspectiveAugmentation(BaseAugmentation):
    """Photometric augmentations for perspective-field training.

    NOTE(review): currently the same recipe as DefaultAugmentation; kept as a
    separate class so the two can be tuned independently via the registry.
    """

    default_conf = {"p": 1.0}

    def _init(self, conf):
        """Populate the transform list."""
        self.transforms = [
            A.RandomBrightnessContrast(p=0.2),
            A.HueSaturationValue(p=0.2),
            A.ToGray(p=0.2),
            A.ImageCompression(quality_lower=30, quality_upper=100, p=0.5),
            # At most one blur variant is picked per image.
            A.OneOf(
                [
                    A.MotionBlur(p=0.2),
                    A.MedianBlur(blur_limit=3, p=0.1),
                    A.Blur(blur_limit=3, p=0.1),
                ],
                p=0.2,
            ),
        ]
class DeepCalibAugmentations(BaseAugmentation):
    """Aggressive photometric augmentation recipe (DeepCalib-style training)."""

    default_conf = {"p": 1.0}

    def _init(self, conf):
        """Populate the transform list."""
        self.transforms = [
            A.RandomBrightnessContrast(p=0.5),
            A.GaussNoise(var_limit=(5.0, 112.0), mean=0, per_channel=True, p=0.75),
            # Downscale/upscale with mismatched interpolation to create
            # resampling artifacts...
            A.Downscale(
                scale_min=0.5,
                scale_max=0.95,
                interpolation=dict(downscale=cv2.INTER_AREA, upscale=cv2.INTER_LINEAR),
                p=0.5,
            ),
            # ...and a plain bilinear variant.
            A.Downscale(scale_min=0.5, scale_max=0.95, interpolation=cv2.INTER_LINEAR, p=0.5),
            # NOTE(review): p=1 together with always_apply=True is redundant —
            # JPEG compression is applied to every image; confirm intent.
            A.ImageCompression(quality_lower=20, quality_upper=85, p=1, always_apply=True),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.4),
            A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),
            A.ToGray(always_apply=False, p=0.2),
            A.GaussianBlur(blur_limit=(3, 5), sigma_limit=0, p=0.25),
            A.MotionBlur(blur_limit=5, allow_shifted=True, p=0.25),
            A.MultiplicativeNoise(multiplier=[0.85, 1.15], elementwise=True, p=0.5),
        ]
class GeoCalibAugmentations(BaseAugmentation):
    """Augmentation recipe used to train GeoCalib: color, noise, resampling."""

    default_conf = {"p": 1.0}

    def _init(self, conf):
        """Build the color, noise, and image-degradation transform groups."""
        self.color_transforms = [
            A.RandomGamma(gamma_limit=(80, 180), p=0.8),
            A.RandomToneCurve(scale=0.1, p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.4),
            # Exactly one of: grayscale, sepia, or (most often) no color change.
            A.OneOf([A.ToGray(p=0.1), A.ToSepia(p=0.1), IdentityTransform(p=0.8)], p=1),
        ]

        # Both orderings below share identical blur/sharpen parameters.
        blur_kwargs = dict(
            p=1,
            blur_limit=(3, 7),
            sigmaX_limit=(0.2, 1.0),
            sigmaY_limit=(0.2, 1.0),
            rotate_limit=(-90, 90),
            beta_limit=(0.5, 8.0),
            noise_limit=(0.9, 1.1),
        )
        sharpen_kwargs = dict(p=0.5, alpha=(0.2, 0.5), lightness=(0.5, 1.0))
        self.noise_transforms = [
            A.GaussNoise(var_limit=(5.0, 112.0), mean=0, per_channel=True, p=0.75),
            A.ImageCompression(quality_lower=20, quality_upper=100, p=1),
            A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.5),
            # Randomly blur-then-sharpen or sharpen-then-blur.
            A.OneOrOther(
                first=A.Compose(
                    [A.AdvancedBlur(**blur_kwargs), A.Sharpen(**sharpen_kwargs)]
                ),
                second=A.Compose(
                    [A.Sharpen(**sharpen_kwargs), A.AdvancedBlur(**blur_kwargs)]
                ),
            ),
        ]

        # Downscale/upscale with mismatched interpolation pairs to mimic
        # resampling artifacts.
        interpolation_pairs = [
            (cv2.INTER_AREA, cv2.INTER_LINEAR),
            (cv2.INTER_LINEAR, cv2.INTER_CUBIC),
            (cv2.INTER_CUBIC, cv2.INTER_LINEAR),
            (cv2.INTER_LINEAR, cv2.INTER_AREA),
        ]
        self.image_transforms = [
            A.OneOf(
                [
                    A.Downscale(
                        scale_min=0.5,
                        scale_max=0.99,
                        interpolation=dict(downscale=down, upscale=up),
                        p=1,
                    )
                    for down, up in interpolation_pairs
                ],
                p=1,
            )
        ]

        self.transforms = (
            self.color_transforms + self.noise_transforms + self.image_transforms
        )
# Registry mapping config names to augmentation pipeline classes.
augmentations = {
    "default": DefaultAugmentation,
    "dark": DarkAugmentation,
    "perspective": PerspectiveAugmentation,
    "deepcalib": DeepCalibAugmentations,
    "geocalib": GeoCalibAugmentations,
    "identity": IdentityAugmentation,
}