Spaces:
Sleeping
Sleeping
"""
FMix paper on arXiv: https://arxiv.org/abs/2002.12047
FMix reference code on GitHub: https://github.com/ecs-vlc/FMix
"""
import math | |
import random | |
import numpy as np | |
from scipy.stats import beta | |
def fftfreqnd(h, w=None, z=None):
    """Get bin values for a discrete Fourier transform of size (h, w, z).

    The per-axis sample frequencies from ``np.fft.fftfreq`` are broadcast
    against each other and combined as ``sqrt(fx**2 + fy**2 + fz**2)``.

    :param h: Required, first dimension size
    :param w: Optional, second dimension size
    :param z: Optional, third dimension size
    :return: Array of frequency magnitudes. Shape is ``(h,)`` when only ``h``
        is given and ``(h, len(fx))`` when ``w`` is given as well; with all
        three sizes the broadcast result has shape ``(h, z, len(fx))``.
    """
    fz = fx = 0
    fy = np.fft.fftfreq(h)

    if w is not None:
        fy = np.expand_dims(fy, -1)
        # Only the leading bins are kept along w: w // 2 + 2 of them for odd
        # w, w // 2 + 1 for even w.
        keep = w // 2 + 2 if w % 2 == 1 else w // 2 + 1
        fx = np.fft.fftfreq(w)[:keep]

    if z is not None:
        fy = np.expand_dims(fy, -1)
        # Odd and even z are treated identically, so no parity branch is
        # needed here.
        fz = np.fft.fftfreq(z)[:, None]

    return np.sqrt(fx * fx + fy * fy + fz * fz)
def get_spectrum(freqs, decay_power, ch, h, w=0, z=0):
    """Sample random Fourier coefficients whose scale decays with frequency.

    :param freqs: Bin values for the discrete fourier transform
    :param decay_power: Decay power for frequency decay prop 1/f**d
    :param ch: Number of channels for the resulting mask
    :param h: Required, first dimension size
    :param w: Optional, second dimension size
    :param z: Optional, third dimension size
    :return: Array of shape ``(ch, *freqs.shape, 2)``: standard-normal noise
        multiplied by ``1 / max(freqs, 1 / max(w, h, z)) ** decay_power``.
    """
    # Clamp the frequencies from below so the zero-frequency bin does not
    # cause a division by zero.
    lowest = np.array([1. / max(w, h, z)])
    scale = np.ones(1) / (np.maximum(freqs, lowest) ** decay_power)

    # One pair of normal draws per channel and per frequency bin.
    noise = np.random.randn(ch, *freqs.shape, 2)

    # Add a leading channel axis and a trailing pair axis so that the scale
    # broadcasts against the noise.
    return scale[None, ..., None] * noise
def make_low_freq_image(decay, shape, ch=1):
    """Sample a low frequency image from Fourier space.

    :param decay: Decay power for frequency decay prop 1/f**d
    :param shape: Shape of desired mask, list up to 3 dims
    :param ch: Number of channels sampled for the spectrum; only the first
        channel is kept in the returned image
    :return: Array of shape ``(1, *shape)`` rescaled so its minimum is 0 and
        its maximum is 1 (all zeros if the sampled image is constant)
    """
    freqs = fftfreqnd(*shape)
    spectrum = get_spectrum(freqs, decay, ch, *shape)
    # Build complex coefficients from entries 0 and 1 along axis 1.
    # NOTE(review): get_spectrum puts its size-2 pair on the LAST axis, while
    # this indexes axis 1; kept exactly as-is to preserve the generated
    # masks -- confirm the intent before changing.
    spectrum = spectrum[:, 0] + 1j * spectrum[:, 1]
    mask = np.real(np.fft.irfftn(spectrum, shape))

    # Keep the first channel and crop every spatial axis to the requested
    # size (a single slice tuple covers the 1-, 2- and 3-D cases).
    crop = (slice(None, 1),) + tuple(slice(None, size) for size in shape)
    mask = mask[crop]

    # Rescale to [0, 1]. Skip the division when the image is constant, which
    # would otherwise compute 0 / 0 and fill the result with NaN.
    mask = mask - mask.min()
    peak = mask.max()
    if peak > 0:
        mask = mask / peak
    return mask
def sample_lam(alpha, reformulate=False):
    """Draw a mixing coefficient lambda from a beta distribution.

    :param alpha: Alpha value for beta distribution
    :param reformulate: If True, draws from Beta(alpha + 1, alpha) instead of
        the symmetric Beta(alpha, alpha) (the reformulation of [1]).
    :return: The sampled lambda
    """
    # beta.rvs(a, b) draws a single random variate from Beta(a, b); only the
    # first shape parameter differs between the two modes.
    first_shape = alpha + 1 if reformulate else alpha
    return beta.rvs(first_shape, alpha)
def binarise_mask(mask, lam, in_shape, max_soft=0.0):
    """Binarise a low frequency image so that its mean is approximately lambda.

    The ``num`` largest entries become 1 and the rest 0, where ``num`` is
    ``lam * mask.size`` rounded up or down at random. With ``max_soft > 0``
    the entries around the threshold are replaced by a linear ramp from 1 to 0.

    The input array is not modified; a new array is returned.

    :param mask: Low frequency image, usually the result of `make_low_freq_image`
    :param lam: Mean value of final mask
    :param in_shape: Shape of inputs
    :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask.
    :return: Array of shape ``(1, *in_shape)``
    """
    # flatten() always copies, so the caller's array is never overwritten
    # (reshape(-1) would hand back a view for contiguous input).
    flat = mask.flatten()
    # Flat indices ordered from the largest value to the smallest.
    order = flat.argsort()[::-1]

    # Round the target count up or down with equal probability.
    target = lam * flat.size
    num = math.ceil(target) if random.random() > 0.5 else math.floor(target)

    # Limit the softening so the ramp cannot extend past either end.
    eff_soft = max_soft
    if max_soft > lam or max_soft > (1 - lam):
        eff_soft = min(lam, 1 - lam)

    soft = int(flat.size * eff_soft)
    num_low = num - soft
    num_high = num + soft

    flat[order[:num_high]] = 1
    flat[order[num_low:]] = 0
    # Linear ramp across the softened band (empty when soft == 0).
    flat[order[num_low:num_high]] = np.linspace(1, 0, num_high - num_low)

    return flat.reshape((1, *in_shape))
def sample_mask(alpha, decay_power, shape, max_soft=0.0, reformulate=False):
    """Sample lambda, then build a binarised low frequency mask with that mean.

    :param alpha: Alpha value for beta distribution from which to sample mean of mask
    :param decay_power: Decay power for frequency decay prop 1/f**d
    :param shape: Shape of desired mask, list up to 3 dims (a bare int is
        treated as a 1-D shape)
    :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask.
    :param reformulate: If True, uses the reformulation of [1].
    :return: Tuple ``(lam, mask)``
    """
    mask_shape = (shape,) if isinstance(shape, int) else shape

    # Lambda is drawn first; it fixes the mean of the binarised mask.
    lam = sample_lam(alpha, reformulate)

    # Smooth random image, then thresholded according to lambda.
    low_freq = make_low_freq_image(decay_power, mask_shape)
    return lam, binarise_mask(low_freq, lam, mask_shape, max_soft)
def sample_and_apply(x, alpha, decay_power, shape, max_soft=0.0, reformulate=False):
    """Mix a batch with a shuffled copy of itself using a sampled FMix mask.

    :param x: Image batch on which to apply fmix of shape [b, c, shape*]
    :param alpha: Alpha value for beta distribution from which to sample mean of mask
    :param decay_power: Decay power for frequency decay prop 1/f**d
    :param shape: Shape of desired mask, list up to 3 dims
    :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask.
    :param reformulate: If True, uses the reformulation of [1].
    :return: mixed input, permutation indices, lambda value of mix
    """
    lam, mask = sample_mask(alpha, decay_power, shape, max_soft, reformulate)

    # Random pairing of each sample with another sample from the batch.
    index = np.random.permutation(x.shape[0])

    # Where the mask is 1 the original sample is kept; where it is 0 the
    # paired sample shows through.
    kept = x * mask
    swapped = x[index] * (1 - mask)
    return kept + swapped, index, lam
class FMixBase:
    """Base class that stores the settings of an FMix augmentation.

    ``__call__`` and ``loss`` are placeholders that raise
    ``NotImplementedError``; subclasses are expected to override them.

    Args:
        decay_power (float): Decay power for frequency decay prop 1/f**d
        alpha (float): Alpha value for beta distribution from which to sample mean of mask
        size ([int] | [int, int] | [int, int, int]): Shape of desired mask, list up to 3 dims
        max_soft (float): Softening value between 0 and 0.5 which smooths hard edges in the mask.
        reformulate (bool): If True, uses the reformulation of [1].
    """

    def __init__(self, decay_power=3, alpha=1, size=(32, 32), max_soft=0.0, reformulate=False):
        super().__init__()
        # Mask sampling configuration.
        self.decay_power = decay_power
        self.reformulate = reformulate
        self.size = size
        self.alpha = alpha
        self.max_soft = max_soft
        # Permutation indices and lambda start out unset.
        self.index = self.lam = None

    def __call__(self, x):
        """Apply the augmentation to ``x``; not implemented in the base class."""
        raise NotImplementedError

    def loss(self, *args, **kwargs):
        """Compute the mixed loss; not implemented in the base class."""
        raise NotImplementedError
if __name__ == '__main__':
    # Demo: blend two images with a sampled FMix mask and write the result
    # to 'fmix.jpg'.
    import cv2

    i1 = cv2.imread('output/ILSVRC2012_val_00000001.JPEG')
    i2 = cv2.imread('output/ILSVRC2012_val_00000002.JPEG')
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message rather than with an
    # opaque TypeError in the blend below.
    if i1 is None or i2 is None:
        raise FileNotFoundError(
            'could not read the demo images under output/ '
            '(ILSVRC2012_val_00000001.JPEG, ILSVRC2012_val_00000002.JPEG)')

    para = {'alpha':1.,'decay_power':3.,'shape':(256, 256),'max_soft':0.0,'reformulate':False}
    lam, mask = sample_mask(**para)
    # Move the leading singleton axis last: (1, H, W) -> (H, W, 1), so the
    # mask broadcasts over the image channels.
    # NOTE(review): assumes both images are 256x256 to match the mask -- confirm.
    mask = mask.transpose(1, 2, 0)
    i = mask * i1 + (1. - mask) * i2
    # NOTE(review): `i` is a float array at this point; cast with
    # i.astype(np.uint8) if cv2.imwrite does not accept it -- confirm.
    cv2.imwrite('fmix.jpg', i)