|
|
|
from __future__ import division |
|
|
|
import math |
|
import random |
|
import warnings |
|
from collections import Iterable |
|
|
|
import numpy as np |
|
import torch |
|
from torchvision.transforms import functional as F |
|
|
|
# Resolve interpolation-mode constants. Newer torchvision exposes them via
# InterpolationMode; older installs fall back to the PIL integer constants.
try:
    from torchvision.transforms import InterpolationMode

    NEAREST = InterpolationMode.NEAREST
    BILINEAR = InterpolationMode.BILINEAR
    BICUBIC = InterpolationMode.BICUBIC
    LANCZOS = InterpolationMode.LANCZOS
    HAMMING = InterpolationMode.HAMMING
    # BUGFIX: this line previously re-assigned HAMMING a second time
    # (copy-paste error); BOX was clearly intended, matching the dict below.
    BOX = InterpolationMode.BOX

    # Maps an interpolation constant to a printable name for __repr__.
    _pil_interpolation_to_str = {
        InterpolationMode.NEAREST: 'InterpolationMode.NEAREST',
        InterpolationMode.BILINEAR: 'InterpolationMode.BILINEAR',
        InterpolationMode.BICUBIC: 'InterpolationMode.BICUBIC',
        InterpolationMode.LANCZOS: 'InterpolationMode.LANCZOS',
        InterpolationMode.HAMMING: 'InterpolationMode.HAMMING',
        InterpolationMode.BOX: 'InterpolationMode.BOX',
    }

# Narrowed from a bare `except:` so unrelated errors (e.g. KeyboardInterrupt)
# are not swallowed. AttributeError covers torchvision versions where
# InterpolationMode exists but lacks one of the members used above.
except (ImportError, AttributeError):
    from PIL import Image

    NEAREST = Image.NEAREST
    BILINEAR = Image.BILINEAR
    BICUBIC = Image.BICUBIC
    LANCZOS = Image.LANCZOS
    HAMMING = Image.HAMMING
    # BUGFIX: same duplicated-HAMMING copy-paste error as above; BOX intended.
    BOX = Image.BOX

    # Maps a PIL interpolation constant to a printable name for __repr__.
    _pil_interpolation_to_str = {
        Image.NEAREST: 'PIL.Image.NEAREST',
        Image.BILINEAR: 'PIL.Image.BILINEAR',
        Image.BICUBIC: 'PIL.Image.BICUBIC',
        Image.LANCZOS: 'PIL.Image.LANCZOS',
        Image.HAMMING: 'PIL.Image.HAMMING',
        Image.BOX: 'PIL.Image.BOX',
    }
|
|
|
def _get_image_size(img):
    """Return the spatial size of ``img`` in (width, height) order.

    Accepts either a PIL Image (whose ``.size`` is already (W, H)) or a
    torch.Tensor with at least three dims laid out as (..., H, W).

    Raises:
        TypeError: if ``img`` is neither a PIL Image nor a >2-dim tensor.
    """
    if F._is_pil_image(img):
        return img.size
    if isinstance(img, torch.Tensor) and img.dim() > 2:
        # Tensor shape ends in (H, W); reverse to match PIL's (W, H).
        return img.shape[-2:][::-1]
    raise TypeError('Unexpected type {}'.format(type(img)))
|
|
|
|
|
class Compose(object):
    """Compose several transforms into one (image, ground-truth) callable.

    Transforms whose class name contains ``RandomResizedCrop``, ``Flip`` or
    ``ToTensor`` are treated as paired transforms and receive both the image
    and the ground truth; every other transform is applied to the image only,
    and the ground truth is cast to float alongside it.

    Args:
        transforms (list of ``Transform`` objects):
            list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, gt):
        for transform in self.transforms:
            name = transform.__class__.__name__
            if ('RandomResizedCrop' in name
                    or 'Flip' in name
                    or 'ToTensor' in name):
                # Paired transform: image and target move in lockstep.
                img, gt = transform(img, gt)
            else:
                # Image-only transform (e.g. Normalize); the target is cast
                # to float so its dtype keeps up with the image tensor.
                img = transform(img)
                gt = gt.float()
        return img, gt

    def __repr__(self):
        body = ''.join('\n    {0}'.format(t) for t in self.transforms)
        return self.__class__.__name__ + '(' + body + '\n)'
|
|
|
|
|
class RandomHorizontalFlip(object):
    """Horizontally flip an image/ground-truth pair with probability ``p``.

    Args:
        p (float): probability of the pair being flipped. Default value is 0.5
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, gt):
        """
        Args:
            img (PIL Image): Image to be flipped.

        Returns:
            tuple: the ``(img, gt)`` pair, flipped together or untouched.
        """
        keep = random.random() >= self.p
        if keep:
            return img, gt
        # Flip both so the target stays aligned with the image.
        return F.hflip(img), F.hflip(gt)

    def __repr__(self):
        return '{}(p={})'.format(self.__class__.__name__, self.p)
|
|
|
|
|
class RandomResizedCrop(object):
    """Crop a random region of a PIL Image pair and resize it to ``size``.

    A crop covering a random fraction (default 0.08 to 1.0) of the source
    area, with a random aspect ratio (default 3/4 to 4/3) of the original,
    is taken and resized to ``size``. The ground truth is cropped and
    resized with identical parameters, but always using NEAREST
    interpolation so label values are not blended.

    Args:
        size: expected output size of each edge
        scale: range of size of the origin size cropped
        ratio: range of aspect ratio of the origin aspect ratio cropped
        interpolation: Default: PIL.Image.BILINEAR
    """

    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=BILINEAR):
        # Normalize a scalar size to a square (size, size) pair.
        self.size = size if isinstance(size, (tuple, list)) else (size, size)
        if scale[0] > scale[1] or ratio[0] > ratio[1]:
            warnings.warn('range should be of kind (min, max)')
        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img, scale, ratio):
        """Sample ``(i, j, h, w)`` parameters for a random sized crop.

        Up to ten random (area, aspect-ratio) draws are attempted; if none
        fits inside the image, falls back to the largest centered crop whose
        aspect ratio is clamped into ``ratio``.

        Args:
            img (PIL Image): Image to be cropped.
            scale (tuple):
                range of size of the origin size cropped
            ratio (tuple):
                range of aspect ratio of the origin aspect ratio cropped

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
            sized crop.
        """
        width, height = _get_image_size(img)
        area = width * height
        # Sample the ratio uniformly in log space (invariant across tries).
        log_ratio = (math.log(ratio[0]), math.log(ratio[1]))

        for _ in range(10):
            target_area = random.uniform(*scale) * area
            aspect_ratio = math.exp(random.uniform(*log_ratio))

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < w <= width and 0 < h <= height:
                return (random.randint(0, height - h),
                        random.randint(0, width - w), h, w)

        # Fallback: center crop, aspect ratio clamped into ``ratio``.
        in_ratio = float(width) / float(height)
        if in_ratio < min(ratio):
            w = width
            h = int(round(w / min(ratio)))
        elif in_ratio > max(ratio):
            h = height
            w = int(round(h * max(ratio)))
        else:
            w, h = width, height
        return (height - h) // 2, (width - w) // 2, h, w

    def __call__(self, img, gt):
        """
        Args:
            img (PIL Image): Image to be cropped and resized.

        Returns:
            tuple: the randomly cropped-and-resized ``(img, gt)`` pair.
        """
        i, j, h, w = self.get_params(img, self.scale, self.ratio)
        out_img = F.resized_crop(img, i, j, h, w, self.size,
                                 self.interpolation)
        # Ground truth always uses NEAREST so labels stay discrete.
        out_gt = F.resized_crop(gt, i, j, h, w, self.size, NEAREST)
        return out_img, out_gt

    def __repr__(self):
        detail = ', scale={0}, ratio={1}, interpolation={2})'.format(
            tuple(round(s, 4) for s in self.scale),
            tuple(round(r, 4) for r in self.ratio),
            _pil_interpolation_to_str[self.interpolation])
        return self.__class__.__name__ + '(size={0}'.format(self.size) + detail
|
|
|
class ToTensor(object):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` pair to tensors.

    The image goes through ``F.to_tensor``: a PIL Image in one of the modes
    (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or a uint8 ndarray in
    [0, 255] becomes a torch.FloatTensor of shape (C x H x W) in
    [0.0, 1.0]; other inputs are converted without scaling. The ground
    truth is wrapped into a tensor via numpy with no scaling at all.
    """

    def __call__(self, pic, gt):
        """
        Args:
            pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            tuple: the converted ``(image_tensor, gt_tensor)`` pair.
        """
        pic_tensor = F.to_tensor(pic)
        # No normalization for the target: keep raw label values.
        gt_tensor = torch.from_numpy(np.array(gt))
        return pic_tensor, gt_tensor

    def __repr__(self):
        return '{}()'.format(self.__class__.__name__)