#copied from https://raw.githubusercontent.com/pytorch/vision/f0d3daa7f65bcde560e242d9bccc284721368f02/torchvision/transforms/functional_video.py #copied from https://raw.githubusercontent.com/pytorch/vision/f0d3daa7f65bcde560e242d9bccc284721368f02/torchvision/transforms/transforms_video.py #!/usr/bin/env python3 import numbers import random import torch try: import accimage except: pass from torchvision.transforms import ( RandomResizedCrop, ) from . import functional_video as F def _get_image_size(img): if isinstance(img, torch.Tensor) and img.dim() > 2: return img.shape[-2:][::-1] else: raise TypeError("Unexpected type {}".format(type(img))) class RandomCrop(object): """Crop the given PIL Image at a random location. Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. padding (int or sequence, optional): Optional padding on each border of the image. Default is None, i.e no padding. If a sequence of length 4 is provided, it is used to pad left, top, right, bottom borders respectively. If a sequence of length 2 is provided, it is used to pad left/right, top/bottom borders, respectively. pad_if_needed (boolean): It will pad the image if smaller than the desired size to avoid raising an exception. Since cropping is done after padding, the padding seems to be done at a random offset. fill: Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels respectively. This value is only used when the padding_mode is constant padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. - constant: pads with a constant value, this value is specified with fill - edge: pads with the last value on the edge of the image - reflect: pads with reflection of image (without repeating the last value on the edge) padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode will result in [3, 2, 1, 2, 3, 4, 3, 2] - symmetric: pads with reflection of image (repeating the last value on the edge) padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode will result in [2, 1, 1, 2, 3, 4, 4, 3] """ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'): if isinstance(size, numbers.Number): self.size = (int(size), int(size)) else: self.size = size self.padding = padding self.pad_if_needed = pad_if_needed self.fill = fill self.padding_mode = padding_mode @staticmethod def get_params(img, output_size): """Get parameters for ``crop`` for a random crop. Args: img (PIL Image): Image to be cropped. output_size (tuple): Expected output size of the crop. Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. """ w, h = _get_image_size(img) th, tw = output_size if w == tw and h == th: return 0, 0, h, w i = random.randint(0, h - th) j = random.randint(0, w - tw) return i, j, th, tw def __call__(self, img): """ Args: img (PIL Image): Image to be cropped. Returns: PIL Image: Cropped image. """ if self.padding is not None: img = F.pad(img, self.padding, self.fill, self.padding_mode) # pad the width if needed if self.pad_if_needed and img.size[0] < self.size[1]: img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode) # pad the height if needed if self.pad_if_needed and img.size[1] < self.size[0]: img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode) i, j, h, w = self.get_params(img, self.size) return F.crop(img, i, j, h, w) def __repr__(self): return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding) class RandomCropVideo(RandomCrop): def __init__(self, size): if isinstance(size, numbers.Number): self.size = (int(size), int(size)) else: self.size = size def __call__(self, clip): """ Args: clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) Returns: torch.tensor: randomly cropped/resized video clip. size is (C, T, OH, OW) """ i, j, h, w = self.get_params(clip, self.size) return F.crop(clip, i, j, h, w) def __repr__(self): return self.__class__.__name__ + '(size={0})'.format(self.size) class RandomResizedCropVideo(RandomResizedCrop): def __init__( self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation_mode="bilinear", ): if isinstance(size, tuple): assert len(size) == 2, "size should be tuple (height, width)" self.size = size else: self.size = (size, size) self.interpolation_mode = interpolation_mode self.scale = scale self.ratio = ratio def __call__(self, clip): """ Args: clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) Returns: torch.tensor: randomly cropped/resized video clip. size is (C, T, H, W) """ i, j, h, w = self.get_params(clip, self.scale, self.ratio) return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode) def __repr__(self): return self.__class__.__name__ + \ '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format( self.size, self.interpolation_mode, self.scale, self.ratio ) class CenterCropVideo(object): def __init__(self, crop_size): if isinstance(crop_size, numbers.Number): self.crop_size = (int(crop_size), int(crop_size)) else: self.crop_size = crop_size def __call__(self, clip): """ Args: clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) Returns: torch.tensor: central cropping of video clip. Size is (C, T, crop_size, crop_size) """ return F.center_crop(clip, self.crop_size) def __repr__(self): return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size) class CornerCropVideo(object): def __init__(self, crop_size, loc="tr"): if isinstance(crop_size, numbers.Number): self.crop_size = (int(crop_size), int(crop_size)) else: self.crop_size = crop_size def __call__(self, clip, loc="tr"): """ Args: clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) Returns: torch.tensor: central cropping of video clip. Size is (C, T, crop_size, crop_size) """ if loc == "tr": i = 0 j = 0 elif loc == "center": return F.corner_crop(clip, self.crop_size) else: i = clip.size(-2) - self.crop_size j = clip.size(-1) - self.crop_size return F.corner_crop(clip, self.crop_size, i, j) def __repr__(self): return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size) class NormalizeVideo(object): """ Normalize the video clip by mean subtraction and division by standard deviation Args: mean (3-tuple): pixel RGB mean std (3-tuple): pixel RGB standard deviation inplace (boolean): whether do in-place normalization """ def __init__(self, mean, std, inplace=False): self.mean = mean self.std = std self.inplace = inplace def __call__(self, clip): """ Args: clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W) """ return F.normalize(clip, self.mean, self.std, self.inplace) def __repr__(self): return self.__class__.__name__ + '(mean={0}, std={1}, inplace={2})'.format( self.mean, self.std, self.inplace) class ToTensorVideo(object): """ Convert tensor data type from uint8 to float, divide value by 255.0 and permute the dimenions of clip tensor """ def __init__(self): pass def __call__(self, clip): """ Args: clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C) Return: clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W) """ return F.to_tensor(clip) def __repr__(self): return self.__class__.__name__ class RandomHorizontalFlipVideo(object): """ Flip the video clip along the horizonal direction with a given probability Args: p (float): probability of the clip being flipped. Default value is 0.5 """ def __init__(self, p=0.5): self.p = p def __call__(self, clip): """ Args: clip (torch.tensor): Size is (C, T, H, W) Return: clip (torch.tensor): Size is (C, T, H, W) """ if random.random() < self.p: clip = F.hflip(clip) return clip def __repr__(self): return self.__class__.__name__ + "(p={0})".format(self.p) class ResizeVideo(object): """ Resize the video clip """ def __init__(self, w,h): self.w = w self.h = h def __call__(self, clip): """ Args: clip (torch.tensor): Size is (C, T, H, W) Return: clip (torch.tensor): Size is (C, T, h, w) """ #interpolare needs (T,C, H, W) order while clip is (C, T, H, W) return torch.nn.functional.interpolate( clip.permute(1,0,2,3),(self.h,self.w),mode="bilinear",align_corners=False).permute(1,0,2,3) def __repr__(self): return self.__class__.__name__ + "(w=%d,h=%d)"%(self.w,self.h)