from mmdet.registry import TRANSFORMS import mmcv import numpy as np from mmdet.structures.bbox.box_type import autocast_box_type from mmcv.image.geometric import _scale_size from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union from mmcv.transforms.base import BaseTransform def rescale_size(old_size: tuple, scale: Union[float, int, tuple], return_scale: bool = False) -> tuple: """Calculate the new size to be rescaled to. Args: old_size (tuple[int]): The old size (w, h) of image. scale (float | tuple[int]): The scaling factor or maximum size. If it is a float number, then the image will be rescaled by this factor, else if it is a tuple of 2 integers, then the image will be rescaled as large as possible within the scale. return_scale (bool): Whether to return the scaling factor besides the rescaled image size. Returns: tuple[int]: The new rescaled image size. """ w, h = old_size if isinstance(scale, (float, int)): if scale <= 0: raise ValueError(f'Invalid scale {scale}, must be positive.') scale_factor = scale elif isinstance(scale, tuple): max_long_edge = max(scale) max_short_edge = min(scale) scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w)) else: raise TypeError( f'Scale must be a number or tuple of int, but got {type(scale)}') new_size = _scale_size((w, h), scale_factor) if return_scale: return new_size, scale_factor else: return new_size def hsiresize( img: np.ndarray, size: Tuple[int, int], return_scale: bool = False, ) -> Union[Tuple[np.ndarray, float, float], np.ndarray]: """Resize image to a given size. Args: img (ndarray): The input image. size (tuple[int]): Target size (w, h). return_scale (bool): Whether to return `w_scale` and `h_scale`. interpolation (str): Interpolation method, accepted values are "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' backend, "nearest", "bilinear" for 'pillow' backend. out (ndarray): The output destination. backend (str | None): The image resize backend type. Options are `cv2`, `pillow`, `None`. If backend is None, the global imread_backend specified by ``mmcv.use_backend()`` will be used. Default: None. Returns: tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or `resized_img`. """ h, w = img.shape[:2] # assert (size[1]%h == 0) and (size[0]%w == 0), 'size error' w_scale = int(size[0] / w) h_scale = int(size[1] / h) resized_img = np.repeat(np.repeat(img, w_scale, axis=0), h_scale, axis=1) if not return_scale: return resized_img else: return resized_img, w_scale, h_scale @TRANSFORMS.register_module() class HSIResize(BaseTransform): """Resize images & bbox & seg & keypoints. This transform resizes the input image according to ``scale`` or ``scale_factor``. Bboxes, seg map and keypoints are then resized with the same scale factor. if ``scale`` and ``scale_factor`` are both set, it will use ``scale`` to resize. Required Keys: - img - gt_bboxes (optional) - gt_seg_map (optional) - gt_keypoints (optional) Modified Keys: - img - gt_bboxes - gt_seg_map - gt_keypoints - img_shape Added Keys: - scale - scale_factor - keep_ratio Args: scale (int or tuple): Images scales for resizing. Defaults to None scale_factor (float or tuple[float]): Scale factors for resizing. Defaults to None. keep_ratio (bool): Whether to keep the aspect ratio when resizing the image. Defaults to False. clip_object_border (bool): Whether to clip the objects outside the border of the image. In some dataset like MOT17, the gt bboxes are allowed to cross the border of images. Therefore, we don't need to clip the gt bboxes in these cases. Defaults to True. backend (str): Image resize backend, choices are 'cv2' and 'pillow'. These two backends generates slightly different results. Defaults to 'cv2'. interpolation (str): Interpolation method, accepted values are "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' backend, "nearest", "bilinear" for 'pillow' backend. Defaults to 'bilinear'. """ def __init__(self, scale: Optional[Union[int, Tuple[int, int]]] = None, scale_factor: Optional[Union[int, Tuple[int, int]]] = None, keep_ratio: bool = False, clip_object_border: bool = True, backend: str = 'cv2', interpolation='bilinear') -> None: assert scale is not None or scale_factor is not None, ( '`scale` and' '`scale_factor` can not both be `None`') assert scale is None, ('please input scale_factor instead of scale' ) if scale is None: self.scale = None else: if isinstance(scale, int): self.scale = (scale, scale) else: self.scale = scale self.backend = backend self.interpolation = interpolation self.keep_ratio = keep_ratio self.clip_object_border = clip_object_border if scale_factor is None: self.scale_factor = None elif isinstance(scale_factor, int): self.scale_factor = (scale_factor, scale_factor) elif isinstance(scale_factor, tuple): assert (len(scale_factor)) == 2 self.scale_factor = scale_factor else: raise TypeError( f'expect scale_factor is float or Tuple(float), but' f'get {type(scale_factor)}') # assert (isinstance(scale_factor[0], int) and isinstance(scale_factor[1], int)),\ # 'scale_factor should be an integer' def _resize_img(self, results: dict) -> None: """Resize images with ``results['scale']``.""" if results.get('img', None) is not None: if self.keep_ratio: h, w = results['img'].shape[:2] new_size, scale_factor = rescale_size((w, h), results['scale'], return_scale=True) img, w_scale, h_scale = hsiresize(results['img'], new_size, return_scale=True,) else: img, w_scale, h_scale = hsiresize(results['img'], results['scale'],return_scale=True,) results['img'] = img results['img_shape'] = img.shape[:2] results['scale_factor'] = (w_scale, h_scale) results['keep_ratio'] = self.keep_ratio def _resize_masks(self, results: dict) -> None: """Resize masks with ``results['scale']``""" if results.get('gt_masks', None) is not None: if self.keep_ratio: results['gt_masks'] = results['gt_masks'].rescale( results['scale']) else: results['gt_masks'] = results['gt_masks'].resize( results['img_shape']) def _resize_bboxes(self, results: dict) -> None: """Resize bounding boxes with ``results['scale_factor']``.""" if results.get('gt_bboxes', None) is not None: results['gt_bboxes'].rescale_(results['scale_factor']) if self.clip_object_border: results['gt_bboxes'].clip_(results['img_shape']) def _resize_seg(self, results: dict) -> None: """Resize semantic segmentation map with ``results['scale']``.""" if results.get('gt_seg_map', None) is not None: if self.keep_ratio: gt_seg = mmcv.imrescale( results['gt_seg_map'], results['scale'], interpolation='nearest', backend=self.backend) else: gt_seg = mmcv.imresize( results['gt_seg_map'], results['scale'], interpolation='nearest', backend=self.backend) results['gt_seg_map'] = gt_seg def _resize_keypoints(self, results: dict) -> None: """Resize keypoints with ``results['scale_factor']``.""" if results.get('gt_keypoints', None) is not None: keypoints = results['gt_keypoints'] keypoints[:, :, :2] = keypoints[:, :, :2] * np.array( results['scale_factor']) if self.clip_object_border: keypoints[:, :, 0] = np.clip(keypoints[:, :, 0], 0, results['img_shape'][1]) keypoints[:, :, 1] = np.clip(keypoints[:, :, 1], 0, results['img_shape'][0]) results['gt_keypoints'] = keypoints def _record_homography_matrix(self, results: dict) -> None: """Record the homography matrix for the Resize.""" w_scale, h_scale = results['scale_factor'] homography_matrix = np.array( [[w_scale, 0, 0], [0, h_scale, 0], [0, 0, 1]], dtype=np.float32) if results.get('homography_matrix', None) is None: results['homography_matrix'] = homography_matrix else: results['homography_matrix'] = homography_matrix @ results[ 'homography_matrix'] @autocast_box_type() def transform(self, results: dict) -> dict: """Transform function to resize images, bounding boxes and semantic segmentation map. Args: results (dict): Result dict from loading pipeline. Returns: dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map', 'scale', 'scale_factor', 'height', 'width', and 'keep_ratio' keys are updated in result dict. """ if self.scale: results['scale'] = self.scale else: img_shape = results['img'].shape[:2] results['scale'] = _scale_size(img_shape[::-1], self.scale_factor) self._resize_img(results) self._resize_bboxes(results) self._resize_masks(results) self._resize_seg(results) self._record_homography_matrix(results) return results def __repr__(self) -> str: repr_str = self.__class__.__name__ repr_str += f'(scale={self.scale}, ' repr_str += f'scale_factor={self.scale_factor}, ' repr_str += f'keep_ratio={self.keep_ratio}, ' repr_str += f'clip_object_border={self.clip_object_border}), ' repr_str += f'backend={self.backend}), ' repr_str += f'interpolation={self.interpolation})' return repr_str @TRANSFORMS.register_module() class ResizePiexlTarget(BaseTransform): """Resize images & bbox & seg & keypoints. This transform resizes the input image according to ``scale`` or ``scale_factor``. Bboxes, seg map and keypoints are then resized with the same scale factor. if ``scale`` and ``scale_factor`` are both set, it will use ``scale`` to resize. Required Keys: - img - gt_bboxes (optional) - gt_seg_map (optional) - gt_keypoints (optional) Modified Keys: - img - gt_bboxes - gt_seg_map - gt_keypoints - img_shape Added Keys: - scale - scale_factor - keep_ratio Args: scale (int or tuple): Images scales for resizing. Defaults to None scale_factor (float or tuple[float]): Scale factors for resizing. Defaults to None. keep_ratio (bool): Whether to keep the aspect ratio when resizing the image. Defaults to False. clip_object_border (bool): Whether to clip the objects outside the border of the image. In some dataset like MOT17, the gt bboxes are allowed to cross the border of images. Therefore, we don't need to clip the gt bboxes in these cases. Defaults to True. backend (str): Image resize backend, choices are 'cv2' and 'pillow'. These two backends generates slightly different results. Defaults to 'cv2'. interpolation (str): Interpolation method, accepted values are "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' backend, "nearest", "bilinear" for 'pillow' backend. Defaults to 'bilinear'. """ def __init__(self, scale_factor: float = 1.0, clip_object_border: bool = True,) -> None: self.clip_object_border = clip_object_border self.scale_factor = scale_factor self.keep_ratio = True def _resize_img(self, results: dict) -> None: """Resize images with ``results['scale']``.""" h, w = results['img'].shape[:2] new_size = _scale_size((w, h), self.scale_factor) img, w_scale, h_scale = hsiresize(results['img'], new_size, return_scale=True,) results['img'] = img results['img_shape'] = img.shape[:2] results['scale_factor'] = (w_scale, h_scale) results['keep_ratio'] = self.keep_ratio def _resize_bboxes(self, results: dict) -> None: """Resize bounding boxes with ``results['scale_factor']``.""" if results.get('gt_bboxes', None) is not None: results['gt_bboxes'].rescale_(results['scale_factor']) if self.clip_object_border: results['gt_bboxes'].clip_(results['img_shape']) def _resize_masks(self, results: dict) -> None: """Resize masks with ``results['scale']``""" if results.get('gt_masks', None) is not None: if self.keep_ratio: results['gt_masks'] = results['gt_masks'].rescale( results['scale']) else: results['gt_masks'] = results['gt_masks'].resize( results['img_shape']) def _resize_seg(self, results: dict) -> None: """Resize semantic segmentation map with ``results['scale']``.""" if results.get('gt_seg', None) is not None: h, w = results['gt_seg'].shape[:2] new_size = _scale_size((w, h), self.scale_factor) gt_seg = hsiresize(results['gt_seg'], new_size, return_scale=False,) results['gt_seg'] = gt_seg def _resize_abu(self, results: dict) -> None: """Resize semantic segmentation map with ``results['scale']``.""" if results.get('gt_abu', None) is not None: h, w = results['gt_abu'].shape[:2] new_size = _scale_size((w, h), self.scale_factor) gt_abu = hsiresize(results['gt_abu'], new_size, return_scale=False,) results['gt_abu'] = gt_abu def _record_homography_matrix(self, results: dict) -> None: """Record the homography matrix for the Resize.""" w_scale, h_scale = results['scale_factor'] homography_matrix = np.array( [[w_scale, 0, 0], [0, h_scale, 0], [0, 0, 1]], dtype=np.float32) if results.get('homography_matrix', None) is None: results['homography_matrix'] = homography_matrix else: results['homography_matrix'] = homography_matrix @ results[ 'homography_matrix'] def transform(self, results: dict) -> dict: """Transform function to resize images, bounding boxes, semantic segmentation map and keypoints. Args: results (dict): Result dict from loading pipeline. Returns: dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map', 'gt_keypoints', 'scale', 'scale_factor', 'img_shape', and 'keep_ratio' keys are updated in result dict. """ # if self.scale: # results['scale'] = self.scale # else: img_shape = results['img'].shape[:2] results['scale'] = _scale_size(img_shape[::-1], self.scale_factor) # type: ignore self._resize_img(results) self._resize_bboxes(results) self._resize_masks(results) self._resize_seg(results) self._resize_abu(results) self._record_homography_matrix(results) return results def __repr__(self): repr_str = self.__class__.__name__ repr_str += f'scale_factor={self.scale_factor}, ' return repr_str