import mmcv import numpy as np from mmcv.transforms import RandomFlip as MMCV_RandomFlip from mmdet.registry import TRANSFORMS from mmdet.structures.bbox import HorizontalBoxes, autocast_box_type @TRANSFORMS.register_module() class RandomFlipPiexlTarget(MMCV_RandomFlip): """Flip the image & bbox & mask & segmentation map. Added or Updated keys: flip, flip_direction, img, gt_bboxes, and gt_seg_map. There are 3 flip modes: - ``prob`` is float, ``direction`` is string: the image will be ``direction``ly flipped with probability of ``prob`` . E.g., ``prob=0.5``, ``direction='horizontal'``, then image will be horizontally flipped with probability of 0.5. - ``prob`` is float, ``direction`` is list of string: the image will be ``direction[i]``ly flipped with probability of ``prob/len(direction)``. E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``, then image will be horizontally flipped with probability of 0.25, vertically with probability of 0.25. - ``prob`` is list of float, ``direction`` is list of string: given ``len(prob) == len(direction)``, the image will be ``direction[i]``ly flipped with probability of ``prob[i]``. E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal', 'vertical']``, then image will be horizontally flipped with probability of 0.3, vertically with probability of 0.5. Required Keys: - img - gt_bboxes (BaseBoxes[torch.float32]) (optional) - gt_masks (BitmapMasks | PolygonMasks) (optional) - gt_seg_map (np.uint8) (optional) Modified Keys: - img - gt_bboxes - gt_masks - gt_seg_map Added Keys: - flip - flip_direction - homography_matrix Args: prob (float | list[float], optional): The flipping probability. Defaults to None. direction(str | list[str]): The flipping direction. Options If input is a list, the length must equal ``prob``. Each element in ``prob`` indicates the flip probability of corresponding direction. Defaults to 'horizontal'. """ def _record_homography_matrix(self, results: dict) -> None: """Record the homography matrix for the RandomFlip.""" cur_dir = results['flip_direction'] h, w = results['img'].shape[:2] if cur_dir == 'horizontal': homography_matrix = np.array([[-1, 0, w], [0, 1, 0], [0, 0, 1]], dtype=np.float32) elif cur_dir == 'vertical': homography_matrix = np.array([[1, 0, 0], [0, -1, h], [0, 0, 1]], dtype=np.float32) elif cur_dir == 'diagonal': homography_matrix = np.array([[-1, 0, w], [0, -1, h], [0, 0, 1]], dtype=np.float32) else: homography_matrix = np.eye(3, dtype=np.float32) if results.get('homography_matrix', None) is None: results['homography_matrix'] = homography_matrix else: results['homography_matrix'] = homography_matrix @ results[ 'homography_matrix'] @autocast_box_type() def _flip(self, results: dict) -> None: """Flip images, bounding boxes, and semantic segmentation map.""" # flip image results['img'] = mmcv.imflip( results['img'], direction=results['flip_direction']) img_shape = results['img'].shape[:2] # flip bboxes if results.get('gt_bboxes', None) is not None: results['gt_bboxes'].flip_(img_shape, results['flip_direction']) # flip masks if results.get('gt_masks', None) is not None: results['gt_masks'] = results['gt_masks'].flip( results['flip_direction']) if results.get('gt_seg', None) is not None: results['gt_seg'] = mmcv.imflip( results['gt_seg'], direction=results['flip_direction']) if results.get('gt_abu', None) is not None: results['gt_abu'] = mmcv.imflip( results['gt_abu'], direction=results['flip_direction']) # record homography matrix for flip self._record_homography_matrix(results)