Spaces:

tidalove
/

yolox

Sleeping

App Files Files Community

Feng Wang committed on Jul 21, 2021

Commit

1b23f0f

1 Parent(s): 67f8016

refactor(data): refactor dataset logic.

Browse files

Files changed (1) hide show

yolox/data/datasets/mosaicdetection.py +82 -61

yolox/data/datasets/mosaicdetection.py CHANGED Viewed

@@ -13,22 +13,51 @@ from ..data_augment import box_candidates, random_perspective
 from .datasets_wrapper import Dataset
-class MosaicDetection(Dataset):
-    """Detection dataset wrapper that performs mixup for normal dataset.
-    Parameters
-    ----------
-    dataset : Pytorch Dataset
-        Gluon dataset object.
-    *args : list
-        Additional arguments for mixup random sampler.
-    """
     def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
         degrees=10.0, translate=0.1, scale=(0.5, 1.5), mscale=(0.5, 1.5),
         shear=2.0, perspective=0.0, enable_mixup=True, *args
     ):
         super().__init__(img_size, mosaic=mosaic)
         self._dataset = dataset
         self.preproc = preproc
@@ -38,7 +67,7 @@ class MosaicDetection(Dataset):
         self.shear = shear
         self.perspective = perspective
         self.mixup_scale = mscale
-        self._mosaic = mosaic
         self.enable_mixup = enable_mixup
     def __len__(self):
@@ -46,79 +75,71 @@ class MosaicDetection(Dataset):
     @Dataset.resize_getitem
     def __getitem__(self, idx):
-        if self._mosaic:
-            labels4 = []
             input_dim = self._dataset.input_dim
             # yc, xc = s, s  # mosaic center x, y
-            yc = int(random.uniform(0.5 * input_dim[0], 1.5 * input_dim[0]))
-            xc = int(random.uniform(0.5 * input_dim[1], 1.5 * input_dim[1]))
             # 3 additional image indices
             indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
-            for i, index in enumerate(indices):
                 img, _labels, _, _ = self._dataset.pull_item(index)
                 h0, w0 = img.shape[:2]  # orig hw
-                scale = min(1. * input_dim[0] / h0, 1. * input_dim[1] / w0)
-                interp = cv2.INTER_LINEAR
-                img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=interp)
-                (h, w) = img.shape[:2]
-                if i == 0:  # top left
-                    # base image with 4 tiles
-                    img4 = np.full(
-                        (input_dim[0] * 2, input_dim[1] * 2, img.shape[2]), 114, dtype=np.uint8
-                    )
-                    # xmin, ymin, xmax, ymax (large image)
-                    x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,)
-                    # xmin, ymin, xmax, ymax (small image)
-                    x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,)
-                elif i == 1:  # top right
-                    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, input_dim[1] * 2), yc
-                    x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
-                elif i == 2:  # bottom left
-                    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(input_dim[0] * 2, yc + h)
-                    x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
-                elif i == 3:  # bottom right
-                    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, input_dim[1] * 2), min(input_dim[0] * 2, yc + h)  # noqa
-                    x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
-                img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
-                padw = x1a - x1b
-                padh = y1a - y1b
-                labels = _labels.copy()  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
-                if _labels.size > 0:  # Normalized xywh to pixel xyxy format
                     labels[:, 0] = scale * _labels[:, 0] + padw
                     labels[:, 1] = scale * _labels[:, 1] + padh
                     labels[:, 2] = scale * _labels[:, 2] + padw
                     labels[:, 3] = scale * _labels[:, 3] + padh
-                labels4.append(labels)
-            if len(labels4):
-                labels4 = np.concatenate(labels4, 0)
-                np.clip(labels4[:, 0], 0, 2 * input_dim[1], out=labels4[:, 0])
-                np.clip(labels4[:, 1], 0, 2 * input_dim[0], out=labels4[:, 1])
-                np.clip(labels4[:, 2], 0, 2 * input_dim[1], out=labels4[:, 2])
-                np.clip(labels4[:, 3], 0, 2 * input_dim[0], out=labels4[:, 3])
-            img4, labels4 = random_perspective(
-                img4,
-                labels4,
                 degrees=self.degrees,
                 translate=self.translate,
                 scale=self.scale,
                 shear=self.shear,
                 perspective=self.perspective,
-                border=[-input_dim[0] // 2, -input_dim[1] // 2],
             )  # border to remove
             # -----------------------------------------------------------------
             # CopyPaste: https://arxiv.org/abs/2012.07177
             # -----------------------------------------------------------------
-            if self.enable_mixup and not len(labels4) == 0:
-                img4, labels4 = self.mixup(img4, labels4, self.input_dim)
-            mix_img, padded_labels = self.preproc(img4, labels4, self.input_dim)
             img_info = (mix_img.shape[1], mix_img.shape[0])
             return mix_img, padded_labels, img_info, int(idx)

 from .datasets_wrapper import Dataset
def get_mosaic_coordinate(mosaic_image, mosaic_index, xc, yc, w, h, input_h, input_w):
    """Compute the paste and crop rectangles for one tile of a 2x2 mosaic.

    The four tiles meet at the mosaic center ``(xc, yc)``. Each tile is
    anchored at that center and extends outwards into its own quadrant; the
    paste rectangle is clamped to ``[0, 2 * input_w]`` horizontally and
    ``[0, 2 * input_h]`` vertically. The crop rectangle selects the part of
    the ``w`` x ``h`` tile that is adjacent to the center and has the same
    size as the clamped paste rectangle.

    Args:
        mosaic_image: Not used by this function; kept so existing call
            sites keep working.
        mosaic_index (int): Which quadrant the tile goes to:
            0 = top left, 1 = top right, 2 = bottom left, 3 = bottom right.
        xc (int): x coordinate of the mosaic center.
        yc (int): y coordinate of the mosaic center.
        w (int): Width of the tile image.
        h (int): Height of the tile image.
        input_h (int): Half of the mosaic height.
        input_w (int): Half of the mosaic width.

    Returns:
        tuple: ``((x1, y1, x2, y2), (sx1, sy1, sx2, sy2))`` where the first
        rectangle is the destination region in the mosaic and the second is
        the source region inside the tile image.

    Raises:
        ValueError: If ``mosaic_index`` is not one of 0, 1, 2, 3.
    """
    if mosaic_index == 0:
        # index0: top left part of the mosaic
        x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
        small_coord = w - (x2 - x1), h - (y2 - y1), w, h
    elif mosaic_index == 1:
        # index1: top right part of the mosaic
        x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
        small_coord = 0, h - (y2 - y1), min(w, x2 - x1), h
    elif mosaic_index == 2:
        # index2: bottom left part of the mosaic
        x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
        small_coord = w - (x2 - x1), 0, w, min(y2 - y1, h)
    elif mosaic_index == 3:
        # index3: bottom right part of the mosaic
        x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2, yc + h)
        small_coord = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
    else:
        # Without this branch an invalid index would surface as an opaque
        # UnboundLocalError at the return statement below.
        raise ValueError(
            "mosaic_index must be 0, 1, 2 or 3, got {!r}".format(mosaic_index)
        )
    return (x1, y1, x2, y2), small_coord
+class MosaicDetection(Dataset):
+    """Detection dataset wrapper that performs mixup for normal dataset."""
     def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
         degrees=10.0, translate=0.1, scale=(0.5, 1.5), mscale=(0.5, 1.5),
         shear=2.0, perspective=0.0, enable_mixup=True, *args
     ):
+        """
+        Args:
+            dataset(Dataset) : Pytorch dataset object.
+            img_size (tuple):
+            mosaic (bool): enable mosaic augmentation or not.
+            preproc (func):
+            degrees (float):
+            translate (float):
+            scale (tuple):
+            mscale (tuple):
+            shear (float):
+            perspective (float):
+            enable_mixup (bool):
+            *args(tuple) : Additional arguments for mixup random sampler.
+        """
         super().__init__(img_size, mosaic=mosaic)
         self._dataset = dataset
         self.preproc = preproc
         self.shear = shear
         self.perspective = perspective
         self.mixup_scale = mscale
+        self.enable_mosaic = mosaic
         self.enable_mixup = enable_mixup
     def __len__(self):
     @Dataset.resize_getitem
     def __getitem__(self, idx):
+        if self.enable_mosaic:
+            mosaic_labels = []
             input_dim = self._dataset.input_dim
+            input_h, input_w = input_dim[0], input_dim[1]
             # yc, xc = s, s  # mosaic center x, y
+            yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
+            xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
             # 3 additional image indices
             indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
+            for i_mosaic, index in enumerate(indices):
                 img, _labels, _, _ = self._dataset.pull_item(index)
                 h0, w0 = img.shape[:2]  # orig hw
+                scale = min(1. * input_h / h0, 1. * input_w / w0)
+                img = cv2.resize(
+                    img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR
+                )
+                # generate output mosaic image
+                (h, w, c) = img.shape[:3]
+                if i_mosaic == 0:
+                    mosaic_img = np.full((input_h * 2, input_w * 2, c), 114, dtype=np.uint8)
+                # suffix l means large image, while s means small image in mosaic aug.
+                (l_x1, l_y1, l_x2, l_y2), (s_x1, s_y1, s_x2, s_y2) = get_mosaic_coordinate(
+                    mosaic_img, i_mosaic, xc, yc, w, h, input_h, input_w
+                )
+                mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
+                padw, padh = l_x1 - s_x1, l_y1 - s_y1
+                labels = _labels.copy()
+                # Normalized xywh to pixel xyxy format
+                if _labels.size > 0:
                     labels[:, 0] = scale * _labels[:, 0] + padw
                     labels[:, 1] = scale * _labels[:, 1] + padh
                     labels[:, 2] = scale * _labels[:, 2] + padw
                     labels[:, 3] = scale * _labels[:, 3] + padh
+                mosaic_labels.append(labels)
+            if len(mosaic_labels):
+                mosaic_labels = np.concatenate(mosaic_labels, 0)
+                np.clip(mosaic_labels[:, 0], 0, 2 * input_w, out=mosaic_labels[:, 0])
+                np.clip(mosaic_labels[:, 1], 0, 2 * input_h, out=mosaic_labels[:, 1])
+                np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])
+                np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])
+            mosaic_img, mosaic_labels = random_perspective(
+                mosaic_img,
+                mosaic_labels,
                 degrees=self.degrees,
                 translate=self.translate,
                 scale=self.scale,
                 shear=self.shear,
                 perspective=self.perspective,
+                border=[-input_h // 2, -input_w // 2],
             )  # border to remove
             # -----------------------------------------------------------------
             # CopyPaste: https://arxiv.org/abs/2012.07177
             # -----------------------------------------------------------------
+            if self.enable_mixup and not len(mosaic_labels) == 0:
+                mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim)
+            mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim)
             img_info = (mix_img.shape[1], mix_img.shape[0])
             return mix_img, padded_labels, img_info, int(idx)