Spaces:

henry000
/

YOLO

Running

App Files Files Community

henry000 committed on Apr 23, 2024

Commit

237de06

1 Parent(s): d8aafaa

✨ [Add] a Mosaic data augment in dataloader

Browse files

Files changed (3) hide show

config/data/augmentation.yaml +2 -1
utils/dataargument.py +57 -6
utils/dataloader.py +14 -3

config/data/augmentation.yaml CHANGED Viewed

	@@ -1 +1,2 @@
1	- RandomHorizontalFlip: 0.5


1	+ RandomHorizontalFlip: 0.5
2	+ Mosaic: 0.5

utils/dataargument.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import torch
 from torchvision.transforms import functional as TF
@@ -8,21 +10,70 @@ class Compose:
     def __init__(self, transforms):
         self.transforms = transforms
     def __call__(self, image, boxes):
-        for t in self.transforms:
-            image, boxes = t(image, boxes)
         return image, boxes
 class RandomHorizontalFlip:
     """Randomly horizontally flips the image along with the bounding boxes."""
-    def __init__(self, p=0.5):
-        self.p = p
     def __call__(self, image, boxes):
-        if torch.rand(1) < self.p:
             image = TF.hflip(image)
-            # Assuming boxes are in the format [cls, xmin, ymin, xmax, ymax]
             boxes[:, [1, 3]] = 1 - boxes[:, [3, 1]]
         return image, boxes

+from PIL import Image
+import numpy as np
 import torch
 from torchvision.transforms import functional as TF
     def __init__(self, transforms):
+        """Store the transforms and register this pipeline as parent of any that define ``set_parent``."""
         self.transforms = transforms
+        for transform in self.transforms:
+            # Only transforms that opt in (by exposing ``set_parent``) get a back-reference.
+            if hasattr(transform, "set_parent"):
+                transform.set_parent(self)
     def __call__(self, image, boxes):
+        """Apply each transform in order, feeding its output to the next, and return the final pair."""
+        for transform in self.transforms:
+            image, boxes = transform(image, boxes)
         return image, boxes
+    def get_more_data(self, num: int = 1):
+        """Return ``num`` extra ``(image, boxes)`` samples for multi-image transforms.
+
+        This base implementation is only a placeholder: whoever owns the
+        pipeline is expected to replace it with a real data source. It accepts
+        ``num`` so that a caller passing a count (``Mosaic`` calls
+        ``get_more_data(3)``) gets the intended ``NotImplementedError`` rather
+        than a ``TypeError`` about an unexpected argument.
+
+        Args:
+            num: Number of additional samples requested.
+
+        Raises:
+            NotImplementedError: always, until overridden.
+        """
+        raise NotImplementedError("This method should be overridden by subclass instances!")
 class RandomHorizontalFlip:
+    """Randomly flip an image horizontally and mirror its bounding boxes.
+
+    Boxes are indexed as ``[cls, xmin, ymin, xmax, ymax]`` rows; the ``1 - x``
+    mirroring assumes x coordinates normalised to ``[0, 1]``.
+
+    Args:
+        prob: Probability of applying the flip on each call.
+    """
+    def __init__(self, prob=0.5):
+        self.prob = prob
     def __call__(self, image, boxes):
+        """Return ``(image, boxes)``, flipped together with probability ``prob``."""
+        if torch.rand(1) < self.prob:
             image = TF.hflip(image)
+            # Work on a copy: the caller may hand in the dataset's stored
+            # labels, and flipping those in place would leave them mirrored
+            # for every later read of the same sample.
+            boxes = boxes.clone()
+            # New xmin is 1 - old xmax, and new xmax is 1 - old xmin.
             boxes[:, [1, 3]] = 1 - boxes[:, [3, 1]]
         return image, boxes
+class Mosaic:
+    """Combine the current sample with three random ones into a 2x2 mosaic.
+
+    The transform needs a parent (set through ``set_parent``) that exposes
+    ``image_size`` and ``get_more_data(num)``.
+
+    Args:
+        prob: Probability of building a mosaic on each call.
+    """
+    def __init__(self, prob=0.5):
+        self.prob = prob
+        self.parent = None
+    def set_parent(self, parent):
+        """Register the object that supplies ``image_size`` and extra samples."""
+        self.parent = parent
+    def __call__(self, image, boxes):
+        """Return a mosaic image and the merged, re-normalised boxes.
+
+        Args:
+            image: PIL image of the current sample.
+            boxes: Tensor of ``[cls, xmin, ymin, xmax, ymax]`` rows with
+                coordinates normalised to the image.
+
+        Returns:
+            The inputs unchanged when the random draw skips the transform,
+            otherwise a ``2 * image_size`` square RGB image and the boxes of
+            all four samples expressed relative to that canvas.
+
+        Raises:
+            AssertionError: if no parent has been set.
+        """
+        if torch.rand(1) >= self.prob:
+            return image, boxes
+        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
+        if self.parent is None:
+            raise AssertionError("Parent is not set. Mosaic cannot retrieve image size.")
+        img_sz = self.parent.image_size
+        more_data = self.parent.get_more_data(3)  # get 3 more images randomly
+        data = [(image, boxes)] + more_data
+        mosaic_image = Image.new("RGB", (2 * img_sz, 2 * img_sz))
+        # Offsets (in units of each tile's own size) that anchor the four tiles
+        # around the canvas centre: top-left, top-right, bottom-left, bottom-right.
+        vectors = np.array([(-1, -1), (0, -1), (-1, 0), (0, 0)])
+        center = np.array([img_sz, img_sz])
+        all_labels = []
+        for (tile, tile_boxes), vector in zip(data, vectors):
+            this_w, this_h = tile.size
+            # Plain ints keep the paste offset and the box arithmetic free of NumPy scalar types.
+            left, top = (int(v) for v in center + vector * np.array([this_w, this_h]))
+            mosaic_image.paste(tile, (left, top))
+            # Normalised tile coords -> canvas pixels -> normalised canvas coords.
+            xmin = (tile_boxes[:, 1] * this_w + left) / (2 * img_sz)
+            xmax = (tile_boxes[:, 3] * this_w + left) / (2 * img_sz)
+            ymin = (tile_boxes[:, 2] * this_h + top) / (2 * img_sz)
+            ymax = (tile_boxes[:, 4] * this_h + top) / (2 * img_sz)
+            all_labels.append(torch.stack([tile_boxes[:, 0], xmin, ymin, xmax, ymax], dim=1))
+        return mosaic_image, torch.cat(all_labels, dim=0)

utils/dataloader.py CHANGED Viewed

@@ -10,14 +10,17 @@ from tqdm.rich import tqdm
 import diskcache as dc
 from typing import Union
 from drawer import draw_bboxes
-from dataargument import Compose, RandomHorizontalFlip
 class YoloDataset(Dataset):
-    def __init__(self, dataset_cfg: dict, phase: str = "train", transform=None):
         phase_name = dataset_cfg.get(phase, phase)
         self.transform = transform
         self.data = self.load_data(dataset_cfg.path, phase_name)
     def load_data(self, dataset_path, phase_name):
@@ -105,9 +108,17 @@ class YoloDataset(Dataset):
             logger.warning("No valid BBox in {}", label_path)
             return None
-    def __getitem__(self, idx) -> Union[Image.Image, torch.Tensor]:
         img_path, bboxes = self.data[idx]
         img = Image.open(img_path).convert("RGB")
         if self.transform:
             img, bboxes = self.transform(img, bboxes)
         return img, bboxes

 import diskcache as dc
 from typing import Union
 from drawer import draw_bboxes
+from dataargument import Compose, RandomHorizontalFlip, Mosaic
 class YoloDataset(Dataset):
+    def __init__(self, dataset_cfg: dict, phase: str = "train", image_size: int = 640, transform=None):
+        """Set up the dataset for one phase and wire it to its transform.
+
+        Args:
+            dataset_cfg: Dataset configuration; read via ``.get(phase, phase)``
+                and ``.path``.
+            phase: Phase key looked up in ``dataset_cfg``; used verbatim when
+                the config has no entry for it.
+            image_size: Size exposed to the transform as ``image_size``.
+            transform: Optional callable ``(image, boxes) -> (image, boxes)``.
+        """
         phase_name = dataset_cfg.get(phase, phase)
+        self.image_size = image_size
         self.transform = transform
+        # ``transform`` defaults to None; only hook up the callbacks when one is given.
+        if self.transform is not None:
+            self.transform.get_more_data = self.get_more_data
+            self.transform.image_size = self.image_size
         self.data = self.load_data(dataset_cfg.path, phase_name)
     def load_data(self, dataset_path, phase_name):
             logger.warning("No valid BBox in {}", label_path)
             return None
+    def get_data(self, idx):
+        """Load sample ``idx``: open its image as RGB and return it with its stored boxes, untransformed."""
         img_path, bboxes = self.data[idx]
         img = Image.open(img_path).convert("RGB")
+        return img, bboxes
+    def get_more_data(self, num: int = 1):
+        """Return a list of ``num`` untransformed samples at random indices (repeats possible)."""
+        indices = torch.randint(0, len(self), (num,))
+        return [self.get_data(idx) for idx in indices]
+    def __getitem__(self, idx) -> Union[Image.Image, torch.Tensor]:
+        """Return the ``(image, boxes)`` pair for ``idx``, passed through the transform if one is set."""
+        # NOTE(review): the annotation is a Union, but a 2-tuple is returned — confirm intended type.
+        img, bboxes = self.get_data(idx)
         if self.transform:
             img, bboxes = self.transform(img, bboxes)
         return img, bboxes