Spaces:

tidalove
/

yolox

Sleeping

App Files Files Community

Ge Zheng committed on Aug 23, 2021

Commit

7416b67

1 Parent(s): d4c3ee7

refact (exp, docs) training scales controller and add the corresponding tutorial (#562)

Browse files

Files changed (10) hide show

README.md +2 -1
docs/manipulate_training_image_size.md +59 -0
exps/default/nano.py +2 -1
exps/default/yolov3.py +0 -60
exps/default/yolox_tiny.py +2 -1
exps/example/custom/nano.py +2 -1
exps/example/yolox_voc/yolox_voc_s.py +2 -1
yolox/core/trainer.py +1 -1
yolox/data/datasets/mosaicdetection.py +7 -7
yolox/exp/yolox_base.py +14 -4

README.md CHANGED Viewed

@@ -186,7 +186,8 @@ python tools/eval.py -n  yolox-s -c yolox_s.pth -b 1 -d 1 --conf 0.001 --fp16 --
 <details>
 <summary>Tutorials</summary>
-*  [Training on custom data](docs/train_custom_data.md).
 </details>

 <details>
 <summary>Tutorials</summary>
+*  [Training on custom data](docs/train_custom_data.md)
+*  [Manipulating training image size](docs/manipulate_training_image_size.md)
 </details>

docs/manipulate_training_image_size.md ADDED Viewed

	@@ -0,0 +1,59 @@

+# Manipulating Your Training Image Size
+This tutorial explains how to control your image size when training on your own data.
+## 1. Introduction
+There are 3 hyperparameters that control the training size:
+- self.input_size = (640, 640)
+- self.multiscale_range = 5
+- self.random_size = (14, 26)
+There is 1 hyperparameter that controls the testing size:
+- self.test_size = (640, 640)
+We suggest setting self.input_size to the same value as self.test_size. By default, it is set to (640, 640) for most models and (416, 416) for yolox-tiny and yolox-nano.
+## 2. Multi Scale Training
+When training on your custom dataset, you can use multiscale training in 2 ways:
+1. **【Default】Only specifying the self.input_size and leaving others unchanged.**
+   If so, the actual multiscale sizes range from:
+   [self.input_size[0] - self.multiscale_range\*32,  self.input_size[0] + self.multiscale_range\*32]
+   For example, if you only set:
+   ```python
+   self.input_size = (640, 640)
+   ```
+   the actual multiscale range is [640 - 5\*32, 640 + 5\*32], i.e., [480, 800].
+   You can modify self.multiscale_range to change the multiscale range.
+2. **Simultaneously specifying the self.input_size and self.random_size**
+   ```python
+   self.input_size = (416, 416)
+   self.random_size = (10, 20)
+   ```
+   In this case, the actual multiscale range is [self.random_size[0]\*32, self.random_size[1]\*32], i.e., [320, 640]
+   **Note: You must specify the self.input_size because it is used to initialize the resize augmentation in the dataset.**
+## 3. Single Scale Training
+If you want to train at a single scale, you need to specify self.input_size and set self.multiscale_range = 0:
+```python
+self.input_size = (416, 416)
+self.multiscale_range = 0
+```
+**DO NOT** set the self.random_size.

exps/default/nano.py CHANGED Viewed

@@ -14,8 +14,9 @@ class Exp(MyExp):
         super(Exp, self).__init__()
         self.depth = 0.33
         self.width = 0.25
-        self.scale = (0.5, 1.5)
         self.random_size = (10, 20)
         self.test_size = (416, 416)
         self.mosaic_prob = 0.5
         self.enable_mixup = False

         super(Exp, self).__init__()
         self.depth = 0.33
         self.width = 0.25
+        self.input_size = (416, 416)
         self.random_size = (10, 20)
+        self.mosaic_scale = (0.5, 1.5)
         self.test_size = (416, 416)
         self.mosaic_prob = 0.5
         self.enable_mixup = False

exps/default/yolov3.py CHANGED Viewed

@@ -33,63 +33,3 @@ class Exp(MyExp):
         return self.model
-    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
-        import torch.distributed as dist
-        from yolox.data import (
-            COCODataset,
-            DataLoader,
-            InfiniteSampler,
-            MosaicDetection,
-            TrainTransform,
-            YoloBatchSampler
-        )
-        dataset = COCODataset(
-                data_dir='data/COCO/',
-                json_file=self.train_ann,
-                img_size=self.input_size,
-                preproc=TrainTransform(
-                    rgb_means=(0.485, 0.456, 0.406),
-                    std=(0.229, 0.224, 0.225),
-                    max_labels=50
-                ),
-        )
-        dataset = MosaicDetection(
-            dataset,
-            mosaic=not no_aug,
-            img_size=self.input_size,
-            preproc=TrainTransform(
-                rgb_means=(0.485, 0.456, 0.406),
-                std=(0.229, 0.224, 0.225),
-                max_labels=120
-            ),
-            degrees=self.degrees,
-            translate=self.translate,
-            scale=self.scale,
-            shear=self.shear,
-            perspective=self.perspective,
-        )
-        self.dataset = dataset
-        if is_distributed:
-            batch_size = batch_size // dist.get_world_size()
-            sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
-        else:
-            sampler = torch.utils.data.RandomSampler(self.dataset)
-        batch_sampler = YoloBatchSampler(
-            sampler=sampler,
-            batch_size=batch_size,
-            drop_last=False,
-            input_dimension=self.input_size,
-            mosaic=not no_aug
-        )
-        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
-        dataloader_kwargs["batch_sampler"] = batch_sampler
-        train_loader = DataLoader(self.dataset, **dataloader_kwargs)
-        return train_loader


33
34	return self.model
35

exps/default/yolox_tiny.py CHANGED Viewed

@@ -12,7 +12,8 @@ class Exp(MyExp):
         super(Exp, self).__init__()
         self.depth = 0.33
         self.width = 0.375
-        self.scale = (0.5, 1.5)
         self.random_size = (10, 20)
         self.test_size = (416, 416)
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

         super(Exp, self).__init__()
         self.depth = 0.33
         self.width = 0.375
+        self.input_scale = (416, 416)
+        self.mosaic_scale = (0.5, 1.5)
         self.random_size = (10, 20)
         self.test_size = (416, 416)
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

exps/example/custom/nano.py CHANGED Viewed

@@ -14,7 +14,8 @@ class Exp(MyExp):
         super(Exp, self).__init__()
         self.depth = 0.33
         self.width = 0.25
-        self.scale = (0.5, 1.5)
         self.random_size = (10, 20)
         self.test_size = (416, 416)
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

         super(Exp, self).__init__()
         self.depth = 0.33
         self.width = 0.25
+        self.input_size = (416, 416)
+        self.mosaic_scale = (0.5, 1.5)
         self.random_size = (10, 20)
         self.test_size = (416, 416)
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

exps/example/yolox_voc/yolox_voc_s.py CHANGED Viewed

@@ -49,7 +49,8 @@ class Exp(MyExp):
             preproc=TrainTransform(max_labels=120),
             degrees=self.degrees,
             translate=self.translate,
-            scale=self.scale,
             shear=self.shear,
             perspective=self.perspective,
             enable_mixup=self.enable_mixup,

             preproc=TrainTransform(max_labels=120),
             degrees=self.degrees,
             translate=self.translate,
+            mosaic_scale=self.mosaic_scale,
+            mixup_scale=self.mixup_scale,
             shear=self.shear,
             perspective=self.perspective,
             enable_mixup=self.enable_mixup,

yolox/core/trainer.py CHANGED Viewed

@@ -248,7 +248,7 @@ class Trainer:
             self.meter.clear_meters()
         # random resizing
-        if self.exp.random_size is not None and (self.progress_in_iter + 1) % 10 == 0:
             self.input_size = self.exp.random_resize(
                 self.train_loader, self.epoch, self.rank, self.is_distributed
             )

             self.meter.clear_meters()
         # random resizing
+        if (self.progress_in_iter + 1) % 10 == 0:
             self.input_size = self.exp.random_resize(
                 self.train_loader, self.epoch, self.rank, self.is_distributed
             )

yolox/data/datasets/mosaicdetection.py CHANGED Viewed

@@ -39,9 +39,9 @@ class MosaicDetection(Dataset):
     def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
-        degrees=10.0, translate=0.1, scale=(0.5, 1.5), mscale=(0.5, 1.5),
-        shear=2.0, perspective=0.0, enable_mixup=True,
-        mosaic_prob=1.0, mixup_prob=1.0, *args
     ):
         """
@@ -52,8 +52,8 @@ class MosaicDetection(Dataset):
             preproc (func):
             degrees (float):
             translate (float):
-            scale (tuple):
-            mscale (tuple):
             shear (float):
             perspective (float):
             enable_mixup (bool):
@@ -64,10 +64,10 @@ class MosaicDetection(Dataset):
         self.preproc = preproc
         self.degrees = degrees
         self.translate = translate
-        self.scale = scale
         self.shear = shear
         self.perspective = perspective
-        self.mixup_scale = mscale
         self.enable_mosaic = mosaic
         self.enable_mixup = enable_mixup
         self.mosaic_prob = mosaic_prob

     def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
+        degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
+        mixup_scale=(0.5, 1.5), shear=2.0, perspective=0.0,
+        enable_mixup=True, mosaic_prob=1.0, mixup_prob=1.0, *args
     ):
         """
             preproc (func):
             degrees (float):
             translate (float):
+            mosaic_scale (tuple):
+            mixup_scale (tuple):
             shear (float):
             perspective (float):
             enable_mixup (bool):
         self.preproc = preproc
         self.degrees = degrees
         self.translate = translate
+        self.scale = mosaic_scale
         self.shear = shear
         self.perspective = perspective
+        self.mixup_scale = mixup_scale
         self.enable_mosaic = mosaic
         self.enable_mixup = enable_mixup
         self.mosaic_prob = mosaic_prob

yolox/exp/yolox_base.py CHANGED Viewed

@@ -25,7 +25,12 @@ class Exp(BaseExp):
         # set worker to 4 for shorter dataloader init time
         self.data_num_workers = 4
         self.input_size = (640, 640)
-        self.random_size = (14, 26)
         self.data_dir = None
         self.train_ann = "instances_train2017.json"
         self.val_ann = "instances_val2017.json"
@@ -35,8 +40,8 @@ class Exp(BaseExp):
         self.mixup_prob = 1.0
         self.degrees = 10.0
         self.translate = 0.1
-        self.scale = (0.1, 2)
-        self.mscale = (0.8, 1.6)
         self.shear = 2.0
         self.perspective = 0.0
         self.enable_mixup = True
@@ -116,7 +121,8 @@ class Exp(BaseExp):
             preproc=TrainTransform(max_labels=120),
             degrees=self.degrees,
             translate=self.translate,
-            scale=self.scale,
             shear=self.shear,
             perspective=self.perspective,
             enable_mixup=self.enable_mixup,
@@ -154,6 +160,10 @@ class Exp(BaseExp):
         if rank == 0:
             size_factor = self.input_size[1] * 1.0 / self.input_size[0]
             size = random.randint(*self.random_size)
             size = (int(32 * size), 32 * int(size * size_factor))
             tensor[0] = size[0]

         # set worker to 4 for shorter dataloader init time
         self.data_num_workers = 4
         self.input_size = (640, 640)
+        # Actual multiscale ranges: [640-5*32, 640+5*32].
+        # To disable multiscale training, set the
+        # self.multiscale_range to 0.
+        self.multiscale_range = 5
+        # You can uncomment this line to specify a multiscale range
+        # self.random_size = (14, 26)
         self.data_dir = None
         self.train_ann = "instances_train2017.json"
         self.val_ann = "instances_val2017.json"
         self.mixup_prob = 1.0
         self.degrees = 10.0
         self.translate = 0.1
+        self.mosaic_scale = (0.1, 2)
+        self.mixup_scale = (0.5, 1.5)
         self.shear = 2.0
         self.perspective = 0.0
         self.enable_mixup = True
             preproc=TrainTransform(max_labels=120),
             degrees=self.degrees,
             translate=self.translate,
+            mosaic_scale=self.mosaic_scale,
+            mixup_scale=self.mixup_scale,
             shear=self.shear,
             perspective=self.perspective,
             enable_mixup=self.enable_mixup,
         if rank == 0:
             size_factor = self.input_size[1] * 1.0 / self.input_size[0]
+            if not hasattr(self, 'random_size'):
+                min_size = int(self.input_size[0] / 32) - self.multiscale_range
+                max_size = int(self.input_size[0] / 32) + self.multiscale_range
+                self.random_size = (min_size, max_size)
             size = random.randint(*self.random_size)
             size = (int(32 * size), 32 * int(size * size_factor))
             tensor[0] = size[0]