Ge Zheng committed on
Commit
7416b67
·
1 Parent(s): d4c3ee7

refact (exp, docs) training scales controller and add the corresponding tutorial (#562)

Browse files
README.md CHANGED
@@ -186,7 +186,8 @@ python tools/eval.py -n yolox-s -c yolox_s.pth -b 1 -d 1 --conf 0.001 --fp16 --
186
  <details>
187
  <summary>Tutorials</summary>
188
 
189
- * [Training on custom data](docs/train_custom_data.md).
 
190
 
191
  </details>
192
 
 
186
  <details>
187
  <summary>Tutorials</summary>
188
 
189
+ * [Training on custom data](docs/train_custom_data.md)
190
+ * [Manipulating training image size](docs/manipulate_training_image_size.md)
191
 
192
  </details>
193
 
docs/manipulate_training_image_size.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Manipulating Your Training Image Size
2
+
3
+ This tutorial explains how to control your image size when training on your own data.
4
+
5
+ ## 1. Introduction
6
+
7
+ There are 3 hyperparameters that control the training size:
8
+
9
+ - self.input_size = (640, 640)
10
+ - self.multiscale_range = 5
11
+ - self.random_size = (14, 26)
12
+
13
+ There is 1 hyperparameter that controls the testing size:
14
+
15
+ - self.test_size = (640, 640)
16
+
17
+ It is recommended to set self.input_size to the same value as self.test_size. By default, it is set to (640, 640) for most models and (416, 416) for yolox-tiny and yolox-nano.
18
+
19
+ ## 2. Multi Scale Training
20
+
21
+ When training on your custom dataset, you can use multiscale training in 2 ways:
22
+
23
+ 1. **【Default】Only specifying the self.input_size and leaving others unchanged.**
24
+
25
+ If so, the actual multiscale sizes range from:
26
+
27
+ [self.input_size[0] - self.multiscale_range\*32, self.input_size[0] + self.multiscale_range\*32]
28
+
29
+ For example, if you only set:
30
+
31
+ ```python
32
+ self.input_size = (640, 640)
33
+ ```
34
+
35
+ the actual multiscale range is [640 - 5\*32, 640 + 5\*32], i.e., [480, 800].
36
+
37
+ You can modify self.multiscale_range to change the multiscale range.
38
+
39
+ 2. **Simultaneously specifying the self.input_size and self.random_size**
40
+
41
+ ```python
42
+ self.input_size = (416, 416)
43
+ self.random_size = (10, 20)
44
+ ```
45
+
46
+ In this case, the actual multiscale range is [self.random_size[0]\*32, self.random_size[1]\*32], i.e., [320, 640]
47
+
48
+ **Note: You must specify self.input_size because it is used to initialize the resize augmentation in the dataset.**
49
+
50
+ ## 3. Single Scale Training
51
+
52
+ If you want to train at a single scale, you need to specify self.input_size and set self.multiscale_range = 0:
53
+
54
+ ```python
55
+ self.input_size = (416, 416)
56
+ self.multiscale_range = 0
57
+ ```
58
+
59
+ **DO NOT** set self.random_size: the multiscale range is derived from self.multiscale_range only when self.random_size is not defined.
exps/default/nano.py CHANGED
@@ -14,8 +14,9 @@ class Exp(MyExp):
14
  super(Exp, self).__init__()
15
  self.depth = 0.33
16
  self.width = 0.25
17
- self.scale = (0.5, 1.5)
18
  self.random_size = (10, 20)
 
19
  self.test_size = (416, 416)
20
  self.mosaic_prob = 0.5
21
  self.enable_mixup = False
 
14
  super(Exp, self).__init__()
15
  self.depth = 0.33
16
  self.width = 0.25
17
+ self.input_size = (416, 416)
18
  self.random_size = (10, 20)
19
+ self.mosaic_scale = (0.5, 1.5)
20
  self.test_size = (416, 416)
21
  self.mosaic_prob = 0.5
22
  self.enable_mixup = False
exps/default/yolov3.py CHANGED
@@ -33,63 +33,3 @@ class Exp(MyExp):
33
 
34
  return self.model
35
 
36
- def get_data_loader(self, batch_size, is_distributed, no_aug=False):
37
- import torch.distributed as dist
38
-
39
- from yolox.data import (
40
- COCODataset,
41
- DataLoader,
42
- InfiniteSampler,
43
- MosaicDetection,
44
- TrainTransform,
45
- YoloBatchSampler
46
- )
47
-
48
- dataset = COCODataset(
49
- data_dir='data/COCO/',
50
- json_file=self.train_ann,
51
- img_size=self.input_size,
52
- preproc=TrainTransform(
53
- rgb_means=(0.485, 0.456, 0.406),
54
- std=(0.229, 0.224, 0.225),
55
- max_labels=50
56
- ),
57
- )
58
-
59
- dataset = MosaicDetection(
60
- dataset,
61
- mosaic=not no_aug,
62
- img_size=self.input_size,
63
- preproc=TrainTransform(
64
- rgb_means=(0.485, 0.456, 0.406),
65
- std=(0.229, 0.224, 0.225),
66
- max_labels=120
67
- ),
68
- degrees=self.degrees,
69
- translate=self.translate,
70
- scale=self.scale,
71
- shear=self.shear,
72
- perspective=self.perspective,
73
- )
74
-
75
- self.dataset = dataset
76
-
77
- if is_distributed:
78
- batch_size = batch_size // dist.get_world_size()
79
- sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
80
- else:
81
- sampler = torch.utils.data.RandomSampler(self.dataset)
82
-
83
- batch_sampler = YoloBatchSampler(
84
- sampler=sampler,
85
- batch_size=batch_size,
86
- drop_last=False,
87
- input_dimension=self.input_size,
88
- mosaic=not no_aug
89
- )
90
-
91
- dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
92
- dataloader_kwargs["batch_sampler"] = batch_sampler
93
- train_loader = DataLoader(self.dataset, **dataloader_kwargs)
94
-
95
- return train_loader
 
33
 
34
  return self.model
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exps/default/yolox_tiny.py CHANGED
@@ -12,7 +12,8 @@ class Exp(MyExp):
12
  super(Exp, self).__init__()
13
  self.depth = 0.33
14
  self.width = 0.375
15
- self.scale = (0.5, 1.5)
 
16
  self.random_size = (10, 20)
17
  self.test_size = (416, 416)
18
  self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
12
  super(Exp, self).__init__()
13
  self.depth = 0.33
14
  self.width = 0.375
15
+ self.input_scale = (416, 416)
16
+ self.mosaic_scale = (0.5, 1.5)
17
  self.random_size = (10, 20)
18
  self.test_size = (416, 416)
19
  self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/example/custom/nano.py CHANGED
@@ -14,7 +14,8 @@ class Exp(MyExp):
14
  super(Exp, self).__init__()
15
  self.depth = 0.33
16
  self.width = 0.25
17
- self.scale = (0.5, 1.5)
 
18
  self.random_size = (10, 20)
19
  self.test_size = (416, 416)
20
  self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
14
  super(Exp, self).__init__()
15
  self.depth = 0.33
16
  self.width = 0.25
17
+ self.input_size = (416, 416)
18
+ self.mosaic_scale = (0.5, 1.5)
19
  self.random_size = (10, 20)
20
  self.test_size = (416, 416)
21
  self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/example/yolox_voc/yolox_voc_s.py CHANGED
@@ -49,7 +49,8 @@ class Exp(MyExp):
49
  preproc=TrainTransform(max_labels=120),
50
  degrees=self.degrees,
51
  translate=self.translate,
52
- scale=self.scale,
 
53
  shear=self.shear,
54
  perspective=self.perspective,
55
  enable_mixup=self.enable_mixup,
 
49
  preproc=TrainTransform(max_labels=120),
50
  degrees=self.degrees,
51
  translate=self.translate,
52
+ mosaic_scale=self.mosaic_scale,
53
+ mixup_scale=self.mixup_scale,
54
  shear=self.shear,
55
  perspective=self.perspective,
56
  enable_mixup=self.enable_mixup,
yolox/core/trainer.py CHANGED
@@ -248,7 +248,7 @@ class Trainer:
248
  self.meter.clear_meters()
249
 
250
  # random resizing
251
- if self.exp.random_size is not None and (self.progress_in_iter + 1) % 10 == 0:
252
  self.input_size = self.exp.random_resize(
253
  self.train_loader, self.epoch, self.rank, self.is_distributed
254
  )
 
248
  self.meter.clear_meters()
249
 
250
  # random resizing
251
+ if (self.progress_in_iter + 1) % 10 == 0:
252
  self.input_size = self.exp.random_resize(
253
  self.train_loader, self.epoch, self.rank, self.is_distributed
254
  )
yolox/data/datasets/mosaicdetection.py CHANGED
@@ -39,9 +39,9 @@ class MosaicDetection(Dataset):
39
 
40
  def __init__(
41
  self, dataset, img_size, mosaic=True, preproc=None,
42
- degrees=10.0, translate=0.1, scale=(0.5, 1.5), mscale=(0.5, 1.5),
43
- shear=2.0, perspective=0.0, enable_mixup=True,
44
- mosaic_prob=1.0, mixup_prob=1.0, *args
45
  ):
46
  """
47
 
@@ -52,8 +52,8 @@ class MosaicDetection(Dataset):
52
  preproc (func):
53
  degrees (float):
54
  translate (float):
55
- scale (tuple):
56
- mscale (tuple):
57
  shear (float):
58
  perspective (float):
59
  enable_mixup (bool):
@@ -64,10 +64,10 @@ class MosaicDetection(Dataset):
64
  self.preproc = preproc
65
  self.degrees = degrees
66
  self.translate = translate
67
- self.scale = scale
68
  self.shear = shear
69
  self.perspective = perspective
70
- self.mixup_scale = mscale
71
  self.enable_mosaic = mosaic
72
  self.enable_mixup = enable_mixup
73
  self.mosaic_prob = mosaic_prob
 
39
 
40
  def __init__(
41
  self, dataset, img_size, mosaic=True, preproc=None,
42
+ degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
43
+ mixup_scale=(0.5, 1.5), shear=2.0, perspective=0.0,
44
+ enable_mixup=True, mosaic_prob=1.0, mixup_prob=1.0, *args
45
  ):
46
  """
47
 
 
52
  preproc (func):
53
  degrees (float):
54
  translate (float):
55
+ mosaic_scale (tuple):
56
+ mixup_scale (tuple):
57
  shear (float):
58
  perspective (float):
59
  enable_mixup (bool):
 
64
  self.preproc = preproc
65
  self.degrees = degrees
66
  self.translate = translate
67
+ self.scale = mosaic_scale
68
  self.shear = shear
69
  self.perspective = perspective
70
+ self.mixup_scale = mixup_scale
71
  self.enable_mosaic = mosaic
72
  self.enable_mixup = enable_mixup
73
  self.mosaic_prob = mosaic_prob
yolox/exp/yolox_base.py CHANGED
@@ -25,7 +25,12 @@ class Exp(BaseExp):
25
  # set worker to 4 for shorter dataloader init time
26
  self.data_num_workers = 4
27
  self.input_size = (640, 640)
28
- self.random_size = (14, 26)
 
 
 
 
 
29
  self.data_dir = None
30
  self.train_ann = "instances_train2017.json"
31
  self.val_ann = "instances_val2017.json"
@@ -35,8 +40,8 @@ class Exp(BaseExp):
35
  self.mixup_prob = 1.0
36
  self.degrees = 10.0
37
  self.translate = 0.1
38
- self.scale = (0.1, 2)
39
- self.mscale = (0.8, 1.6)
40
  self.shear = 2.0
41
  self.perspective = 0.0
42
  self.enable_mixup = True
@@ -116,7 +121,8 @@ class Exp(BaseExp):
116
  preproc=TrainTransform(max_labels=120),
117
  degrees=self.degrees,
118
  translate=self.translate,
119
- scale=self.scale,
 
120
  shear=self.shear,
121
  perspective=self.perspective,
122
  enable_mixup=self.enable_mixup,
@@ -154,6 +160,10 @@ class Exp(BaseExp):
154
 
155
  if rank == 0:
156
  size_factor = self.input_size[1] * 1.0 / self.input_size[0]
 
 
 
 
157
  size = random.randint(*self.random_size)
158
  size = (int(32 * size), 32 * int(size * size_factor))
159
  tensor[0] = size[0]
 
25
  # set worker to 4 for shorter dataloader init time
26
  self.data_num_workers = 4
27
  self.input_size = (640, 640)
28
+ # Actual multiscale ranges: [640-5*32, 640+5*32].
29
+ # To disable multiscale training, set the
30
+ # self.multiscale_range to 0.
31
+ self.multiscale_range = 5
32
+ # You can uncomment this line to specify a multiscale range
33
+ # self.random_size = (14, 26)
34
  self.data_dir = None
35
  self.train_ann = "instances_train2017.json"
36
  self.val_ann = "instances_val2017.json"
 
40
  self.mixup_prob = 1.0
41
  self.degrees = 10.0
42
  self.translate = 0.1
43
+ self.mosaic_scale = (0.1, 2)
44
+ self.mixup_scale = (0.5, 1.5)
45
  self.shear = 2.0
46
  self.perspective = 0.0
47
  self.enable_mixup = True
 
121
  preproc=TrainTransform(max_labels=120),
122
  degrees=self.degrees,
123
  translate=self.translate,
124
+ mosaic_scale=self.mosaic_scale,
125
+ mixup_scale=self.mixup_scale,
126
  shear=self.shear,
127
  perspective=self.perspective,
128
  enable_mixup=self.enable_mixup,
 
160
 
161
  if rank == 0:
162
  size_factor = self.input_size[1] * 1.0 / self.input_size[0]
163
+ if not hasattr(self, 'random_size'):
164
+ min_size = int(self.input_size[0] / 32) - self.multiscale_range
165
+ max_size = int(self.input_size[0] / 32) + self.multiscale_range
166
+ self.random_size = (min_size, max_size)
167
  size = random.randint(*self.random_size)
168
  size = (int(32 * size), 32 * int(size * size_factor))
169
  tensor[0] = size[0]