Ge Zheng
committed on
Commit
·
7416b67
1
Parent(s):
d4c3ee7
refact (exp, docs) training scales controller and add the corresponding tutorial (#562)
Browse files- README.md +2 -1
- docs/manipulate_training_image_size.md +59 -0
- exps/default/nano.py +2 -1
- exps/default/yolov3.py +0 -60
- exps/default/yolox_tiny.py +2 -1
- exps/example/custom/nano.py +2 -1
- exps/example/yolox_voc/yolox_voc_s.py +2 -1
- yolox/core/trainer.py +1 -1
- yolox/data/datasets/mosaicdetection.py +7 -7
- yolox/exp/yolox_base.py +14 -4
README.md
CHANGED
@@ -186,7 +186,8 @@ python tools/eval.py -n yolox-s -c yolox_s.pth -b 1 -d 1 --conf 0.001 --fp16 --
|
|
186 |
<details>
|
187 |
<summary>Tutorials</summary>
|
188 |
|
189 |
-
* [Training on custom data](docs/train_custom_data.md)
|
|
|
190 |
|
191 |
</details>
|
192 |
|
|
|
186 |
<details>
|
187 |
<summary>Tutorials</summary>
|
188 |
|
189 |
+
* [Training on custom data](docs/train_custom_data.md)
|
190 |
+
* [Manipulating training image size](docs/manipulate_training_image_size.md)
|
191 |
|
192 |
</details>
|
193 |
|
docs/manipulate_training_image_size.md
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Manipulating Your Training Image Size
|
2 |
+
|
3 |
+
This tutorial explains how to control your image size when training on your own data.
|
4 |
+
|
5 |
+
## 1. Introduction
|
6 |
+
|
7 |
+
There are 3 hyperparameters that control the training size:
|
8 |
+
|
9 |
+
- self.input_size = (640, 640)
|
10 |
+
- self.multiscale_range = 5
|
11 |
+
- self.random_size = (14, 26)
|
12 |
+
|
13 |
+
There is 1 hyperparameter that controls the testing size:
|
14 |
+
|
15 |
+
- self.test_size = (640, 640)
|
16 |
+
|
17 |
+
It is recommended to set self.input_size to the same value as self.test_size. By default, self.input_size is set to (640, 640) for most models and (416, 416) for yolox-tiny and yolox-nano.
|
18 |
+
|
19 |
+
## 2. Multi Scale Training
|
20 |
+
|
21 |
+
When training on your custom dataset, you can use multiscale training in 2 ways:
|
22 |
+
|
23 |
+
1. **【Default】Only specifying the self.input_size and leaving others unchanged.**
|
24 |
+
|
25 |
+
If so, the actual multiscale sizes range from:
|
26 |
+
|
27 |
+
[self.input_size[0] - self.multiscale_range\*32, self.input_size[0] + self.multiscale_range\*32]
|
28 |
+
|
29 |
+
For example, if you only set:
|
30 |
+
|
31 |
+
```python
|
32 |
+
self.input_size = (640, 640)
|
33 |
+
```
|
34 |
+
|
35 |
+
the actual multiscale range is [640 - 5\*32, 640 + 5\*32], i.e., [480, 800].
|
36 |
+
|
37 |
+
You can modify self.multiscale_range to change the multiscale range.
|
38 |
+
|
39 |
+
2. **Simultaneously specifying the self.input_size and self.random_size**
|
40 |
+
|
41 |
+
```python
|
42 |
+
self.input_size = (416, 416)
|
43 |
+
self.random_size = (10, 20)
|
44 |
+
```
|
45 |
+
|
46 |
+
In this case, the actual multiscale range is [self.random_size[0]\*32, self.random_size[1]\*32], i.e., [320, 640].
|
47 |
+
|
48 |
+
**Note: You must specify self.input_size because it is used to initialize the resize augmentation in the dataset.**
|
49 |
+
|
50 |
+
## 3. Single Scale Training
|
51 |
+
|
52 |
+
If you want to train at a single scale, you need to specify self.input_size and set self.multiscale_range to 0:
|
53 |
+
|
54 |
+
```python
|
55 |
+
self.input_size = (416, 416)
|
56 |
+
self.multiscale_range = 0
|
57 |
+
```
|
58 |
+
|
59 |
+
**DO NOT** set the self.random_size.
|
exps/default/nano.py
CHANGED
@@ -14,8 +14,9 @@ class Exp(MyExp):
|
|
14 |
super(Exp, self).__init__()
|
15 |
self.depth = 0.33
|
16 |
self.width = 0.25
|
17 |
-
self.
|
18 |
self.random_size = (10, 20)
|
|
|
19 |
self.test_size = (416, 416)
|
20 |
self.mosaic_prob = 0.5
|
21 |
self.enable_mixup = False
|
|
|
14 |
super(Exp, self).__init__()
|
15 |
self.depth = 0.33
|
16 |
self.width = 0.25
|
17 |
+
self.input_size = (416, 416)
|
18 |
self.random_size = (10, 20)
|
19 |
+
self.mosaic_scale = (0.5, 1.5)
|
20 |
self.test_size = (416, 416)
|
21 |
self.mosaic_prob = 0.5
|
22 |
self.enable_mixup = False
|
exps/default/yolov3.py
CHANGED
@@ -33,63 +33,3 @@ class Exp(MyExp):
|
|
33 |
|
34 |
return self.model
|
35 |
|
36 |
-
def get_data_loader(self, batch_size, is_distributed, no_aug=False):
|
37 |
-
import torch.distributed as dist
|
38 |
-
|
39 |
-
from yolox.data import (
|
40 |
-
COCODataset,
|
41 |
-
DataLoader,
|
42 |
-
InfiniteSampler,
|
43 |
-
MosaicDetection,
|
44 |
-
TrainTransform,
|
45 |
-
YoloBatchSampler
|
46 |
-
)
|
47 |
-
|
48 |
-
dataset = COCODataset(
|
49 |
-
data_dir='data/COCO/',
|
50 |
-
json_file=self.train_ann,
|
51 |
-
img_size=self.input_size,
|
52 |
-
preproc=TrainTransform(
|
53 |
-
rgb_means=(0.485, 0.456, 0.406),
|
54 |
-
std=(0.229, 0.224, 0.225),
|
55 |
-
max_labels=50
|
56 |
-
),
|
57 |
-
)
|
58 |
-
|
59 |
-
dataset = MosaicDetection(
|
60 |
-
dataset,
|
61 |
-
mosaic=not no_aug,
|
62 |
-
img_size=self.input_size,
|
63 |
-
preproc=TrainTransform(
|
64 |
-
rgb_means=(0.485, 0.456, 0.406),
|
65 |
-
std=(0.229, 0.224, 0.225),
|
66 |
-
max_labels=120
|
67 |
-
),
|
68 |
-
degrees=self.degrees,
|
69 |
-
translate=self.translate,
|
70 |
-
scale=self.scale,
|
71 |
-
shear=self.shear,
|
72 |
-
perspective=self.perspective,
|
73 |
-
)
|
74 |
-
|
75 |
-
self.dataset = dataset
|
76 |
-
|
77 |
-
if is_distributed:
|
78 |
-
batch_size = batch_size // dist.get_world_size()
|
79 |
-
sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
|
80 |
-
else:
|
81 |
-
sampler = torch.utils.data.RandomSampler(self.dataset)
|
82 |
-
|
83 |
-
batch_sampler = YoloBatchSampler(
|
84 |
-
sampler=sampler,
|
85 |
-
batch_size=batch_size,
|
86 |
-
drop_last=False,
|
87 |
-
input_dimension=self.input_size,
|
88 |
-
mosaic=not no_aug
|
89 |
-
)
|
90 |
-
|
91 |
-
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
|
92 |
-
dataloader_kwargs["batch_sampler"] = batch_sampler
|
93 |
-
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
|
94 |
-
|
95 |
-
return train_loader
|
|
|
33 |
|
34 |
return self.model
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exps/default/yolox_tiny.py
CHANGED
@@ -12,7 +12,8 @@ class Exp(MyExp):
|
|
12 |
super(Exp, self).__init__()
|
13 |
self.depth = 0.33
|
14 |
self.width = 0.375
|
15 |
-
self.
|
|
|
16 |
self.random_size = (10, 20)
|
17 |
self.test_size = (416, 416)
|
18 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
|
|
12 |
super(Exp, self).__init__()
|
13 |
self.depth = 0.33
|
14 |
self.width = 0.375
|
15 |
+
self.input_scale = (416, 416)
|
16 |
+
self.mosaic_scale = (0.5, 1.5)
|
17 |
self.random_size = (10, 20)
|
18 |
self.test_size = (416, 416)
|
19 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
exps/example/custom/nano.py
CHANGED
@@ -14,7 +14,8 @@ class Exp(MyExp):
|
|
14 |
super(Exp, self).__init__()
|
15 |
self.depth = 0.33
|
16 |
self.width = 0.25
|
17 |
-
self.
|
|
|
18 |
self.random_size = (10, 20)
|
19 |
self.test_size = (416, 416)
|
20 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
|
|
14 |
super(Exp, self).__init__()
|
15 |
self.depth = 0.33
|
16 |
self.width = 0.25
|
17 |
+
self.input_size = (416, 416)
|
18 |
+
self.mosaic_scale = (0.5, 1.5)
|
19 |
self.random_size = (10, 20)
|
20 |
self.test_size = (416, 416)
|
21 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
exps/example/yolox_voc/yolox_voc_s.py
CHANGED
@@ -49,7 +49,8 @@ class Exp(MyExp):
|
|
49 |
preproc=TrainTransform(max_labels=120),
|
50 |
degrees=self.degrees,
|
51 |
translate=self.translate,
|
52 |
-
|
|
|
53 |
shear=self.shear,
|
54 |
perspective=self.perspective,
|
55 |
enable_mixup=self.enable_mixup,
|
|
|
49 |
preproc=TrainTransform(max_labels=120),
|
50 |
degrees=self.degrees,
|
51 |
translate=self.translate,
|
52 |
+
mosaic_scale=self.mosaic_scale,
|
53 |
+
mixup_scale=self.mixup_scale,
|
54 |
shear=self.shear,
|
55 |
perspective=self.perspective,
|
56 |
enable_mixup=self.enable_mixup,
|
yolox/core/trainer.py
CHANGED
@@ -248,7 +248,7 @@ class Trainer:
|
|
248 |
self.meter.clear_meters()
|
249 |
|
250 |
# random resizing
|
251 |
-
if
|
252 |
self.input_size = self.exp.random_resize(
|
253 |
self.train_loader, self.epoch, self.rank, self.is_distributed
|
254 |
)
|
|
|
248 |
self.meter.clear_meters()
|
249 |
|
250 |
# random resizing
|
251 |
+
if (self.progress_in_iter + 1) % 10 == 0:
|
252 |
self.input_size = self.exp.random_resize(
|
253 |
self.train_loader, self.epoch, self.rank, self.is_distributed
|
254 |
)
|
yolox/data/datasets/mosaicdetection.py
CHANGED
@@ -39,9 +39,9 @@ class MosaicDetection(Dataset):
|
|
39 |
|
40 |
def __init__(
|
41 |
self, dataset, img_size, mosaic=True, preproc=None,
|
42 |
-
degrees=10.0, translate=0.1,
|
43 |
-
shear=2.0, perspective=0.0,
|
44 |
-
mosaic_prob=1.0, mixup_prob=1.0, *args
|
45 |
):
|
46 |
"""
|
47 |
|
@@ -52,8 +52,8 @@ class MosaicDetection(Dataset):
|
|
52 |
preproc (func):
|
53 |
degrees (float):
|
54 |
translate (float):
|
55 |
-
|
56 |
-
|
57 |
shear (float):
|
58 |
perspective (float):
|
59 |
enable_mixup (bool):
|
@@ -64,10 +64,10 @@ class MosaicDetection(Dataset):
|
|
64 |
self.preproc = preproc
|
65 |
self.degrees = degrees
|
66 |
self.translate = translate
|
67 |
-
self.scale =
|
68 |
self.shear = shear
|
69 |
self.perspective = perspective
|
70 |
-
self.mixup_scale =
|
71 |
self.enable_mosaic = mosaic
|
72 |
self.enable_mixup = enable_mixup
|
73 |
self.mosaic_prob = mosaic_prob
|
|
|
39 |
|
40 |
def __init__(
|
41 |
self, dataset, img_size, mosaic=True, preproc=None,
|
42 |
+
degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
|
43 |
+
mixup_scale=(0.5, 1.5), shear=2.0, perspective=0.0,
|
44 |
+
enable_mixup=True, mosaic_prob=1.0, mixup_prob=1.0, *args
|
45 |
):
|
46 |
"""
|
47 |
|
|
|
52 |
preproc (func):
|
53 |
degrees (float):
|
54 |
translate (float):
|
55 |
+
mosaic_scale (tuple):
|
56 |
+
mixup_scale (tuple):
|
57 |
shear (float):
|
58 |
perspective (float):
|
59 |
enable_mixup (bool):
|
|
|
64 |
self.preproc = preproc
|
65 |
self.degrees = degrees
|
66 |
self.translate = translate
|
67 |
+
self.scale = mosaic_scale
|
68 |
self.shear = shear
|
69 |
self.perspective = perspective
|
70 |
+
self.mixup_scale = mixup_scale
|
71 |
self.enable_mosaic = mosaic
|
72 |
self.enable_mixup = enable_mixup
|
73 |
self.mosaic_prob = mosaic_prob
|
yolox/exp/yolox_base.py
CHANGED
@@ -25,7 +25,12 @@ class Exp(BaseExp):
|
|
25 |
# set worker to 4 for shorter dataloader init time
|
26 |
self.data_num_workers = 4
|
27 |
self.input_size = (640, 640)
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
self.data_dir = None
|
30 |
self.train_ann = "instances_train2017.json"
|
31 |
self.val_ann = "instances_val2017.json"
|
@@ -35,8 +40,8 @@ class Exp(BaseExp):
|
|
35 |
self.mixup_prob = 1.0
|
36 |
self.degrees = 10.0
|
37 |
self.translate = 0.1
|
38 |
-
self.
|
39 |
-
self.
|
40 |
self.shear = 2.0
|
41 |
self.perspective = 0.0
|
42 |
self.enable_mixup = True
|
@@ -116,7 +121,8 @@ class Exp(BaseExp):
|
|
116 |
preproc=TrainTransform(max_labels=120),
|
117 |
degrees=self.degrees,
|
118 |
translate=self.translate,
|
119 |
-
|
|
|
120 |
shear=self.shear,
|
121 |
perspective=self.perspective,
|
122 |
enable_mixup=self.enable_mixup,
|
@@ -154,6 +160,10 @@ class Exp(BaseExp):
|
|
154 |
|
155 |
if rank == 0:
|
156 |
size_factor = self.input_size[1] * 1.0 / self.input_size[0]
|
|
|
|
|
|
|
|
|
157 |
size = random.randint(*self.random_size)
|
158 |
size = (int(32 * size), 32 * int(size * size_factor))
|
159 |
tensor[0] = size[0]
|
|
|
25 |
# set worker to 4 for shorter dataloader init time
|
26 |
self.data_num_workers = 4
|
27 |
self.input_size = (640, 640)
|
28 |
+
# Actual multiscale ranges: [640-5*32, 640+5*32].
|
29 |
+
# To disable multiscale training, set the
|
30 |
+
# self.multiscale_range to 0.
|
31 |
+
self.multiscale_range = 5
|
32 |
+
# You can uncomment this line to specify a multiscale range
|
33 |
+
# self.random_size = (14, 26)
|
34 |
self.data_dir = None
|
35 |
self.train_ann = "instances_train2017.json"
|
36 |
self.val_ann = "instances_val2017.json"
|
|
|
40 |
self.mixup_prob = 1.0
|
41 |
self.degrees = 10.0
|
42 |
self.translate = 0.1
|
43 |
+
self.mosaic_scale = (0.1, 2)
|
44 |
+
self.mixup_scale = (0.5, 1.5)
|
45 |
self.shear = 2.0
|
46 |
self.perspective = 0.0
|
47 |
self.enable_mixup = True
|
|
|
121 |
preproc=TrainTransform(max_labels=120),
|
122 |
degrees=self.degrees,
|
123 |
translate=self.translate,
|
124 |
+
mosaic_scale=self.mosaic_scale,
|
125 |
+
mixup_scale=self.mixup_scale,
|
126 |
shear=self.shear,
|
127 |
perspective=self.perspective,
|
128 |
enable_mixup=self.enable_mixup,
|
|
|
160 |
|
161 |
if rank == 0:
|
162 |
size_factor = self.input_size[1] * 1.0 / self.input_size[0]
|
163 |
+
if not hasattr(self, 'random_size'):
|
164 |
+
min_size = int(self.input_size[0] / 32) - self.multiscale_range
|
165 |
+
max_size = int(self.input_size[0] / 32) + self.multiscale_range
|
166 |
+
self.random_size = (min_size, max_size)
|
167 |
size = random.randint(*self.random_size)
|
168 |
size = (int(32 * size), 32 * int(size * size_factor))
|
169 |
tensor[0] = size[0]
|