fix(data): training bug for rectangle input (#620)
Browse files
This PR fixes the performance degradation when training with rectangular image shapes.
docs/manipulate_training_image_size.md
CHANGED
@@ -6,7 +6,7 @@ This tutorial explains how to control your image size when training on your own
|
|
6 |
|
7 |
There are 3 hyperparameters that control the training size:
|
8 |
|
9 |
-
- self.input_size = (640, 640)
|
10 |
- self.multiscale_range = 5
|
11 |
- self.random_size = (14, 26)
|
12 |
|
|
|
6 |
|
7 |
There are 3 hyperparameters that control the training size:
|
8 |
|
9 |
+
- self.input_size = (640, 640)   #(height, width)
|
10 |
- self.multiscale_range = 5
|
11 |
- self.random_size = (14, 26)
|
12 |
|
yolox/exp/yolox_base.py
CHANGED
@@ -24,7 +24,7 @@ class Exp(BaseExp):
|
|
24 |
# ---------------- dataloader config ---------------- #
|
25 |
# set worker to 4 for shorter dataloader init time
|
26 |
self.data_num_workers = 4
|
27 |
-
self.input_size = (640, 640)
|
28 |
# Actual multiscale ranges: [640-5*32, 640+5*32].
|
29 |
# To disable multiscale training, set the
|
30 |
# self.multiscale_range to 0.
|
@@ -185,12 +185,14 @@ class Exp(BaseExp):
|
|
185 |
return input_size
|
186 |
|
187 |
def preprocess(self, inputs, targets, tsize):
|
188 |
-
|
189 |
-
|
|
|
190 |
inputs = nn.functional.interpolate(
|
191 |
inputs, size=tsize, mode="bilinear", align_corners=False
|
192 |
)
|
193 |
-
targets[..., 1
|
|
|
194 |
return inputs, targets
|
195 |
|
196 |
def get_optimizer(self, batch_size):
|
|
|
24 |
# ---------------- dataloader config ---------------- #
|
25 |
# set worker to 4 for shorter dataloader init time
|
26 |
self.data_num_workers = 4
|
27 |
+
self.input_size = (640, 640) # (height, width)
|
28 |
# Actual multiscale ranges: [640-5*32, 640+5*32].
|
29 |
# To disable multiscale training, set the
|
30 |
# self.multiscale_range to 0.
|
|
|
185 |
return input_size
|
186 |
|
187 |
def preprocess(self, inputs, targets, tsize):
|
188 |
+
scale_y = tsize[0] / self.input_size[0]
|
189 |
+
scale_x = tsize[1] / self.input_size[1]
|
190 |
+
if scale_x != 1 or scale_y != 1:
|
191 |
inputs = nn.functional.interpolate(
|
192 |
inputs, size=tsize, mode="bilinear", align_corners=False
|
193 |
)
|
194 |
+
targets[..., 1::2] = targets[..., 1::2] * scale_x
|
195 |
+
targets[..., 2::2] = targets[..., 2::2] * scale_y
|
196 |
return inputs, targets
|
197 |
|
198 |
def get_optimizer(self, batch_size):
|