Feng Wang committed on
Commit
39bbed9
·
1 Parent(s): ad34180

feat(YOLOX): add some basic experiments

Browse files
exps/nano.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
+ import os
6
+ import torch.nn as nn
7
+
8
+ from yolox.exp import Exp as MyExp
9
+
10
+
11
+ class Exp(MyExp):
12
+ def __init__(self):
13
+ super(Exp, self).__init__()
14
+ self.depth = 0.33
15
+ self.width = 0.25
16
+ self.scale = (0.5, 1.5)
17
+ self.random_size = (10, 20)
18
+ self.test_size = (416, 416)
19
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
20
+ self.enable_mixup = False
21
+
22
+ def get_model(self, sublinear=False):
23
+
24
+ def init_yolo(M):
25
+ for m in M.modules():
26
+ if isinstance(m, nn.BatchNorm2d):
27
+ m.eps = 1e-3
28
+ m.momentum = 0.03
29
+ if "model" not in self.__dict__:
30
+ from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
31
+ in_channels = [256, 512, 1024]
32
+ # The NANO model uses depthwise=True, which is the main difference.
33
+ backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)
34
+ head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)
35
+ self.model = YOLOX(backbone, head)
36
+
37
+ self.model.apply(init_yolo)
38
+ self.model.head.initialize_biases(1e-2)
39
+ return self.model
exps/yolov3.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
+ import os
6
+ import torch
7
+ import torch.nn as nn
8
+
9
+ from yolox.exp import Exp as MyExp
10
+
11
+
12
+ class Exp(MyExp):
13
+ def __init__(self):
14
+ super(Exp, self).__init__()
15
+ self.depth = 1.0
16
+ self.width = 1.0
17
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
18
+
19
+ def get_model(self, sublinear=False):
20
+ def init_yolo(M):
21
+ for m in M.modules():
22
+ if isinstance(m, nn.BatchNorm2d):
23
+ m.eps = 1e-3
24
+ m.momentum = 0.03
25
+ if "model" not in self.__dict__:
26
+ from yolox.models import YOLOX, YOLOFPN, YOLOXHead
27
+ backbone = YOLOFPN()
28
+ head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu")
29
+ self.model = YOLOX(backbone, head)
30
+ self.model.apply(init_yolo)
31
+ self.model.head.initialize_biases(1e-2)
32
+
33
+ return self.model
34
+
35
+ def get_data_loader(self, batch_size, is_distributed, no_aug=False):
36
+ from data.datasets.cocodataset import COCODataset
37
+ from data.datasets.mosaicdetection import MosaicDetection
38
+ from data.datasets.data_augment import TrainTransform
39
+ from data.datasets.dataloading import YoloBatchSampler, DataLoader, InfiniteSampler
40
+ import torch.distributed as dist
41
+
42
+ dataset = COCODataset(
43
+ data_dir='data/COCO/',
44
+ json_file=self.train_ann,
45
+ img_size=self.input_size,
46
+ preproc=TrainTransform(
47
+ rgb_means=(0.485, 0.456, 0.406),
48
+ std=(0.229, 0.224, 0.225),
49
+ max_labels=50
50
+ ),
51
+ )
52
+
53
+ dataset = MosaicDetection(
54
+ dataset,
55
+ mosaic=not no_aug,
56
+ img_size=self.input_size,
57
+ preproc=TrainTransform(
58
+ rgb_means=(0.485, 0.456, 0.406),
59
+ std=(0.229, 0.224, 0.225),
60
+ max_labels=120
61
+ ),
62
+ degrees=self.degrees,
63
+ translate=self.translate,
64
+ scale=self.scale,
65
+ shear=self.shear,
66
+ perspective=self.perspective,
67
+ )
68
+
69
+ self.dataset = dataset
70
+
71
+ if is_distributed:
72
+ batch_size = batch_size // dist.get_world_size()
73
+ sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
74
+ else:
75
+ sampler = torch.utils.data.RandomSampler(self.dataset)
76
+
77
+ batch_sampler = YoloBatchSampler(
78
+ sampler=sampler,
79
+ batch_size=batch_size,
80
+ drop_last=False,
81
+ input_dimension=self.input_size,
82
+ mosaic=not no_aug
83
+ )
84
+
85
+ dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
86
+ dataloader_kwargs["batch_sampler"] = batch_sampler
87
+ train_loader = DataLoader(self.dataset, **dataloader_kwargs)
88
+
89
+ return train_loader
exps/yolox_l.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
+ import os
6
+
7
+ from yolox.exp import Exp as MyExp
8
+
9
+
10
+ class Exp(MyExp):
11
+ def __init__(self):
12
+ super(Exp, self).__init__()
13
+ self.depth = 1.0
14
+ self.width = 1.0
15
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/yolox_m.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
+ import os
6
+
7
+ from yolox.exp import Exp as MyExp
8
+
9
+
10
+ class Exp(MyExp):
11
+ def __init__(self):
12
+ super(Exp, self).__init__()
13
+ self.depth = 0.67
14
+ self.width = 0.75
15
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/yolox_s.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
+ import os
6
+
7
+ from yolox.exp import Exp as MyExp
8
+
9
+
10
+ class Exp(MyExp):
11
+ def __init__(self):
12
+ super(Exp, self).__init__()
13
+ self.depth = 0.33
14
+ self.width = 0.50
15
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/yolox_tiny.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
+ import os
6
+
7
+ from yolox.exp import Exp as MyExp
8
+
9
+
10
+ class Exp(MyExp):
11
+ def __init__(self):
12
+ super(Exp, self).__init__()
13
+ self.depth = 0.33
14
+ self.width = 0.375
15
+ self.scale = (0.5, 1.5)
16
+ self.random_size = (10, 20)
17
+ self.test_size = (416, 416)
18
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
19
+ self.enable_mixup = False
exps/yolox_x.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
+ import os
6
+
7
+ from yolox.exp import Exp as MyExp
8
+
9
+
10
+ class Exp(MyExp):
11
+ def __init__(self):
12
+ super(Exp, self).__init__()
13
+ self.depth = 1.33
14
+ self.width = 1.25
15
+ self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
tools/demo.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import argparse
2
  import os
3
  import time
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
  import argparse
6
  import os
7
  import time
tools/eval.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import argparse
2
  import os
3
  import random
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
  import argparse
6
  import os
7
  import random
tools/export_onnx.py CHANGED
@@ -1,5 +1,6 @@
1
  #!/usr/bin/env python3
2
  # -*- coding:utf-8 -*-
 
3
 
4
  import argparse
5
  import os
 
1
  #!/usr/bin/env python3
2
  # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
 
5
  import argparse
6
  import os
tools/train.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import argparse
2
  import random
3
  import warnings
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
+
5
  import argparse
6
  import random
7
  import warnings
tools/trt.py CHANGED
@@ -1,8 +1,10 @@
1
  #!/usr/bin/env python3
2
  # -*- coding:utf-8 -*-
 
3
 
4
  import argparse
5
  import os
 
6
  from loguru import logger
7
 
8
  import tensorrt as trt
@@ -61,6 +63,14 @@ def main():
61
  )
62
  torch.save(model_trt.state_dict(), os.path.join(file_name, 'model_trt.pth'))
63
  logger.info("Converted TensorRT model done.")
 
 
 
 
 
 
 
 
64
 
65
 
66
  if __name__ == "__main__":
 
1
  #!/usr/bin/env python3
2
  # -*- coding:utf-8 -*-
3
+ # Copyright (c) Megvii, Inc. and its affiliates.
4
 
5
  import argparse
6
  import os
7
+ import shutil
8
  from loguru import logger
9
 
10
  import tensorrt as trt
 
63
  )
64
  torch.save(model_trt.state_dict(), os.path.join(file_name, 'model_trt.pth'))
65
  logger.info("Converted TensorRT model done.")
66
+ engine_file = os.path.join(file_name, 'model_trt.engine')
67
+ engine_file_demo = os.path.join('yolox', 'deploy', 'demo_trt_c++', 'model_trt.engine')
68
+ with open(engine_file, 'wb') as f:
69
+ f.write(model_trt.engine.serialize())
70
+
71
+ shutil.copyfile(engine_file, engine_file_demo)
72
+
73
+ logger.info("Converted TensorRT model engine file is saved for C++ inference.")
74
 
75
 
76
  if __name__ == "__main__":
yolox/core/trainer.py CHANGED
@@ -283,7 +283,7 @@ class Trainer:
283
  logger.info("loading checkpoint for fine tuning")
284
  ckpt_file = self.args.ckpt
285
  ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
286
- model = load_ckpt(self.model, ckpt)
287
  self.start_epoch = 0
288
 
289
  return model
 
283
  logger.info("loading checkpoint for fine tuning")
284
  ckpt_file = self.args.ckpt
285
  ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
286
+ model = load_ckpt(model, ckpt)
287
  self.start_epoch = 0
288
 
289
  return model
yolox/data/data_augment.py CHANGED
@@ -162,97 +162,6 @@ def _mirror(image, boxes):
162
  return image, boxes
163
 
164
 
165
- # TODO: reorg: use mosaicDet instead
166
- def _random_affine(
167
- img,
168
- targets=None,
169
- degrees=(-10, 10),
170
- translate=(0.1, 0.1),
171
- scale=(0.9, 1.1),
172
- shear=(-2, 2),
173
- borderValue=(114, 114, 114),
174
- ):
175
- # degrees = (0, 0)
176
- # shear = (0, 0)
177
- border = 0 # width of added border (optional)
178
- # height = max(img.shape[0], img.shape[1]) + border * 2
179
- height, width, _ = img.shape
180
-
181
- # Rotation and Scale
182
- R = np.eye(3)
183
- a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
184
- # a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations
185
- s = random.random() * (scale[1] - scale[0]) + scale[0]
186
- R[:2] = cv2.getRotationMatrix2D(
187
- angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s
188
- )
189
-
190
- # Translation
191
- T = np.eye(3)
192
- # x translation (pixels)
193
- T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border
194
- # y translation (pixels)
195
- T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border
196
-
197
- # Shear
198
- S = np.eye(3)
199
- # x shear (deg)
200
- S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)
201
- # y shear (deg)
202
- S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)
203
-
204
- # Combined rotation matrix. NOTE: ORDER IS IMPORTANT HERE!!
205
- M = S @ T @ R
206
- # BGR order borderValue
207
- imw = cv2.warpPerspective(
208
- img, M, dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=borderValue
209
- )
210
-
211
- # Return warped points also
212
- if targets is not None:
213
- if len(targets) > 0:
214
- n = targets.shape[0]
215
- points = targets[:, 0:4].copy()
216
-
217
- # warp points
218
- xy = np.ones((n * 4, 3))
219
- xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
220
- n * 4, 2
221
- ) # x1y1, x2y2, x1y2, x2y1
222
- xy = (xy @ M.T)[:, :2].reshape(n, 8)
223
-
224
- # create new boxes
225
- x = xy[:, [0, 2, 4, 6]]
226
- y = xy[:, [1, 3, 5, 7]]
227
- xy = (
228
- np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
229
- )
230
-
231
- # apply angle-based reduction
232
- radians = a * math.pi / 180
233
- reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
234
- x = (xy[:, 2] + xy[:, 0]) / 2
235
- y = (xy[:, 3] + xy[:, 1]) / 2
236
- w = (xy[:, 2] - xy[:, 0]) * reduction
237
- h = (xy[:, 3] - xy[:, 1]) * reduction
238
- xy = (
239
- np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2))
240
- .reshape(4, n)
241
- .T
242
- )
243
-
244
- # reject warped points outside of image
245
- x1 = np.clip(xy[:, 0], 0, width)
246
- y1 = np.clip(xy[:, 1], 0, height)
247
- x2 = np.clip(xy[:, 2], 0, width)
248
- y2 = np.clip(xy[:, 3], 0, height)
249
- boxes = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
250
-
251
- return imw, boxes, M
252
- else:
253
- return imw
254
-
255
-
256
  def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
257
  if len(image.shape) == 3:
258
  padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
 
162
  return image, boxes
163
 
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
166
  if len(image.shape) == 3:
167
  padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
yolox/data/datasets/mosaicdetection.py CHANGED
@@ -48,10 +48,10 @@ class MosaicDetection(Dataset):
48
  def __getitem__(self, idx):
49
  if self._mosaic:
50
  labels4 = []
51
- s = self._dataset.input_dim[0]
52
  # yc, xc = s, s # mosaic center x, y
53
- yc = int(random.uniform(0.5 * s, 1.5 * s))
54
- xc = int(random.uniform(0.5 * s, 1.5 * s))
55
 
56
  # 3 additional image indices
57
  indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
@@ -59,26 +59,28 @@ class MosaicDetection(Dataset):
59
  for i, index in enumerate(indices):
60
  img, _labels, _, _ = self._dataset.pull_item(index)
61
  h0, w0 = img.shape[:2] # orig hw
62
- r = 1.0 * s / max(h0, w0) # resize image to img_size
63
  interp = cv2.INTER_LINEAR
64
- img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
65
  (h, w) = img.shape[:2]
66
 
67
  if i == 0: # top left
68
  # base image with 4 tiles
69
- img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)
 
 
70
  # xmin, ymin, xmax, ymax (large image)
71
  x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,)
72
  # xmin, ymin, xmax, ymax (small image)
73
  x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,)
74
  elif i == 1: # top right
75
- x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
76
  x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
77
  elif i == 2: # bottom left
78
- x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
79
  x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
80
  elif i == 3: # bottom right
81
- x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
82
  x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
83
 
84
  img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
@@ -87,15 +89,20 @@ class MosaicDetection(Dataset):
87
 
88
  labels = _labels.copy() # [[xmin, ymin, xmax, ymax, label_ind], ... ]
89
  if _labels.size > 0: # Normalized xywh to pixel xyxy format
90
- labels[:, 0] = r * _labels[:, 0] + padw
91
- labels[:, 1] = r * _labels[:, 1] + padh
92
- labels[:, 2] = r * _labels[:, 2] + padw
93
- labels[:, 3] = r * _labels[:, 3] + padh
 
94
  labels4.append(labels)
95
 
96
  if len(labels4):
97
  labels4 = np.concatenate(labels4, 0)
98
- np.clip(labels4[:, :4], 0, 2 * s, out=labels4[:, :4]) # use with random_affine
 
 
 
 
99
  img4, labels4 = random_perspective(
100
  img4,
101
  labels4,
@@ -104,7 +111,7 @@ class MosaicDetection(Dataset):
104
  scale=self.scale,
105
  shear=self.shear,
106
  perspective=self.perspective,
107
- border=[-s // 2, -s // 2],
108
  ) # border to remove
109
 
110
  # -----------------------------------------------------------------
@@ -124,7 +131,6 @@ class MosaicDetection(Dataset):
124
  return img, label, img_info, int(idx)
125
 
126
  def mixup(self, origin_img, origin_labels, input_dim):
127
- # jit_factor = random.uniform(0.8, 1.2)
128
  jit_factor = random.uniform(*self.mixup_scale)
129
  FLIP = random.uniform(0, 1) > 0.5
130
  cp_labels = []
@@ -139,7 +145,7 @@ class MosaicDetection(Dataset):
139
  cp_img = np.ones((input_dim[0], input_dim[1], 3)) * 114.0
140
  else:
141
  cp_img = np.ones(input_dim) * 114.0
142
- cp_scale_ratio = input_dim[0] / max(img.shape[0], img.shape[1])
143
  resized_img = cv2.resize(
144
  img,
145
  (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),
 
48
  def __getitem__(self, idx):
49
  if self._mosaic:
50
  labels4 = []
51
+ input_dim = self._dataset.input_dim
52
  # yc, xc = s, s # mosaic center x, y
53
+ yc = int(random.uniform(0.5 * input_dim[0], 1.5 * input_dim[0]))
54
+ xc = int(random.uniform(0.5 * input_dim[1], 1.5 * input_dim[1]))
55
 
56
  # 3 additional image indices
57
  indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
 
59
  for i, index in enumerate(indices):
60
  img, _labels, _, _ = self._dataset.pull_item(index)
61
  h0, w0 = img.shape[:2] # orig hw
62
+ scale = min(1. * input_dim[0] / h0, 1. * input_dim[1] / w0)
63
  interp = cv2.INTER_LINEAR
64
+ img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=interp)
65
  (h, w) = img.shape[:2]
66
 
67
  if i == 0: # top left
68
  # base image with 4 tiles
69
+ img4 = np.full(
70
+ (input_dim[0] * 2, input_dim[1] * 2, img.shape[2]), 114, dtype=np.uint8
71
+ )
72
  # xmin, ymin, xmax, ymax (large image)
73
  x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,)
74
  # xmin, ymin, xmax, ymax (small image)
75
  x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,)
76
  elif i == 1: # top right
77
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, input_dim[1] * 2), yc
78
  x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
79
  elif i == 2: # bottom left
80
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(input_dim[0] * 2, yc + h)
81
  x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
82
  elif i == 3: # bottom right
83
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, input_dim[1] * 2), min(input_dim[0] * 2, yc + h) # noqa
84
  x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
85
 
86
  img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
 
89
 
90
  labels = _labels.copy() # [[xmin, ymin, xmax, ymax, label_ind], ... ]
91
  if _labels.size > 0: # Normalized xywh to pixel xyxy format
92
+ labels[:, 0] = scale * _labels[:, 0] + padw
93
+ labels[:, 1] = scale * _labels[:, 1] + padh
94
+ labels[:, 2] = scale * _labels[:, 2] + padw
95
+ labels[:, 3] = scale * _labels[:, 3] + padh
96
+
97
  labels4.append(labels)
98
 
99
  if len(labels4):
100
  labels4 = np.concatenate(labels4, 0)
101
+ np.clip(labels4[:, 0], 0, 2 * input_dim[1], out=labels4[:, 0])
102
+ np.clip(labels4[:, 1], 0, 2 * input_dim[0], out=labels4[:, 1])
103
+ np.clip(labels4[:, 2], 0, 2 * input_dim[1], out=labels4[:, 2])
104
+ np.clip(labels4[:, 3], 0, 2 * input_dim[0], out=labels4[:, 3])
105
+
106
  img4, labels4 = random_perspective(
107
  img4,
108
  labels4,
 
111
  scale=self.scale,
112
  shear=self.shear,
113
  perspective=self.perspective,
114
+ border=[-input_dim[0] // 2, -input_dim[1] // 2],
115
  ) # border to remove
116
 
117
  # -----------------------------------------------------------------
 
131
  return img, label, img_info, int(idx)
132
 
133
  def mixup(self, origin_img, origin_labels, input_dim):
 
134
  jit_factor = random.uniform(*self.mixup_scale)
135
  FLIP = random.uniform(0, 1) > 0.5
136
  cp_labels = []
 
145
  cp_img = np.ones((input_dim[0], input_dim[1], 3)) * 114.0
146
  else:
147
  cp_img = np.ones(input_dim) * 114.0
148
+ cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1])
149
  resized_img = cv2.resize(
150
  img,
151
  (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),
yolox/exp/build.py CHANGED
@@ -25,11 +25,12 @@ def get_exp_by_name(exp_name):
25
  "yolox-m": "yolox_l.py",
26
  "yolox-l": "yolox_l.py",
27
  "yolox-x": "yolox_x.py",
 
28
  "yolox-nano": "nano.py",
29
  "yolov3": "yolov3.py",
30
  }
31
  filename = filedict[exp_name]
32
- exp_path = os.path.join(yolox_path, "exps", "base", filename)
33
  return get_exp_by_file(exp_path)
34
 
35
 
 
25
  "yolox-m": "yolox_l.py",
26
  "yolox-l": "yolox_l.py",
27
  "yolox-x": "yolox_x.py",
28
+ "yolox-tiny": "yolox_tiny.py",
29
  "yolox-nano": "nano.py",
30
  "yolov3": "yolov3.py",
31
  }
32
  filename = filedict[exp_name]
33
+ exp_path = os.path.join(yolox_path, "exps", filename)
34
  return get_exp_by_file(exp_path)
35
 
36
 
yolox/exp/yolox_base.py CHANGED
@@ -141,22 +141,21 @@ class Exp(BaseExp):
141
  return train_loader
142
 
143
  def random_resize(self, data_loader, epoch, rank, is_distributed):
144
- tensor = torch.LongTensor(1).cuda()
145
 
146
  if rank == 0:
147
- if epoch >= self.max_epoch - 1:
148
- size = self.input_size[0]
149
- else:
150
- size = random.randint(*self.random_size)
151
- size = int(32 * size)
152
- tensor.fill_(size)
153
 
154
  if is_distributed:
155
  dist.barrier()
156
  dist.broadcast(tensor, 0)
157
 
158
  input_size = data_loader.change_input_dim(
159
- multiple=tensor.item(), random_range=None
160
  )
161
  return input_size
162
 
 
141
  return train_loader
142
 
143
  def random_resize(self, data_loader, epoch, rank, is_distributed):
144
+ tensor = torch.LongTensor(2).cuda()
145
 
146
  if rank == 0:
147
+ size_factor = self.input_size[1] * 1. / self.input_size[0]
148
+ size = random.randint(*self.random_size)
149
+ size = (int(32 * size), 32 * int(size * size_factor))
150
+ tensor[0] = size[0]
151
+ tensor[1] = size[1]
 
152
 
153
  if is_distributed:
154
  dist.barrier()
155
  dist.broadcast(tensor, 0)
156
 
157
  input_size = data_loader.change_input_dim(
158
+ multiple=(tensor[0].item(), tensor[1].item()), random_range=None
159
  )
160
  return input_size
161
 
yolox/models/darknet.py CHANGED
@@ -86,7 +86,11 @@ class Darknet(nn.Module):
86
 
87
  class CSPDarknet(nn.Module):
88
 
89
- def __init__(self, dep_mul, wid_mul, out_features=("dark3", "dark4", "dark5"), depthwise=False):
 
 
 
 
90
  super().__init__()
91
  assert out_features, "please provide output features of Darknet"
92
  self.out_features = out_features
@@ -96,33 +100,42 @@ class CSPDarknet(nn.Module):
96
  base_depth = max(round(dep_mul * 3), 1) # 3
97
 
98
  # stem
99
- self.stem = Focus(3, base_channels, ksize=3)
100
 
101
  # dark2
102
  self.dark2 = nn.Sequential(
103
- Conv(base_channels, base_channels * 2, 3, 2),
104
- CSPLayer(base_channels * 2, base_channels * 2, n=base_depth, depthwise=depthwise),
 
 
 
105
  )
106
 
107
  # dark3
108
  self.dark3 = nn.Sequential(
109
- Conv(base_channels * 2, base_channels * 4, 3, 2),
110
- CSPLayer(base_channels * 4, base_channels * 4, n=base_depth * 3, depthwise=depthwise),
 
 
 
111
  )
112
 
113
  # dark4
114
  self.dark4 = nn.Sequential(
115
- Conv(base_channels * 4, base_channels * 8, 3, 2),
116
- CSPLayer(base_channels * 8, base_channels * 8, n=base_depth * 3, depthwise=depthwise),
 
 
 
117
  )
118
 
119
  # dark5
120
  self.dark5 = nn.Sequential(
121
- Conv(base_channels * 8, base_channels * 16, 3, 2),
122
- SPPBottleneck(base_channels * 16, base_channels * 16),
123
  CSPLayer(
124
  base_channels * 16, base_channels * 16, n=base_depth,
125
- shortcut=False, depthwise=depthwise,
126
  ),
127
  )
128
 
 
86
 
87
  class CSPDarknet(nn.Module):
88
 
89
+ def __init__(
90
+ self, dep_mul, wid_mul,
91
+ out_features=("dark3", "dark4", "dark5"),
92
+ depthwise=False, act="silu",
93
+ ):
94
  super().__init__()
95
  assert out_features, "please provide output features of Darknet"
96
  self.out_features = out_features
 
100
  base_depth = max(round(dep_mul * 3), 1) # 3
101
 
102
  # stem
103
+ self.stem = Focus(3, base_channels, ksize=3, act=act)
104
 
105
  # dark2
106
  self.dark2 = nn.Sequential(
107
+ Conv(base_channels, base_channels * 2, 3, 2, act=act),
108
+ CSPLayer(
109
+ base_channels * 2, base_channels * 2,
110
+ n=base_depth, depthwise=depthwise, act=act
111
+ ),
112
  )
113
 
114
  # dark3
115
  self.dark3 = nn.Sequential(
116
+ Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),
117
+ CSPLayer(
118
+ base_channels * 4, base_channels * 4,
119
+ n=base_depth * 3, depthwise=depthwise, act=act,
120
+ ),
121
  )
122
 
123
  # dark4
124
  self.dark4 = nn.Sequential(
125
+ Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),
126
+ CSPLayer(
127
+ base_channels * 8, base_channels * 8,
128
+ n=base_depth * 3, depthwise=depthwise, act=act,
129
+ ),
130
  )
131
 
132
  # dark5
133
  self.dark5 = nn.Sequential(
134
+ Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),
135
+ SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),
136
  CSPLayer(
137
  base_channels * 16, base_channels * 16, n=base_depth,
138
+ shortcut=False, depthwise=depthwise, act=act,
139
  ),
140
  )
141
 
yolox/models/network_blocks.py CHANGED
@@ -72,12 +72,15 @@ class DWConv(nn.Module):
72
 
73
  class Bottleneck(nn.Module):
74
  # Standard bottleneck
75
- def __init__(self, in_channels, out_channels, shortcut=True, expansion=0.5, depthwise=False):
 
 
 
76
  super().__init__()
77
  hidden_channels = int(out_channels * expansion)
78
  Conv = DWConv if depthwise else BaseConv
79
- self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1)
80
- self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1)
81
  self.use_add = shortcut and in_channels == out_channels
82
 
83
  def forward(self, x):
@@ -124,7 +127,7 @@ class CSPLayer(nn.Module):
124
 
125
  def __init__(
126
  self, in_channels, out_channels, n=1,
127
- shortcut=True, expansion=0.5, depthwise=False
128
  ):
129
  """
130
  Args:
@@ -135,11 +138,11 @@ class CSPLayer(nn.Module):
135
  # ch_in, ch_out, number, shortcut, groups, expansion
136
  super().__init__()
137
  hidden_channels = int(out_channels * expansion) # hidden channels
138
- self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1)
139
- self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1)
140
- self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1) # act=FReLU(c2)
141
  module_list = [
142
- Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise)
143
  for _ in range(n)
144
  ]
145
  self.m = nn.Sequential(*module_list)
@@ -155,9 +158,9 @@ class CSPLayer(nn.Module):
155
  class Focus(nn.Module):
156
  """Focus width and height information into channel space."""
157
 
158
- def __init__(self, in_channels, out_channels, ksize=1, stride=1):
159
  super().__init__()
160
- self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride)
161
 
162
  def forward(self, x):
163
  # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)
 
72
 
73
  class Bottleneck(nn.Module):
74
  # Standard bottleneck
75
+ def __init__(
76
+ self, in_channels, out_channels, shortcut=True,
77
+ expansion=0.5, depthwise=False, act="silu"
78
+ ):
79
  super().__init__()
80
  hidden_channels = int(out_channels * expansion)
81
  Conv = DWConv if depthwise else BaseConv
82
+ self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
83
+ self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act)
84
  self.use_add = shortcut and in_channels == out_channels
85
 
86
  def forward(self, x):
 
127
 
128
  def __init__(
129
  self, in_channels, out_channels, n=1,
130
+ shortcut=True, expansion=0.5, depthwise=False, act="silu"
131
  ):
132
  """
133
  Args:
 
138
  # ch_in, ch_out, number, shortcut, groups, expansion
139
  super().__init__()
140
  hidden_channels = int(out_channels * expansion) # hidden channels
141
+ self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
142
+ self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
143
+ self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act)
144
  module_list = [
145
+ Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act)
146
  for _ in range(n)
147
  ]
148
  self.m = nn.Sequential(*module_list)
 
158
  class Focus(nn.Module):
159
  """Focus width and height information into channel space."""
160
 
161
+ def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"):
162
  super().__init__()
163
+ self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act)
164
 
165
  def forward(self, x):
166
  # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)
yolox/models/yolo_pafpn.py CHANGED
@@ -16,17 +16,17 @@ class YOLOPAFPN(nn.Module):
16
 
17
  def __init__(
18
  self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"),
19
- in_channels=[256, 512, 1024], depthwise=False,
20
  ):
21
  super().__init__()
22
- self.backbone = CSPDarknet(depth, width, depthwise=depthwise)
23
  self.in_features = in_features
24
  self.in_channels = in_channels
25
  Conv = DWConv if depthwise else BaseConv
26
 
27
  self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
28
  self.lateral_conv0 = BaseConv(
29
- int(in_channels[2] * width), int(in_channels[1] * width), 1, 1
30
  )
31
  self.C3_p4 = CSPLayer(
32
  int(2 * in_channels[1] * width),
@@ -34,10 +34,11 @@ class YOLOPAFPN(nn.Module):
34
  round(3 * depth),
35
  False,
36
  depthwise=depthwise,
 
37
  ) # cat
38
 
39
  self.reduce_conv1 = BaseConv(
40
- int(in_channels[1] * width), int(in_channels[0] * width), 1, 1
41
  )
42
  self.C3_p3 = CSPLayer(
43
  int(2 * in_channels[0] * width),
@@ -45,26 +46,33 @@ class YOLOPAFPN(nn.Module):
45
  round(3 * depth),
46
  False,
47
  depthwise=depthwise,
 
48
  )
49
 
50
  # bottom-up conv
51
- self.bu_conv2 = Conv(int(in_channels[0] * width), int(in_channels[0] * width), 3, 2)
 
 
52
  self.C3_n3 = CSPLayer(
53
  int(2 * in_channels[0] * width),
54
  int(in_channels[1] * width),
55
  round(3 * depth),
56
  False,
57
  depthwise=depthwise,
 
58
  )
59
 
60
  # bottom-up conv
61
- self.bu_conv1 = Conv(int(in_channels[1] * width), int(in_channels[1] * width), 3, 2)
 
 
62
  self.C3_n4 = CSPLayer(
63
  int(2 * in_channels[1] * width),
64
  int(in_channels[2] * width),
65
  round(3 * depth),
66
  False,
67
  depthwise=depthwise,
 
68
  )
69
 
70
  def forward(self, input):
 
16
 
17
  def __init__(
18
  self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"),
19
+ in_channels=[256, 512, 1024], depthwise=False, act="silu",
20
  ):
21
  super().__init__()
22
+ self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
23
  self.in_features = in_features
24
  self.in_channels = in_channels
25
  Conv = DWConv if depthwise else BaseConv
26
 
27
  self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
28
  self.lateral_conv0 = BaseConv(
29
+ int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act
30
  )
31
  self.C3_p4 = CSPLayer(
32
  int(2 * in_channels[1] * width),
 
34
  round(3 * depth),
35
  False,
36
  depthwise=depthwise,
37
+ act=act,
38
  ) # cat
39
 
40
  self.reduce_conv1 = BaseConv(
41
+ int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act
42
  )
43
  self.C3_p3 = CSPLayer(
44
  int(2 * in_channels[0] * width),
 
46
  round(3 * depth),
47
  False,
48
  depthwise=depthwise,
49
+ act=act,
50
  )
51
 
52
  # bottom-up conv
53
+ self.bu_conv2 = Conv(
54
+ int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act
55
+ )
56
  self.C3_n3 = CSPLayer(
57
  int(2 * in_channels[0] * width),
58
  int(in_channels[1] * width),
59
  round(3 * depth),
60
  False,
61
  depthwise=depthwise,
62
+ act=act,
63
  )
64
 
65
  # bottom-up conv
66
+ self.bu_conv1 = Conv(
67
+ int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act
68
+ )
69
  self.C3_n4 = CSPLayer(
70
  int(2 * in_channels[1] * width),
71
  int(in_channels[2] * width),
72
  round(3 * depth),
73
  False,
74
  depthwise=depthwise,
75
+ act=act,
76
  )
77
 
78
  def forward(self, input):
yolox/utils/visualize.py CHANGED
@@ -5,6 +5,8 @@
5
  import cv2
6
  import numpy as np
7
 
 
 
8
 
9
  def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
10
 
 
5
  import cv2
6
  import numpy as np
7
 
8
+ __all__ = ["vis"]
9
+
10
 
11
  def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
12