Feng Wang committed
Commit · 39bbed9
Parent(s): ad34180

feat(YOLOX): add some basic experiments
- exps/nano.py +39 -0
- exps/yolov3.py +89 -0
- exps/yolox_l.py +15 -0
- exps/yolox_m.py +15 -0
- exps/yolox_s.py +15 -0
- exps/yolox_tiny.py +19 -0
- exps/yolox_x.py +15 -0
- tools/demo.py +4 -0
- tools/eval.py +4 -0
- tools/export_onnx.py +1 -0
- tools/train.py +4 -0
- tools/trt.py +10 -0
- yolox/core/trainer.py +1 -1
- yolox/data/data_augment.py +0 -91
- yolox/data/datasets/mosaicdetection.py +23 -17
- yolox/exp/build.py +2 -1
- yolox/exp/yolox_base.py +7 -8
- yolox/models/darknet.py +24 -11
- yolox/models/network_blocks.py +13 -10
- yolox/models/yolo_pafpn.py +14 -6
- yolox/utils/visualize.py +2 -0
exps/nano.py
ADDED
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+import torch.nn as nn
+
+from yolox.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.33
+        self.width = 0.25
+        self.scale = (0.5, 1.5)
+        self.random_size = (10, 20)
+        self.test_size = (416, 416)
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+        self.enable_mixup = False
+
+    def get_model(self, sublinear=False):
+
+        def init_yolo(M):
+            for m in M.modules():
+                if isinstance(m, nn.BatchNorm2d):
+                    m.eps = 1e-3
+                    m.momentum = 0.03
+        if "model" not in self.__dict__:
+            from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
+            in_channels = [256, 512, 1024]
+            # The NANO model uses depthwise convolutions, which is the main difference.
+            backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)
+            head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)
+            self.model = YOLOX(backbone, head)
+
+        self.model.apply(init_yolo)
+        self.model.head.initialize_biases(1e-2)
+        return self.model
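For context, a minimal sketch (not part of the commit) of how an experiment file like this is typically loaded; it assumes get_exp_by_file from yolox/exp/build.py is importable and that the path is resolved from the repository root:

# Sketch: load the nano experiment above and build its depthwise model.
from yolox.exp.build import get_exp_by_file

exp = get_exp_by_file("exps/nano.py")      # returns the Exp instance defined above
model = exp.get_model()                    # YOLOPAFPN + YOLOXHead with depthwise=True
print(exp.exp_name, exp.depth, exp.width)  # nano 0.33 0.25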
exps/yolov3.py
ADDED
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+import torch
+import torch.nn as nn
+
+from yolox.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 1.0
+        self.width = 1.0
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+
+    def get_model(self, sublinear=False):
+        def init_yolo(M):
+            for m in M.modules():
+                if isinstance(m, nn.BatchNorm2d):
+                    m.eps = 1e-3
+                    m.momentum = 0.03
+        if "model" not in self.__dict__:
+            from yolox.models import YOLOX, YOLOFPN, YOLOXHead
+            backbone = YOLOFPN()
+            head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu")
+            self.model = YOLOX(backbone, head)
+        self.model.apply(init_yolo)
+        self.model.head.initialize_biases(1e-2)
+
+        return self.model
+
+    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
+        from yolox.data.datasets.cocodataset import COCODataset
+        from yolox.data.datasets.mosaicdetection import MosaicDetection
+        from yolox.data.data_augment import TrainTransform
+        from yolox.data.dataloading import YoloBatchSampler, DataLoader, InfiniteSampler
+        import torch.distributed as dist
+
+        dataset = COCODataset(
+            data_dir='data/COCO/',
+            json_file=self.train_ann,
+            img_size=self.input_size,
+            preproc=TrainTransform(
+                rgb_means=(0.485, 0.456, 0.406),
+                std=(0.229, 0.224, 0.225),
+                max_labels=50
+            ),
+        )
+
+        dataset = MosaicDetection(
+            dataset,
+            mosaic=not no_aug,
+            img_size=self.input_size,
+            preproc=TrainTransform(
+                rgb_means=(0.485, 0.456, 0.406),
+                std=(0.229, 0.224, 0.225),
+                max_labels=120
+            ),
+            degrees=self.degrees,
+            translate=self.translate,
+            scale=self.scale,
+            shear=self.shear,
+            perspective=self.perspective,
+        )
+
+        self.dataset = dataset
+
+        if is_distributed:
+            batch_size = batch_size // dist.get_world_size()
+            sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
+        else:
+            sampler = torch.utils.data.RandomSampler(self.dataset)
+
+        batch_sampler = YoloBatchSampler(
+            sampler=sampler,
+            batch_size=batch_size,
+            drop_last=False,
+            input_dimension=self.input_size,
+            mosaic=not no_aug
+        )
+
+        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
+        dataloader_kwargs["batch_sampler"] = batch_sampler
+        train_loader = DataLoader(self.dataset, **dataloader_kwargs)
+
+        return train_loader
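A small sketch (not part of the commit) of the arithmetic in the distributed branch of get_data_loader above: the global batch size is split evenly across ranks before building the per-rank sampler. The numbers are hypothetical:

# Sketch: per-rank batch size when is_distributed is True.
global_batch_size = 64
world_size = 8                              # what dist.get_world_size() would return
per_rank = global_batch_size // world_size
print(per_rank)                             # 8 samples per GPU per iteration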
exps/yolox_l.py
ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+from yolox.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 1.0
+        self.width = 1.0
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/yolox_m.py
ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+from yolox.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.67
+        self.width = 0.75
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/yolox_s.py
ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+from yolox.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.33
+        self.width = 0.50
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
exps/yolox_tiny.py
ADDED
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+from yolox.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.33
+        self.width = 0.375
+        self.scale = (0.5, 1.5)
+        self.random_size = (10, 20)
+        self.test_size = (416, 416)
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+        self.enable_mixup = False
exps/yolox_x.py
ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+import os
+
+from yolox.exp import Exp as MyExp
+
+
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 1.33
+        self.width = 1.25
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
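The variant files above differ only in their depth and width multipliers. A short sketch (not part of the commit) of what those multipliers imply for the backbone, assuming base_channels = int(width * 64) and base_depth = max(round(depth * 3), 1) as in yolox/models/darknet.py further down this diff:

# Sketch: backbone base sizes implied by each variant's multipliers.
variants = {
    "yolox-nano": (0.33, 0.25),
    "yolox-tiny": (0.33, 0.375),
    "yolox-s": (0.33, 0.50),
    "yolox-m": (0.67, 0.75),
    "yolox-l": (1.00, 1.00),
    "yolox-x": (1.33, 1.25),
}
for name, (depth, width) in variants.items():
    print(name, int(width * 64), max(round(depth * 3), 1))
# e.g. yolox-s -> base_channels=32, base_depth=1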
tools/demo.py
CHANGED
@@ -1,3 +1,7 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
 import argparse
 import os
 import time
tools/eval.py
CHANGED
@@ -1,3 +1,7 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
 import argparse
 import os
 import random
tools/export_onnx.py
CHANGED
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
 
 import argparse
 import os
tools/train.py
CHANGED
@@ -1,3 +1,7 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
 import argparse
 import random
 import warnings
tools/trt.py
CHANGED
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
 
 import argparse
 import os
+import shutil
 from loguru import logger
 
 import tensorrt as trt
@@ -61,6 +63,14 @@ def main():
     )
     torch.save(model_trt.state_dict(), os.path.join(file_name, 'model_trt.pth'))
     logger.info("Converted TensorRT model done.")
+    engine_file = os.path.join(file_name, 'model_trt.engine')
+    engine_file_demo = os.path.join('yolox', 'deploy', 'demo_trt_c++', 'model_trt.engine')
+    with open(engine_file, 'wb') as f:
+        f.write(model_trt.engine.serialize())
+
+    shutil.copyfile(engine_file, engine_file_demo)
+
+    logger.info("Converted TensorRT model engine file is saved for C++ inference.")
 
 
 if __name__ == "__main__":
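For context, a minimal sketch (not part of the commit) of how the engine file written above can be deserialized later from Python; the output path is hypothetical:

# Sketch: reload the serialized engine written by tools/trt.py.
import tensorrt as trt

trt_logger = trt.Logger(trt.Logger.WARNING)
with open("YOLOX_outputs/yolox_s/model_trt.engine", "rb") as f:  # hypothetical path
    engine = trt.Runtime(trt_logger).deserialize_cuda_engine(f.read())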
yolox/core/trainer.py
CHANGED
@@ -283,7 +283,7 @@ class Trainer:
             logger.info("loading checkpoint for fine tuning")
             ckpt_file = self.args.ckpt
             ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
-            model = load_ckpt(
+            model = load_ckpt(model, ckpt)
             self.start_epoch = 0
 
         return model
yolox/data/data_augment.py
CHANGED
@@ -162,97 +162,6 @@ def _mirror(image, boxes):
     return image, boxes
 
 
-# TODO: reorg: use mosaicDet instead
-def _random_affine(
-    img,
-    targets=None,
-    degrees=(-10, 10),
-    translate=(0.1, 0.1),
-    scale=(0.9, 1.1),
-    shear=(-2, 2),
-    borderValue=(114, 114, 114),
-):
-    # degrees = (0, 0)
-    # shear = (0, 0)
-    border = 0  # width of added border (optional)
-    # height = max(img.shape[0], img.shape[1]) + border * 2
-    height, width, _ = img.shape
-
-    # Rotation and Scale
-    R = np.eye(3)
-    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
-    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
-    s = random.random() * (scale[1] - scale[0]) + scale[0]
-    R[:2] = cv2.getRotationMatrix2D(
-        angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s
-    )
-
-    # Translation
-    T = np.eye(3)
-    # x translation (pixels)
-    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border
-    # y translation (pixels)
-    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border
-
-    # Shear
-    S = np.eye(3)
-    # x shear (deg)
-    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)
-    # y shear (deg)
-    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)
-
-    # Combined rotation matrix. NOTE: ORDER IS IMPORTANT HERE!!
-    M = S @ T @ R
-    # BGR order borderValue
-    imw = cv2.warpPerspective(
-        img, M, dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=borderValue
-    )
-
-    # Return warped points also
-    if targets is not None:
-        if len(targets) > 0:
-            n = targets.shape[0]
-            points = targets[:, 0:4].copy()
-
-            # warp points
-            xy = np.ones((n * 4, 3))
-            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
-                n * 4, 2
-            )  # x1y1, x2y2, x1y2, x2y1
-            xy = (xy @ M.T)[:, :2].reshape(n, 8)
-
-            # create new boxes
-            x = xy[:, [0, 2, 4, 6]]
-            y = xy[:, [1, 3, 5, 7]]
-            xy = (
-                np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-            )
-
-            # apply angle-based reduction
-            radians = a * math.pi / 180
-            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
-            x = (xy[:, 2] + xy[:, 0]) / 2
-            y = (xy[:, 3] + xy[:, 1]) / 2
-            w = (xy[:, 2] - xy[:, 0]) * reduction
-            h = (xy[:, 3] - xy[:, 1]) * reduction
-            xy = (
-                np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2))
-                .reshape(4, n)
-                .T
-            )
-
-            # reject warped points outside of image
-            x1 = np.clip(xy[:, 0], 0, width)
-            y1 = np.clip(xy[:, 1], 0, height)
-            x2 = np.clip(xy[:, 2], 0, width)
-            y2 = np.clip(xy[:, 3], 0, height)
-            boxes = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
-
-            return imw, boxes, M
-        else:
-            return imw
-
-
 def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
     if len(image.shape) == 3:
         padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
yolox/data/datasets/mosaicdetection.py
CHANGED
@@ -48,10 +48,10 @@ class MosaicDetection(Dataset):
     def __getitem__(self, idx):
         if self._mosaic:
             labels4 = []
-            s = self._dataset.input_dim[0]
+            input_dim = self._dataset.input_dim
             # yc, xc = s, s  # mosaic center x, y
-            yc = int(random.uniform(0.5 * s, 1.5 * s))
-            xc = int(random.uniform(0.5 * s, 1.5 * s))
+            yc = int(random.uniform(0.5 * input_dim[0], 1.5 * input_dim[0]))
+            xc = int(random.uniform(0.5 * input_dim[1], 1.5 * input_dim[1]))
 
             # 3 additional image indices
             indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
@@ -59,26 +59,28 @@ class MosaicDetection(Dataset):
             for i, index in enumerate(indices):
                 img, _labels, _, _ = self._dataset.pull_item(index)
                 h0, w0 = img.shape[:2]  # orig hw
-                scale = min(1. * s / h0, 1. * s / w0)
+                scale = min(1. * input_dim[0] / h0, 1. * input_dim[1] / w0)
                 interp = cv2.INTER_LINEAR
-                img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=interp)
+                img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=interp)
                 (h, w) = img.shape[:2]
 
                 if i == 0:  # top left
                     # base image with 4 tiles
-                    img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)
+                    img4 = np.full(
+                        (input_dim[0] * 2, input_dim[1] * 2, img.shape[2]), 114, dtype=np.uint8
+                    )
                     # xmin, ymin, xmax, ymax (large image)
                     x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,)
                     # xmin, ymin, xmax, ymax (small image)
                     x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,)
                 elif i == 1:  # top right
-                    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, input_dim[1] * 2), yc
                     x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
                 elif i == 2:  # bottom left
-                    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(input_dim[0] * 2, yc + h)
                     x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
                 elif i == 3:  # bottom right
-                    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, input_dim[1] * 2), min(input_dim[0] * 2, yc + h)  # noqa
                     x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
 
                 img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
@@ -87,15 +89,20 @@ class MosaicDetection(Dataset):
 
                 labels = _labels.copy()  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
                 if _labels.size > 0:  # Normalized xywh to pixel xyxy format
-                    labels[:, 0] = scale * _labels[:, 0] + padw
-                    labels[:, 1] = scale * _labels[:, 1] + padh
-                    labels[:, 2] = scale * _labels[:, 2] + padw
-                    labels[:, 3] = scale * _labels[:, 3] + padh
+                    labels[:, 0] = scale * _labels[:, 0] + padw
+                    labels[:, 1] = scale * _labels[:, 1] + padh
+                    labels[:, 2] = scale * _labels[:, 2] + padw
+                    labels[:, 3] = scale * _labels[:, 3] + padh
+
                 labels4.append(labels)
 
             if len(labels4):
                 labels4 = np.concatenate(labels4, 0)
-                np.clip(labels4[:, 0:4], 0, 2 * s, out=labels4[:, 0:4])
+                np.clip(labels4[:, 0], 0, 2 * input_dim[1], out=labels4[:, 0])
+                np.clip(labels4[:, 1], 0, 2 * input_dim[0], out=labels4[:, 1])
+                np.clip(labels4[:, 2], 0, 2 * input_dim[1], out=labels4[:, 2])
+                np.clip(labels4[:, 3], 0, 2 * input_dim[0], out=labels4[:, 3])
+
                 img4, labels4 = random_perspective(
                     img4,
                     labels4,
@@ -104,7 +111,7 @@ class MosaicDetection(Dataset):
                     scale=self.scale,
                     shear=self.shear,
                     perspective=self.perspective,
-                    border=[-s // 2, -s // 2],
+                    border=[-input_dim[0] // 2, -input_dim[1] // 2],
                 )  # border to remove
 
             # -----------------------------------------------------------------
@@ -124,7 +131,6 @@ class MosaicDetection(Dataset):
             return img, label, img_info, int(idx)
 
     def mixup(self, origin_img, origin_labels, input_dim):
-        # jit_factor = random.uniform(0.8, 1.2)
        jit_factor = random.uniform(*self.mixup_scale)
         FLIP = random.uniform(0, 1) > 0.5
         cp_labels = []
@@ -139,7 +145,7 @@ class MosaicDetection(Dataset):
             cp_img = np.ones((input_dim[0], input_dim[1], 3)) * 114.0
         else:
             cp_img = np.ones(input_dim) * 114.0
-        cp_scale_ratio = input_dim[0] / img.shape[0]
+        cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1])
         resized_img = cv2.resize(
             img,
             (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),
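A standalone sketch (not part of the commit) of the tile-placement arithmetic above for the top-left branch (i == 0), with hypothetical numbers:

# Sketch: top-left tile placement on the 2x-sized mosaic canvas.
input_dim = (640, 640)            # (height, width)
yc, xc = 700, 800                 # mosaic center, drawn from U(0.5*dim, 1.5*dim)
h, w = 480, 640                   # resized tile size

x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc   # canvas rectangle
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h   # source rectangle
assert (x2a - x1a, y2a - y1a) == (x2b - x1b, y2b - y1b)       # same patch size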
yolox/exp/build.py
CHANGED
@@ -25,11 +25,12 @@ def get_exp_by_name(exp_name):
         "yolox-m": "yolox_m.py",
         "yolox-l": "yolox_l.py",
         "yolox-x": "yolox_x.py",
+        "yolox-tiny": "yolox_tiny.py",
         "yolox-nano": "nano.py",
         "yolov3": "yolov3.py",
     }
     filename = filedict[exp_name]
-    exp_path = os.path.join(yolox_path, "exps",
+    exp_path = os.path.join(yolox_path, "exps", filename)
     return get_exp_by_file(exp_path)
 
 
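For context, a minimal sketch (not part of the commit) of resolving the new alias registered above:

# Sketch: resolve the "yolox-tiny" alias added to filedict above.
from yolox.exp.build import get_exp_by_name

exp = get_exp_by_name("yolox-tiny")         # loads exps/yolox_tiny.py
print(exp.depth, exp.width, exp.test_size)  # 0.33 0.375 (416, 416)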
yolox/exp/yolox_base.py
CHANGED
@@ -141,22 +141,21 @@ class Exp(BaseExp):
         return train_loader
 
     def random_resize(self, data_loader, epoch, rank, is_distributed):
-        tensor = torch.LongTensor(1).cuda()
+        tensor = torch.LongTensor(2).cuda()
 
         if rank == 0:
-            if epoch >= self.max_epoch - 10:
-                size = self.input_size[0]
-            else:
-                size = random.randint(*self.random_size)
-                size = int(32 * size)
-            tensor.fill_(size)
+            size_factor = self.input_size[1] * 1. / self.input_size[0]
+            size = random.randint(*self.random_size)
+            size = (int(32 * size), 32 * int(size * size_factor))
+            tensor[0] = size[0]
+            tensor[1] = size[1]
 
         if is_distributed:
             dist.barrier()
             dist.broadcast(tensor, 0)
 
         input_size = data_loader.change_input_dim(
-            multiple=tensor.item(), random_range=None
+            multiple=(tensor[0].item(), tensor[1].item()), random_range=None
         )
         return input_size
 
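A quick sketch (not part of the commit) of what the new two-dimensional size draw produces; the rectangular input_size is hypothetical:

# Sketch: the multi-scale draw above for input_size=(640, 1152),
# random_size=(10, 20); both sides stay multiples of 32.
import random

input_size = (640, 1152)
size_factor = input_size[1] * 1. / input_size[0]   # 1.8
size = random.randint(10, 20)                      # e.g. 14
size = (int(32 * size), 32 * int(size * size_factor))
print(size)                                        # e.g. (448, 800)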
yolox/models/darknet.py
CHANGED
@@ -86,7 +86,11 @@ class Darknet(nn.Module):
 
 class CSPDarknet(nn.Module):
 
-    def __init__(self, dep_mul, wid_mul, out_features=("dark3", "dark4", "dark5"), depthwise=False):
+    def __init__(
+        self, dep_mul, wid_mul,
+        out_features=("dark3", "dark4", "dark5"),
+        depthwise=False, act="silu",
+    ):
         super().__init__()
         assert out_features, "please provide output features of Darknet"
         self.out_features = out_features
@@ -96,33 +100,42 @@ class CSPDarknet(nn.Module):
         base_depth = max(round(dep_mul * 3), 1)  # 3
 
         # stem
-        self.stem = Focus(3, base_channels, ksize=3)
+        self.stem = Focus(3, base_channels, ksize=3, act=act)
 
         # dark2
         self.dark2 = nn.Sequential(
-            Conv(base_channels, base_channels * 2, 3, 2),
-            CSPLayer(base_channels * 2, base_channels * 2, n=base_depth, depthwise=depthwise),
+            Conv(base_channels, base_channels * 2, 3, 2, act=act),
+            CSPLayer(
+                base_channels * 2, base_channels * 2,
+                n=base_depth, depthwise=depthwise, act=act
+            ),
         )
 
         # dark3
         self.dark3 = nn.Sequential(
-            Conv(base_channels * 2, base_channels * 4, 3, 2),
-            CSPLayer(base_channels * 4, base_channels * 4, n=base_depth * 3, depthwise=depthwise),
+            Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),
+            CSPLayer(
+                base_channels * 4, base_channels * 4,
+                n=base_depth * 3, depthwise=depthwise, act=act,
+            ),
         )
 
         # dark4
         self.dark4 = nn.Sequential(
-            Conv(base_channels * 4, base_channels * 8, 3, 2),
-            CSPLayer(base_channels * 8, base_channels * 8, n=base_depth * 3, depthwise=depthwise),
+            Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),
+            CSPLayer(
+                base_channels * 8, base_channels * 8,
+                n=base_depth * 3, depthwise=depthwise, act=act,
+            ),
        )
 
         # dark5
         self.dark5 = nn.Sequential(
-            Conv(base_channels * 8, base_channels * 16, 3, 2),
-            SPPBottleneck(base_channels * 16, base_channels * 16),
+            Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),
+            SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),
             CSPLayer(
                 base_channels * 16, base_channels * 16, n=base_depth,
-                shortcut=False, depthwise=depthwise,
+                shortcut=False, depthwise=depthwise, act=act,
             ),
         )
 
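A short sketch (not part of the commit) of the stride/channel progression these stages produce, assuming base_channels = int(wid_mul * 64) (the formula alongside base_depth in this file) and a hypothetical 640x640 input at width 1.0:

# Sketch: resolution halves and channels double at each CSPDarknet stage.
base_channels = int(1.0 * 64)
size = 640
for name, mult in [("stem", 1), ("dark2", 2), ("dark3", 4), ("dark4", 8), ("dark5", 16)]:
    size //= 2
    print(f"{name}: {size}x{size}, {base_channels * mult} channels")
# dark3/dark4/dark5 (80x80/40x40/20x20) are the out_features fed to the PAFPN.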
yolox/models/network_blocks.py
CHANGED
@@ -72,12 +72,15 @@ class DWConv(nn.Module):
 
 class Bottleneck(nn.Module):
     # Standard bottleneck
-    def __init__(self, in_channels, out_channels, shortcut=True, expansion=0.5, depthwise=False):
+    def __init__(
+        self, in_channels, out_channels, shortcut=True,
+        expansion=0.5, depthwise=False, act="silu"
+    ):
         super().__init__()
         hidden_channels = int(out_channels * expansion)
         Conv = DWConv if depthwise else BaseConv
-        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1)
-        self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1)
+        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
+        self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act)
         self.use_add = shortcut and in_channels == out_channels
 
     def forward(self, x):
@@ -124,7 +127,7 @@ class CSPLayer(nn.Module):
 
     def __init__(
         self, in_channels, out_channels, n=1,
-        shortcut=True, expansion=0.5, depthwise=False
+        shortcut=True, expansion=0.5, depthwise=False, act="silu"
     ):
         """
         Args:
@@ -135,11 +138,11 @@ class CSPLayer(nn.Module):
         # ch_in, ch_out, number, shortcut, groups, expansion
         super().__init__()
         hidden_channels = int(out_channels * expansion)  # hidden channels
-        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1)
-        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1)
-        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1)
+        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
+        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
+        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act)
         module_list = [
-            Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise)
+            Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act)
             for _ in range(n)
         ]
         self.m = nn.Sequential(*module_list)
@@ -155,9 +158,9 @@ class CSPLayer(nn.Module):
 class Focus(nn.Module):
     """Focus width and height information into channel space."""
 
-    def __init__(self, in_channels, out_channels, ksize=1, stride=1):
+    def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"):
         super().__init__()
-        self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride)
+        self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act)
 
     def forward(self, x):
         # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)
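For context, a small sketch (not part of the commit) of the space-to-depth slicing that Focus.forward performs, matching the shape comment above:

# Sketch: Focus slicing, (b, c, h, w) -> (b, 4c, h/2, w/2).
import torch

x = torch.randn(1, 3, 640, 640)
y = torch.cat(
    (x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]),
    dim=1,
)
print(y.shape)  # torch.Size([1, 12, 320, 320])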
yolox/models/yolo_pafpn.py
CHANGED
@@ -16,17 +16,17 @@ class YOLOPAFPN(nn.Module):
 
     def __init__(
         self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"),
-        in_channels=[256, 512, 1024], depthwise=False,
+        in_channels=[256, 512, 1024], depthwise=False, act="silu",
     ):
         super().__init__()
-        self.backbone = CSPDarknet(depth, width, depthwise=depthwise)
+        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
         self.in_features = in_features
         self.in_channels = in_channels
         Conv = DWConv if depthwise else BaseConv
 
         self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
         self.lateral_conv0 = BaseConv(
-            int(in_channels[2] * width), int(in_channels[1] * width), 1, 1
+            int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act
         )
         self.C3_p4 = CSPLayer(
             int(2 * in_channels[1] * width),
@@ -34,10 +34,11 @@ class YOLOPAFPN(nn.Module):
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )  # cat
 
         self.reduce_conv1 = BaseConv(
-            int(in_channels[1] * width), int(in_channels[0] * width), 1, 1
+            int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act
         )
         self.C3_p3 = CSPLayer(
             int(2 * in_channels[0] * width),
@@ -45,26 +46,33 @@ class YOLOPAFPN(nn.Module):
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )
 
         # bottom-up conv
-        self.bu_conv2 = Conv(int(in_channels[0] * width), int(in_channels[0] * width), 3, 2)
+        self.bu_conv2 = Conv(
+            int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act
+        )
         self.C3_n3 = CSPLayer(
             int(2 * in_channels[0] * width),
             int(in_channels[1] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )
 
         # bottom-up conv
-        self.bu_conv1 = Conv(int(in_channels[1] * width), int(in_channels[1] * width), 3, 2)
+        self.bu_conv1 = Conv(
+            int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act
+        )
         self.C3_n4 = CSPLayer(
             int(2 * in_channels[1] * width),
             int(in_channels[2] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )
 
     def forward(self, input):
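A short sketch (not part of the commit) making the int(in_channels[i] * width) arithmetic above concrete for yolox-s (width = 0.50):

# Sketch: effective PAFPN channel counts at width = 0.50.
width = 0.50
in_channels = [256, 512, 1024]
print([int(c * width) for c in in_channels])   # [128, 256, 512]
print(int(2 * in_channels[1] * width))         # 512: C3_p4 input after concat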
yolox/utils/visualize.py
CHANGED
@@ -5,6 +5,8 @@
 import cv2
 import numpy as np
 
+__all__ = ["vis"]
+
 
 def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):