Feng Wang
committed on
Commit
·
53d1ae9
1
Parent(s):
7020e34
fix(core): avoid overwrite best ckpt and add attr description (#1103)
Browse files- yolox/core/trainer.py +10 -3
- yolox/data/data_augment.py +1 -3
- yolox/exp/yolox_base.py +43 -6
yolox/core/trainer.py
CHANGED
@@ -46,6 +46,7 @@ class Trainer:
|
|
46 |
self.local_rank = get_local_rank()
|
47 |
self.device = "cuda:{}".format(self.local_rank)
|
48 |
self.use_model_ema = exp.ema
|
|
|
49 |
|
50 |
# data/dataloader related attr
|
51 |
self.data_type = torch.float16 if args.fp16 else torch.float32
|
@@ -174,7 +175,7 @@ class Trainer:
|
|
174 |
)
|
175 |
# Tensorboard logger
|
176 |
if self.rank == 0:
|
177 |
-
self.tblogger = SummaryWriter(self.file_name)
|
178 |
|
179 |
logger.info("Training start...")
|
180 |
logger.info("\n{}".format(model))
|
@@ -269,6 +270,7 @@ class Trainer:
|
|
269 |
# resume the model/optimizer state dict
|
270 |
model.load_state_dict(ckpt["model"])
|
271 |
self.optimizer.load_state_dict(ckpt["optimizer"])
|
|
|
272 |
# resume the training states variables
|
273 |
start_epoch = (
|
274 |
self.args.start_epoch - 1
|
@@ -302,6 +304,9 @@ class Trainer:
|
|
302 |
ap50_95, ap50, summary = self.exp.eval(
|
303 |
evalmodel, self.evaluator, self.is_distributed
|
304 |
)
|
|
|
|
|
|
|
305 |
self.model.train()
|
306 |
if self.rank == 0:
|
307 |
self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
|
@@ -309,8 +314,9 @@ class Trainer:
|
|
309 |
logger.info("\n" + summary)
|
310 |
synchronize()
|
311 |
|
312 |
-
self.save_ckpt("last_epoch",
|
313 |
-
|
|
|
314 |
|
315 |
def save_ckpt(self, ckpt_name, update_best_ckpt=False):
|
316 |
if self.rank == 0:
|
@@ -320,6 +326,7 @@ class Trainer:
|
|
320 |
"start_epoch": self.epoch + 1,
|
321 |
"model": save_model.state_dict(),
|
322 |
"optimizer": self.optimizer.state_dict(),
|
|
|
323 |
}
|
324 |
save_checkpoint(
|
325 |
ckpt_state,
|
|
|
46 |
self.local_rank = get_local_rank()
|
47 |
self.device = "cuda:{}".format(self.local_rank)
|
48 |
self.use_model_ema = exp.ema
|
49 |
+
self.save_history_ckpt = exp.save_history_ckpt
|
50 |
|
51 |
# data/dataloader related attr
|
52 |
self.data_type = torch.float16 if args.fp16 else torch.float32
|
|
|
175 |
)
|
176 |
# Tensorboard logger
|
177 |
if self.rank == 0:
|
178 |
+
self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
|
179 |
|
180 |
logger.info("Training start...")
|
181 |
logger.info("\n{}".format(model))
|
|
|
270 |
# resume the model/optimizer state dict
|
271 |
model.load_state_dict(ckpt["model"])
|
272 |
self.optimizer.load_state_dict(ckpt["optimizer"])
|
273 |
+
self.best_ap = ckpt.pop("best_ap", 0)
|
274 |
# resume the training states variables
|
275 |
start_epoch = (
|
276 |
self.args.start_epoch - 1
|
|
|
304 |
ap50_95, ap50, summary = self.exp.eval(
|
305 |
evalmodel, self.evaluator, self.is_distributed
|
306 |
)
|
307 |
+
update_best_ckpt = ap50_95 > self.best_ap
|
308 |
+
self.best_ap = max(self.best_ap, ap50_95)
|
309 |
+
|
310 |
self.model.train()
|
311 |
if self.rank == 0:
|
312 |
self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
|
|
|
314 |
logger.info("\n" + summary)
|
315 |
synchronize()
|
316 |
|
317 |
+
self.save_ckpt("last_epoch", update_best_ckpt)
|
318 |
+
if self.save_history_ckpt:
|
319 |
+
self.save_ckpt(f"epoch_{self.epoch + 1}")
|
320 |
|
321 |
def save_ckpt(self, ckpt_name, update_best_ckpt=False):
|
322 |
if self.rank == 0:
|
|
|
326 |
"start_epoch": self.epoch + 1,
|
327 |
"model": save_model.state_dict(),
|
328 |
"optimizer": self.optimizer.state_dict(),
|
329 |
+
"best_ap": self.best_ap,
|
330 |
}
|
331 |
save_checkpoint(
|
332 |
ckpt_state,
|
yolox/data/data_augment.py
CHANGED
@@ -39,9 +39,7 @@ def get_aug_params(value, center=0):
|
|
39 |
else:
|
40 |
raise ValueError(
|
41 |
"Affine params should be either a sequence containing two values\
|
42 |
-
|
43 |
-
value
|
44 |
-
)
|
45 |
)
|
46 |
|
47 |
|
|
|
39 |
else:
|
40 |
raise ValueError(
|
41 |
"Affine params should be either a sequence containing two values\
|
42 |
+
or single float values. Got {}".format(value)
|
|
|
|
|
43 |
)
|
44 |
|
45 |
|
yolox/exp/yolox_base.py
CHANGED
@@ -17,57 +17,94 @@ class Exp(BaseExp):
|
|
17 |
super().__init__()
|
18 |
|
19 |
# ---------------- model config ---------------- #
|
|
|
20 |
self.num_classes = 80
|
|
|
21 |
self.depth = 1.00
|
|
|
22 |
self.width = 1.00
|
23 |
-
|
|
|
24 |
|
25 |
# ---------------- dataloader config ---------------- #
|
26 |
# set worker to 4 for shorter dataloader init time
|
|
|
27 |
self.data_num_workers = 4
|
28 |
self.input_size = (640, 640) # (height, width)
|
29 |
-
# Actual multiscale ranges: [640-5*32, 640+5*32].
|
30 |
-
# To disable multiscale training, set the
|
31 |
-
# self.multiscale_range to 0.
|
32 |
self.multiscale_range = 5
|
33 |
# You can uncomment this line to specify a multiscale range
|
34 |
# self.random_size = (14, 26)
|
|
|
35 |
self.data_dir = None
|
|
|
36 |
self.train_ann = "instances_train2017.json"
|
|
|
37 |
self.val_ann = "instances_val2017.json"
|
|
|
38 |
self.test_ann = "instances_test2017.json"
|
39 |
|
40 |
# --------------- transform config ----------------- #
|
|
|
41 |
self.mosaic_prob = 1.0
|
|
|
42 |
self.mixup_prob = 1.0
|
|
|
43 |
self.hsv_prob = 1.0
|
|
|
44 |
self.flip_prob = 0.5
|
|
|
45 |
self.degrees = 10.0
|
|
|
46 |
self.translate = 0.1
|
47 |
self.mosaic_scale = (0.1, 2)
|
|
|
|
|
48 |
self.mixup_scale = (0.5, 1.5)
|
|
|
49 |
self.shear = 2.0
|
50 |
-
self.enable_mixup = True
|
51 |
|
52 |
# -------------- training config --------------------- #
|
|
|
53 |
self.warmup_epochs = 5
|
|
|
54 |
self.max_epoch = 300
|
|
|
55 |
self.warmup_lr = 0
|
|
|
|
|
56 |
self.basic_lr_per_img = 0.01 / 64.0
|
|
|
57 |
self.scheduler = "yoloxwarmcos"
|
|
|
58 |
self.no_aug_epochs = 15
|
59 |
-
|
60 |
self.ema = True
|
61 |
|
|
|
62 |
self.weight_decay = 5e-4
|
|
|
63 |
self.momentum = 0.9
|
|
|
|
|
64 |
self.print_interval = 10
|
|
|
|
|
65 |
self.eval_interval = 10
|
|
|
|
|
|
|
|
|
66 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
67 |
|
68 |
# ----------------- testing config ------------------ #
|
|
|
69 |
self.test_size = (640, 640)
|
|
|
|
|
70 |
self.test_conf = 0.01
|
|
|
71 |
self.nmsthre = 0.65
|
72 |
|
73 |
def get_model(self):
|
|
|
17 |
super().__init__()
|
18 |
|
19 |
# ---------------- model config ---------------- #
|
20 |
+
# number of classes the model detects
|
21 |
self.num_classes = 80
|
22 |
+
# factor of model depth
|
23 |
self.depth = 1.00
|
24 |
+
# factor of model width
|
25 |
self.width = 1.00
|
26 |
+
# activation name. For example, if using "relu", then "silu" will be replaced with "relu".
|
27 |
+
self.act = "silu"
|
28 |
|
29 |
# ---------------- dataloader config ---------------- #
|
30 |
# set worker to 4 for shorter dataloader init time
|
31 |
+
# If your training process uses too much memory, reduce this value.
|
32 |
self.data_num_workers = 4
|
33 |
self.input_size = (640, 640) # (height, width)
|
34 |
+
# Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32].
|
35 |
+
# To disable multiscale training, set the value to 0.
|
|
|
36 |
self.multiscale_range = 5
|
37 |
# You can uncomment this line to specify a multiscale range
|
38 |
# self.random_size = (14, 26)
|
39 |
+
# dir of dataset images, if data_dir is None, this project will use `datasets` dir
|
40 |
self.data_dir = None
|
41 |
+
# name of annotation file for training
|
42 |
self.train_ann = "instances_train2017.json"
|
43 |
+
# name of annotation file for evaluation
|
44 |
self.val_ann = "instances_val2017.json"
|
45 |
+
# name of annotation file for testing
|
46 |
self.test_ann = "instances_test2017.json"
|
47 |
|
48 |
# --------------- transform config ----------------- #
|
49 |
+
# prob of applying mosaic aug
|
50 |
self.mosaic_prob = 1.0
|
51 |
+
# prob of applying mixup aug
|
52 |
self.mixup_prob = 1.0
|
53 |
+
# prob of applying hsv aug
|
54 |
self.hsv_prob = 1.0
|
55 |
+
# prob of applying flip aug
|
56 |
self.flip_prob = 0.5
|
57 |
+
# rotation angle range, for example, if set to 2, the true range is (-2, 2)
|
58 |
self.degrees = 10.0
|
59 |
+
# translate range, for example, if set to 0.1, the true range is (-0.1, 0.1)
|
60 |
self.translate = 0.1
|
61 |
self.mosaic_scale = (0.1, 2)
|
62 |
+
# apply mixup aug or not
|
63 |
+
self.enable_mixup = True
|
64 |
self.mixup_scale = (0.5, 1.5)
|
65 |
+
# shear angle range, for example, if set to 2, the true range is (-2, 2)
|
66 |
self.shear = 2.0
|
|
|
67 |
|
68 |
# -------------- training config --------------------- #
|
69 |
+
# epoch number used for warmup
|
70 |
self.warmup_epochs = 5
|
71 |
+
# max training epoch
|
72 |
self.max_epoch = 300
|
73 |
+
# minimum learning rate during warmup
|
74 |
self.warmup_lr = 0
|
75 |
+
self.min_lr_ratio = 0.05
|
76 |
+
# learning rate for one image. During training, lr will be multiplied by the batch size.
|
77 |
self.basic_lr_per_img = 0.01 / 64.0
|
78 |
+
# name of LRScheduler
|
79 |
self.scheduler = "yoloxwarmcos"
|
80 |
+
# number of last epochs in which augmentation like mosaic is turned off
|
81 |
self.no_aug_epochs = 15
|
82 |
+
# apply EMA during training
|
83 |
self.ema = True
|
84 |
|
85 |
+
# weight decay of optimizer
|
86 |
self.weight_decay = 5e-4
|
87 |
+
# momentum of optimizer
|
88 |
self.momentum = 0.9
|
89 |
+
# log period in iter, for example,
|
90 |
+
# if set to 1, user could see log every iteration.
|
91 |
self.print_interval = 10
|
92 |
+
# eval period in epoch, for example,
|
93 |
+
# if set to 1, model will be evaluated after every epoch.
|
94 |
self.eval_interval = 10
|
95 |
+
# save history checkpoint or not.
|
96 |
+
# If set to False, yolox will only save latest and best ckpt.
|
97 |
+
self.save_history_ckpt = True
|
98 |
+
# name of experiment
|
99 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
100 |
|
101 |
# ----------------- testing config ------------------ #
|
102 |
+
# output image size during evaluation/test
|
103 |
self.test_size = (640, 640)
|
104 |
+
# confidence threshold during evaluation/test,
|
105 |
+
# boxes whose scores are less than test_conf will be filtered
|
106 |
self.test_conf = 0.01
|
107 |
+
# nms threshold
|
108 |
self.nmsthre = 0.65
|
109 |
|
110 |
def get_model(self):
|