Spaces:

tidalove
/

yolox

Sleeping

App Files Files Community

Feng Wang committed on Jan 27, 2022

Commit

53d1ae9

1 Parent(s): 7020e34

fix(core): avoid overwrite best ckpt and add attr description (#1103)

Browse files

Files changed (3) hide show

yolox/core/trainer.py +10 -3
yolox/data/data_augment.py +1 -3
yolox/exp/yolox_base.py +43 -6

yolox/core/trainer.py CHANGED Viewed

@@ -46,6 +46,7 @@ class Trainer:
         self.local_rank = get_local_rank()
         self.device = "cuda:{}".format(self.local_rank)
         self.use_model_ema = exp.ema
         # data/dataloader related attr
         self.data_type = torch.float16 if args.fp16 else torch.float32
@@ -174,7 +175,7 @@ class Trainer:
         )
         # Tensorboard logger
         if self.rank == 0:
-            self.tblogger = SummaryWriter(self.file_name)
         logger.info("Training start...")
         logger.info("\n{}".format(model))
@@ -269,6 +270,7 @@ class Trainer:
             # resume the model/optimizer state dict
             model.load_state_dict(ckpt["model"])
             self.optimizer.load_state_dict(ckpt["optimizer"])
             # resume the training states variables
             start_epoch = (
                 self.args.start_epoch - 1
@@ -302,6 +304,9 @@ class Trainer:
         ap50_95, ap50, summary = self.exp.eval(
             evalmodel, self.evaluator, self.is_distributed
         )
         self.model.train()
         if self.rank == 0:
             self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
@@ -309,8 +314,9 @@ class Trainer:
             logger.info("\n" + summary)
         synchronize()
-        self.save_ckpt("last_epoch", ap50_95 > self.best_ap)
-        self.best_ap = max(self.best_ap, ap50_95)
     def save_ckpt(self, ckpt_name, update_best_ckpt=False):
         if self.rank == 0:
@@ -320,6 +326,7 @@ class Trainer:
                 "start_epoch": self.epoch + 1,
                 "model": save_model.state_dict(),
                 "optimizer": self.optimizer.state_dict(),
             }
             save_checkpoint(
                 ckpt_state,

         self.local_rank = get_local_rank()
         self.device = "cuda:{}".format(self.local_rank)
         self.use_model_ema = exp.ema
+        self.save_history_ckpt = exp.save_history_ckpt
         # data/dataloader related attr
         self.data_type = torch.float16 if args.fp16 else torch.float32
         )
         # Tensorboard logger
         if self.rank == 0:
+            self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
         logger.info("Training start...")
         logger.info("\n{}".format(model))
             # resume the model/optimizer state dict
             model.load_state_dict(ckpt["model"])
             self.optimizer.load_state_dict(ckpt["optimizer"])
+            self.best_ap = ckpt.pop("best_ap", 0)
             # resume the training states variables
             start_epoch = (
                 self.args.start_epoch - 1
         ap50_95, ap50, summary = self.exp.eval(
             evalmodel, self.evaluator, self.is_distributed
         )
+        update_best_ckpt = ap50_95 > self.best_ap
+        self.best_ap = max(self.best_ap, ap50_95)
         self.model.train()
         if self.rank == 0:
             self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
             logger.info("\n" + summary)
         synchronize()
+        self.save_ckpt("last_epoch", update_best_ckpt)
+        if self.save_history_ckpt:
+            self.save_ckpt(f"epoch_{self.epoch + 1}")
     def save_ckpt(self, ckpt_name, update_best_ckpt=False):
         if self.rank == 0:
                 "start_epoch": self.epoch + 1,
                 "model": save_model.state_dict(),
                 "optimizer": self.optimizer.state_dict(),
+                "best_ap": self.best_ap,
             }
             save_checkpoint(
                 ckpt_state,

yolox/data/data_augment.py CHANGED Viewed

@@ -39,9 +39,7 @@ def get_aug_params(value, center=0):
     else:
         raise ValueError(
             "Affine params should be either a sequence containing two values\
-                          or single float values. Got {}".format(
-                value
-            )
         )

     else:
         raise ValueError(
             "Affine params should be either a sequence containing two values\
+             or single float values. Got {}".format(value)
         )

yolox/exp/yolox_base.py CHANGED Viewed

@@ -17,57 +17,94 @@ class Exp(BaseExp):
         super().__init__()
         # ---------------- model config ---------------- #
         self.num_classes = 80
         self.depth = 1.00
         self.width = 1.00
-        self.act = 'silu'
         # ---------------- dataloader config ---------------- #
         # set worker to 4 for shorter dataloader init time
         self.data_num_workers = 4
         self.input_size = (640, 640)  # (height, width)
-        # Actual multiscale ranges: [640-5*32, 640+5*32].
-        # To disable multiscale training, set the
-        # self.multiscale_range to 0.
         self.multiscale_range = 5
         # You can uncomment this line to specify a multiscale range
         # self.random_size = (14, 26)
         self.data_dir = None
         self.train_ann = "instances_train2017.json"
         self.val_ann = "instances_val2017.json"
         self.test_ann = "instances_test2017.json"
         # --------------- transform config ----------------- #
         self.mosaic_prob = 1.0
         self.mixup_prob = 1.0
         self.hsv_prob = 1.0
         self.flip_prob = 0.5
         self.degrees = 10.0
         self.translate = 0.1
         self.mosaic_scale = (0.1, 2)
         self.mixup_scale = (0.5, 1.5)
         self.shear = 2.0
-        self.enable_mixup = True
         # --------------  training config --------------------- #
         self.warmup_epochs = 5
         self.max_epoch = 300
         self.warmup_lr = 0
         self.basic_lr_per_img = 0.01 / 64.0
         self.scheduler = "yoloxwarmcos"
         self.no_aug_epochs = 15
-        self.min_lr_ratio = 0.05
         self.ema = True
         self.weight_decay = 5e-4
         self.momentum = 0.9
         self.print_interval = 10
         self.eval_interval = 10
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
         # -----------------  testing config ------------------ #
         self.test_size = (640, 640)
         self.test_conf = 0.01
         self.nmsthre = 0.65
     def get_model(self):

         super().__init__()
         # ---------------- model config ---------------- #
+        # number of classes the model detects
         self.num_classes = 80
+        # factor of model depth
         self.depth = 1.00
+        # factor of model width
         self.width = 1.00
+        # activation name. For example, if using "relu", then "silu" will be replaced with "relu".
+        self.act = "silu"
         # ---------------- dataloader config ---------------- #
         # set worker to 4 for shorter dataloader init time
+        # If your training process uses too much memory, reduce this value.
         self.data_num_workers = 4
         self.input_size = (640, 640)  # (height, width)
+        # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32].
+        # To disable multiscale training, set the value to 0.
         self.multiscale_range = 5
         # You can uncomment this line to specify a multiscale range
         # self.random_size = (14, 26)
+        # dir of dataset images, if data_dir is None, this project will use `datasets` dir
         self.data_dir = None
+        # name of annotation file for training
         self.train_ann = "instances_train2017.json"
+        # name of annotation file for evaluation
         self.val_ann = "instances_val2017.json"
+        # name of annotation file for testing
         self.test_ann = "instances_test2017.json"
         # --------------- transform config ----------------- #
+        # prob of applying mosaic aug
         self.mosaic_prob = 1.0
+        # prob of applying mixup aug
         self.mixup_prob = 1.0
+        # prob of applying hsv aug
         self.hsv_prob = 1.0
+        # prob of applying flip aug
         self.flip_prob = 0.5
+        # rotation angle range, for example, if set to 2, the true range is (-2, 2)
         self.degrees = 10.0
+        # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1)
         self.translate = 0.1
         self.mosaic_scale = (0.1, 2)
+        # apply mixup aug or not
+        self.enable_mixup = True
         self.mixup_scale = (0.5, 1.5)
+        # shear angle range, for example, if set to 2, the true range is (-2, 2)
         self.shear = 2.0
         # --------------  training config --------------------- #
+        # epoch number used for warmup
         self.warmup_epochs = 5
+        # max training epoch
         self.max_epoch = 300
+        # minimum learning rate during warmup
         self.warmup_lr = 0
+        self.min_lr_ratio = 0.05
+        # learning rate for one image. During training, lr is multiplied by the batch size.
         self.basic_lr_per_img = 0.01 / 64.0
+        # name of LRScheduler
         self.scheduler = "yoloxwarmcos"
+        # number of last epochs in which augmentation like mosaic is closed (turned off)
         self.no_aug_epochs = 15
+        # apply EMA during training
         self.ema = True
+        # weight decay of optimizer
         self.weight_decay = 5e-4
+        # momentum of optimizer
         self.momentum = 0.9
+        # log period in iter, for example,
+        # if set to 1, user could see log every iteration.
         self.print_interval = 10
+        # eval period in epoch, for example,
+        # if set to 1, model will be evaluated after every epoch.
         self.eval_interval = 10
+        # save history checkpoint or not.
+        # If set to False, yolox will only save latest and best ckpt.
+        self.save_history_ckpt = True
+        # name of experiment
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
         # -----------------  testing config ------------------ #
+        # output image size during evaluation/test
         self.test_size = (640, 640)
+        # confidence threshold during evaluation/test,
+        # boxes whose scores are less than test_conf will be filtered
         self.test_conf = 0.01
+        # nms threshold
         self.nmsthre = 0.65
     def get_model(self):