⚡️ [Update] Optimizer, with finer-grained LR
- yolo/config/task/train.yaml +1 -0
- yolo/tools/solver.py +2 -1
- yolo/utils/model_utils.py +23 -7
yolo/config/task/train.yaml

@@ -22,6 +22,7 @@ optimizer:
     lr: 0.01
     weight_decay: 0.0005
     momentum: 0.937
+    nesterov: true
 
 loss:
   objective:
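With this flag the configured SGD runs Nesterov momentum. A minimal sketch of the equivalent construction, assuming the optimizer block's args are forwarded to torch.optim.SGD as keyword arguments (the nn.Linear stand-in is illustrative, not the repo's grouped parameters); PyTorch accepts nesterov=True only together with non-zero momentum and zero dampening:

import torch
from torch import nn

# Hypothetical stand-in model; the real create_optimizer splits params into bias/conv/norm groups.
model = nn.Linear(8, 4)

# Mirrors optimizer.args in train.yaml after this change.
optim_args = dict(lr=0.01, weight_decay=0.0005, momentum=0.937, nesterov=True)

# SGD raises a ValueError for nesterov=True if momentum == 0 or dampening != 0.
optimizer = torch.optim.SGD(model.parameters(), **optim_args)
print(optimizer.defaults["nesterov"])  # True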
yolo/tools/solver.py

@@ -87,8 +87,9 @@ class ModelTrainer:
         self.model.train()
         total_loss = defaultdict(lambda: torch.tensor(0.0, device=self.device))
         total_samples = 0
-
+        self.optimizer.next_epoch(len(dataloader))
         for batch_size, images, targets, *_ in dataloader:
+            self.optimizer.next_batch()
             loss_each = self.train_one_batch(images, targets)
 
             for loss_name, loss_val in loss_each.items():
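The trainer now tells the optimizer how many batches the epoch holds before iterating, then ticks it once per batch. A toy sketch of that call order, with a stand-in optimizer and dataloader (both hypothetical) replacing the real ones:

# Stand-in exposing the same hooks ModelTrainer now calls.
class FakeOptimizer:
    def __init__(self):
        self.calls = []

    def next_epoch(self, batch_num):
        self.calls.append(("next_epoch", batch_num))

    def next_batch(self):
        self.calls.append("next_batch")

# Fake dataloader yielding (batch_size, images, targets) tuples, as the trainer unpacks them.
dataloader = [(2, "images", "targets")] * 3

optimizer = FakeOptimizer()
optimizer.next_epoch(len(dataloader))   # once per epoch, with the batch count
for batch_size, images, targets, *_ in dataloader:
    optimizer.next_batch()              # once per batch, before the forward/backward pass
    # loss_each = self.train_one_batch(images, targets) runs here in the real trainer

print(optimizer.calls)
# [('next_epoch', 3), 'next_batch', 'next_batch', 'next_batch']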
yolo/utils/model_utils.py

@@ -52,11 +52,27 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
     conv_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" not in name]
 
     model_parameters = [
-        {"params": bias_params, "…
-        {"params": conv_params…
-        {"params": norm_params, "weight_decay": …
+        {"params": bias_params, "weight_decay": 0},
+        {"params": conv_params},
+        {"params": norm_params, "weight_decay": 0},
     ]
-    …
+
+    def next_epoch(self, batch_num):
+        self.min_lr = self.max_lr
+        self.max_lr = [param["lr"] for param in self.param_groups]
+        self.batch_num = batch_num
+        self.batch_idx = 0
+
+    def next_batch(self):
+        for lr_idx, param_group in enumerate(self.param_groups):
+            min_lr, max_lr = self.min_lr[lr_idx], self.max_lr[lr_idx]
+            param_group["lr"] = min_lr + (self.batch_idx + 1) * (max_lr - min_lr) / self.batch_num
+
+    optimizer_class.next_batch = next_batch
+    optimizer_class.next_epoch = next_epoch
+    optimizer = optimizer_class(model_parameters, **optim_cfg.args)
+    optimizer.max_lr = [0.1, 0, 0]
+    return optimizer
 
 
 def create_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LRScheduler:
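next_epoch()/next_batch() are patched onto the optimizer class so each group's LR is interpolated linearly from the previous epoch's value (min_lr) to the value the scheduler just set (max_lr) across the batches of the epoch, seeded with max_lr = [0.1, 0, 0] for the first epoch. A standalone sketch of that interpolation on a plain torch.optim.SGD with three dummy parameter groups; note the hunk above never advances batch_idx inside next_batch, so the sketch increments it explicitly to make the ramp visible:

import torch

# Three dummy groups standing in for the bias / conv / norm parameters.
w_bias = torch.nn.Parameter(torch.zeros(4))
w_conv = torch.nn.Parameter(torch.zeros(4, 4))
w_norm = torch.nn.Parameter(torch.zeros(4))
optimizer = torch.optim.SGD(
    [
        {"params": [w_bias], "weight_decay": 0},
        {"params": [w_conv]},
        {"params": [w_norm], "weight_decay": 0},
    ],
    lr=0.01, momentum=0.937, weight_decay=0.0005, nesterov=True,
)

def next_epoch(self, batch_num):
    self.min_lr = self.max_lr                           # LRs in force during the last epoch
    self.max_lr = [g["lr"] for g in self.param_groups]  # LRs the scheduler set for this epoch
    self.batch_num = batch_num
    self.batch_idx = 0

def next_batch(self):
    for i, g in enumerate(self.param_groups):
        lo, hi = self.min_lr[i], self.max_lr[i]
        g["lr"] = lo + (self.batch_idx + 1) * (hi - lo) / self.batch_num
    self.batch_idx += 1  # not in the hunk above; added so the ramp actually advances

type(optimizer).next_epoch = next_epoch
type(optimizer).next_batch = next_batch
optimizer.max_lr = [0.1, 0, 0]  # seed used as min_lr on the very first epoch

optimizer.next_epoch(batch_num=4)
for _ in range(4):
    optimizer.next_batch()
    print([round(g["lr"], 4) for g in optimizer.param_groups])
# group 0 moves from 0.1 toward 0.01 over the 4 batches; groups 1 and 2 from 0 toward 0.01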
@@ -69,9 +85,9 @@ def create_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LRScheduler:
     schedule = scheduler_class(optimizer, **schedule_cfg.args)
     if hasattr(schedule_cfg, "warmup"):
         wepoch = schedule_cfg.warmup.epochs
-        lambda1 = lambda epoch: …
-        lambda2 = lambda epoch: 10 - 9 * (epoch / wepoch) if epoch < wepoch else 1
-        warmup_schedule = LambdaLR(optimizer, lr_lambda=[…
+        lambda1 = lambda epoch: (epoch + 1) / wepoch if epoch < wepoch else 1
+        lambda2 = lambda epoch: 10 - 9 * ((epoch + 1) / wepoch) if epoch < wepoch else 1
+        warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda2, lambda1, lambda1])
         schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[2])
     return schedule
 
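During warmup the bias group's LR is scaled by lambda2, which starts at 10 - 9/wepoch and decays toward 1, while the conv and norm groups follow lambda1, ramping up from 1/wepoch; SequentialLR then hands control to the main scheduler at milestone 2. A small sketch, assuming wepoch = 3 and a LinearLR stand-in for whatever scheduler_class the config actually selects:

import torch
from torch.optim.lr_scheduler import LambdaLR, LinearLR, SequentialLR

# Three dummy groups in the same order as create_optimizer: bias, conv, norm.
params = [torch.nn.Parameter(torch.zeros(2)) for _ in range(3)]
optimizer = torch.optim.SGD(
    [{"params": [p]} for p in params], lr=0.01, momentum=0.937, nesterov=True
)

wepoch = 3  # would come from schedule_cfg.warmup.epochs

# Stand-in for scheduler_class(optimizer, **schedule_cfg.args).
schedule = LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=10)

lambda1 = lambda epoch: (epoch + 1) / wepoch if epoch < wepoch else 1
lambda2 = lambda epoch: 10 - 9 * ((epoch + 1) / wepoch) if epoch < wepoch else 1
warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda2, lambda1, lambda1])
schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[2])

for epoch in range(5):
    print(epoch, [round(g["lr"], 5) for g in optimizer.param_groups])
    schedule.step()
# epoch 0: bias group at (10 - 9/3) * 0.01 = 0.07, conv/norm groups at 0.01 / 3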