🔥 [Remove] & fix typo of momentum schedule
Changed files:
- yolo/config/config.py  +1 -0
- yolo/utils/model_utils.py  +7 -8
yolo/config/config.py

@@ -66,6 +66,7 @@ class DataConfig:
 class OptimizerArgs:
     lr: float
     weight_decay: float
+    momentum: float


 @dataclass
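For context, a minimal sketch of the updated OptimizerArgs and one way its fields could be forwarded to torch.optim.SGD. The nn.Linear model, the concrete values, and the direct field-to-kwarg mapping are illustrative assumptions; the diff itself does not show how the config is consumed.

from dataclasses import dataclass

import torch
from torch import nn


@dataclass
class OptimizerArgs:
    lr: float
    weight_decay: float
    momentum: float  # field added by this commit


# Hypothetical usage: map the dataclass fields onto SGD's keyword arguments.
model = nn.Linear(8, 2)
args = OptimizerArgs(lr=0.01, weight_decay=5e-4, momentum=0.937)
optimizer = torch.optim.SGD(
    model.parameters(), lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum
)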
yolo/utils/model_utils.py

@@ -8,7 +8,6 @@ import torch
 import torch.distributed as dist
 from lightning import LightningModule, Trainer
 from lightning.pytorch.callbacks import Callback
-from lightning.pytorch.utilities import rank_zero_only
 from omegaconf import ListConfig
 from torch import Tensor, no_grad
 from torch.optim import Optimizer

@@ -77,9 +76,9 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
     conv_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" not in name]

     model_parameters = [
-        {"params": bias_params, "momentum": 0.
-        {"params": conv_params, "momentum": 0.
-        {"params": norm_params, "momentum": 0.
+        {"params": bias_params, "momentum": 0.937, "weight_decay": 0},
+        {"params": conv_params, "momentum": 0.937},
+        {"params": norm_params, "momentum": 0.937, "weight_decay": 0},
     ]

     def next_epoch(self, batch_num, epoch_idx):
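The hunk above pins each parameter group's momentum at 0.937 instead of leaving it to the (now removed) per-batch schedule. A small, self-contained sketch of how per-group overrides behave in torch.optim.SGD; the toy model and the selection predicates are simplified stand-ins, not the repo's actual ones (the diff only shows how conv_params is built).

import torch
from torch import nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))

# Split parameters into biases, conv weights and norm weights (illustrative predicates).
bias_params = [p for n, p in model.named_parameters() if n.endswith(".bias")]
conv_params = [p for n, p in model.named_parameters() if "0.weight" in n]
norm_params = [p for n, p in model.named_parameters() if "1.weight" in n]

model_parameters = [
    {"params": bias_params, "momentum": 0.937, "weight_decay": 0},
    {"params": conv_params, "momentum": 0.937},
    {"params": norm_params, "momentum": 0.937, "weight_decay": 0},
]

# Keys set inside a group override the defaults passed here;
# conv_params has no "weight_decay" key, so it inherits 5e-4.
optimizer = torch.optim.SGD(model_parameters, lr=0.01, momentum=0.8, weight_decay=5e-4)
for i, group in enumerate(optimizer.param_groups):
    print(i, group["momentum"], group["weight_decay"])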
@@ -89,8 +88,8 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
         # 0.937: Start Momentum
         # 0.8 : Normal Momemtum
         # 3 : The warm up epoch num
-        self.min_mom = lerp(0.937, 0.8,
-        self.max_mom = lerp(0.937, 0.8,
+        self.min_mom = lerp(0.937, 0.8, min(epoch_idx, 3), 3)
+        self.max_mom = lerp(0.937, 0.8, min(epoch_idx + 1, 3), 3)
         self.batch_num = batch_num
         self.batch_idx = 0

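The fixed lines interpolate the epoch's momentum bounds with a lerp helper whose body is not shown in this diff. A short sketch, assuming lerp(start, end, step, total) is plain linear interpolation, of the values those two calls produce: a ramp from 0.937 toward 0.8 over the first 3 epochs, then clamped at 0.8.

def lerp(start: float, end: float, step: int, total: int) -> float:
    # Assumed signature and behavior, matching how the diff calls it.
    return start + (end - start) * step / total


for epoch_idx in range(5):
    min_mom = lerp(0.937, 0.8, min(epoch_idx, 3), 3)
    max_mom = lerp(0.937, 0.8, min(epoch_idx + 1, 3), 3)
    print(epoch_idx, round(min_mom, 4), round(max_mom, 4))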
@@ -100,7 +99,7 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
         for lr_idx, param_group in enumerate(self.param_groups):
             min_lr, max_lr = self.min_lr[lr_idx], self.max_lr[lr_idx]
             param_group["lr"] = lerp(min_lr, max_lr, self.batch_idx, self.batch_num)
-            param_group["momentum"] = lerp(self.min_mom, self.max_mom, self.batch_idx, self.batch_num)
+            # param_group["momentum"] = lerp(self.min_mom, self.max_mom, self.batch_idx, self.batch_num)
             lr_dict[f"LR/{lr_idx}"] = param_group["lr"]
         return lr_dict

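With the momentum line commented out, the per-batch hook still interpolates the learning rate but no longer touches momentum. A minimal sketch of that in-place update, using illustrative scalar bounds in place of the per-group self.min_lr / self.max_lr lists:

import torch
from torch import nn


def lerp(start: float, end: float, step: int, total: int) -> float:
    # Assumed linear-interpolation helper, as above.
    return start + (end - start) * step / total


model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.0, momentum=0.937)

min_lr, max_lr, batch_num = 0.0, 0.01, 100
for batch_idx in range(batch_num):
    for param_group in optimizer.param_groups:
        # SGD reads "lr" from the group on every step(), so an in-place write is enough.
        param_group["lr"] = lerp(min_lr, max_lr, batch_idx, batch_num)
        # Momentum is left alone; it keeps the fixed 0.937 set at construction.
    # loss.backward(); optimizer.step() would go here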
@@ -125,7 +124,7 @@ def create_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LR
         lambda1 = lambda epoch: (epoch + 1) / wepoch if epoch < wepoch else 1
         lambda2 = lambda epoch: 10 - 9 * ((epoch + 1) / wepoch) if epoch < wepoch else 1
         warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda2, lambda1, lambda1])
-        schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[
+        schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[wepoch - 1])
     return schedule


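The new line completes the milestone as wepoch - 1, so the warm-up scheduler hands over to the main schedule after the warm-up epochs. A small, self-contained sketch of the same composition; the MultiStepLR stand-in, the single-lambda warm-up (the repo passes three lambdas, one per parameter group), and the concrete values are illustrative, since the configured scheduler is built elsewhere.

import torch
from torch import nn
from torch.optim.lr_scheduler import LambdaLR, MultiStepLR, SequentialLR

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.937)

wepoch = 3  # warm-up epochs, illustrative value
warmup_schedule = LambdaLR(optimizer, lr_lambda=lambda epoch: (epoch + 1) / wepoch if epoch < wepoch else 1)
main_schedule = MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)  # stand-in for the configured scheduler

# SequentialLR switches from warmup_schedule to main_schedule once its epoch counter reaches wepoch - 1.
schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, main_schedule], milestones=[wepoch - 1])

for epoch in range(5):
    # train one epoch ...
    schedule.step()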