🐛 [Fix] momentum scaling bugs
Browse files
yolo/utils/model_utils.py
CHANGED
@@ -23,6 +23,8 @@ def lerp(start: float, end: float, step: Union[int, float], total: int = 1):
|
|
23 |
"""
|
24 |
Linearly interpolates between start and end values.
|
25 |
|
|
|
|
|
26 |
Parameters:
|
27 |
start (float): The starting value.
|
28 |
end (float): The ending value.
|
@@ -88,8 +90,8 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
|
|
88 |
# 0.937: Normal Momentum
|
89 |
# 0.8 : Start Momentum
|
90 |
# 3 : The warm up epoch num
|
91 |
-
self.min_mom = lerp(0.
|
92 |
-
self.max_mom = lerp(0.
|
93 |
self.batch_num = batch_num
|
94 |
self.batch_idx = 0
|
95 |
|
@@ -99,8 +101,9 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
|
|
99 |
for lr_idx, param_group in enumerate(self.param_groups):
|
100 |
min_lr, max_lr = self.min_lr[lr_idx], self.max_lr[lr_idx]
|
101 |
param_group["lr"] = lerp(min_lr, max_lr, self.batch_idx, self.batch_num)
|
102 |
-
|
103 |
lr_dict[f"LR/{lr_idx}"] = param_group["lr"]
|
|
|
104 |
return lr_dict
|
105 |
|
106 |
optimizer_class.next_batch = next_batch
|
|
|
23 |
"""
|
24 |
Linearly interpolates between start and end values.
|
25 |
|
26 |
+
start * (1 - step) + end * step
|
27 |
+
|
28 |
Parameters:
|
29 |
start (float): The starting value.
|
30 |
end (float): The ending value.
|
|
|
90 |
# 0.937: Normal Momentum
|
91 |
# 0.8 : Start Momentum
|
92 |
# 3 : The warm up epoch num
|
93 |
+
self.min_mom = lerp(0.8, 0.937, min(epoch_idx, 3), 3)
|
94 |
+
self.max_mom = lerp(0.8, 0.937, min(epoch_idx + 1, 3), 3)
|
95 |
self.batch_num = batch_num
|
96 |
self.batch_idx = 0
|
97 |
|
|
|
101 |
for lr_idx, param_group in enumerate(self.param_groups):
|
102 |
min_lr, max_lr = self.min_lr[lr_idx], self.max_lr[lr_idx]
|
103 |
param_group["lr"] = lerp(min_lr, max_lr, self.batch_idx, self.batch_num)
|
104 |
+
param_group["momentum"] = lerp(self.min_mom, self.max_mom, self.batch_idx, self.batch_num)
|
105 |
lr_dict[f"LR/{lr_idx}"] = param_group["lr"]
|
106 |
+
lr_dict[f"momentum/{lr_idx}"] = param_group["momentum"]
|
107 |
return lr_dict
|
108 |
|
109 |
optimizer_class.next_batch = next_batch
|