Update optimizer param group strategy (#7376)
* Update optimizer param group strategy

  Avoid empty parameter lists on models with no BatchNorm2d layers, as in https://github.com/ultralytics/yolov5/issues/7375 (a minimal repro is sketched below).
* fix init
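
For context, here is a minimal repro of the failure mode behind the linked issue. The tiny model, layer sizes, and hyperparameter values are illustrative only and are not taken from train.py: a model with no BatchNorm2d layers produces an empty "no decay" weight list, and PyTorch raises a ValueError when an optimizer is constructed from an empty parameter list, which is why the optimizer is now seeded with the bias group instead.

```python
# Hypothetical minimal repro (not YOLOv5 code): a model with no BatchNorm2d
# layers yields an empty "no decay" parameter list, and PyTorch refuses to
# build an optimizer from an empty list.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 4, 3))  # no BatchNorm2d anywhere

bn_weights = [m.weight for m in model.modules() if isinstance(m, nn.BatchNorm2d)]
print(len(bn_weights))  # 0 -> empty parameter group

try:
    # Old strategy: seed the optimizer with the (possibly empty) BatchNorm weight group
    torch.optim.SGD(bn_weights, lr=0.01, momentum=0.937, nesterov=True)
except ValueError as e:
    print(e)  # "optimizer got an empty parameter list"

# New strategy: seed the optimizer with the biases, then attach the weight
# groups afterwards via add_param_group(), so an empty BatchNorm list can no
# longer break optimizer construction.
```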
train.py CHANGED
@@ -150,27 +150,27 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
     hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
     LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

-    g0, g1, g2 = [], [], []  # optimizer parameter groups
+    g = [], [], []  # optimizer parameter groups
     for v in model.modules():
         if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
-            g2.append(v.bias)
+            g[2].append(v.bias)
         if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
-            g0.append(v.weight)
+            g[1].append(v.weight)
         elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
-            g1.append(v.weight)
+            g[0].append(v.weight)

     if opt.optimizer == 'Adam':
-        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+        optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     elif opt.optimizer == 'AdamW':
-        optimizer = AdamW(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+        optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     else:
-        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
+        optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

-    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
-    optimizer.add_param_group({'params': g2})  # add g2 (biases)
+    optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']})  # add g0 with weight_decay
+    optimizer.add_param_group({'params': g[1]})  # add g1 (BatchNorm2d weights)
     LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
-                f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias")
-    del g0, g1, g2
+                f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
+    del g

     # Scheduler
     if opt.cos_lr:
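
For anyone who wants to exercise the new grouping outside train.py, the sketch below is a self-contained approximation under stated assumptions: a generic torch.nn model, illustrative lr/momentum/weight_decay values, and a hypothetical build_optimizer helper that is not part of the repo. It only demonstrates the strategy in the diff: seed the optimizer with the biases, then attach the decayed weights and the BatchNorm weights with add_param_group().

```python
# Standalone sketch of the updated grouping (hypothetical helper, illustrative
# hyperparameters): seed the optimizer with biases, then add decayed weights
# and BatchNorm weights as separate param groups.
import torch
import torch.nn as nn


def build_optimizer(model, lr=0.01, momentum=0.937, weight_decay=0.0005):
    g = [], [], []  # g[0]: weights w/ decay, g[1]: BatchNorm2d weights (no decay), g[2]: biases
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g[2].append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # BatchNorm2d weight (no decay)
            g[1].append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g[0].append(v.weight)

    optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)  # seed with biases
    optimizer.add_param_group({'params': g[0], 'weight_decay': weight_decay})  # decayed weights
    optimizer.add_param_group({'params': g[1]})  # BatchNorm2d weights keep the SGD default decay (0)
    return optimizer


# Works with or without BatchNorm2d layers, since the seed group is the biases
model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.SiLU(), nn.Conv2d(16, 8, 3))
opt = build_optimizer(model)
for i, pg in enumerate(opt.param_groups):
    print(i, len(pg['params']), pg['weight_decay'])
```

Running this prints three groups (3 biases, 2 conv weights, 1 BatchNorm weight for the toy model above), with only the conv weights carrying weight decay.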