glenn-jocher committed
Commit bd2dda8 · unverified · 1 Parent(s): 71685cb

Update optimizer param group strategy (#7376)

* Update optimizer param group strategy

Avoid handing an empty parameter group to the optimizer constructor for models with no BatchNorm2d layers, as in https://github.com/ultralytics/yolov5/issues/7375 (a minimal sketch of the new grouping follows below)

* fix init
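
Below is a minimal, self-contained sketch of the new grouping strategy (the toy model, learning rate, and weight-decay values are illustrative, not taken from train.py): biases go to the optimizer constructor, and the decayed weights plus the possibly-empty BatchNorm2d group are attached afterwards with add_param_group, which does not require a non-empty list.

import torch.nn as nn
from torch.optim import SGD

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.Conv2d(8, 4, 3))  # toy model

g = [], [], []  # 0) weights with decay, 1) BatchNorm2d weights (no decay), 2) biases
for v in model.modules():
    if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
        g[2].append(v.bias)
    if isinstance(v, nn.BatchNorm2d):  # BatchNorm2d weight (no decay)
        g[1].append(v.weight)
    elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
        g[0].append(v.weight)

# Build the optimizer from the bias group (populated by every layer type above), then
# attach the remaining groups; g[1] may be empty for a model without BatchNorm2d layers.
optimizer = SGD(g[2], lr=0.01, momentum=0.937, nesterov=True)
optimizer.add_param_group({'params': g[0], 'weight_decay': 5e-4})  # weights with decay
optimizer.add_param_group({'params': g[1]})  # BatchNorm2d weights, no decay
print(len(g[1]), 'weight (no decay),', len(g[0]), 'weight,', len(g[2]), 'bias')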

Files changed (1)
  1. train.py +11 -11
train.py CHANGED
@@ -150,27 +150,27 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
     hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
     LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

-    g0, g1, g2 = [], [], []  # optimizer parameter groups
+    g = [], [], []  # optimizer parameter groups
     for v in model.modules():
         if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
-            g2.append(v.bias)
+            g[2].append(v.bias)
         if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
-            g0.append(v.weight)
+            g[1].append(v.weight)
         elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
-            g1.append(v.weight)
+            g[0].append(v.weight)

     if opt.optimizer == 'Adam':
-        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+        optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     elif opt.optimizer == 'AdamW':
-        optimizer = AdamW(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+        optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     else:
-        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
+        optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

-    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
-    optimizer.add_param_group({'params': g2})  # add g2 (biases)
+    optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']})  # add g0 with weight_decay
+    optimizer.add_param_group({'params': g[1]})  # add g1 (BatchNorm2d weights)
     LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
-                f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias")
-    del g0, g1, g2
+                f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
+    del g

     # Scheduler
     if opt.cos_lr:
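
For reference, a sketch of the failure mode this commit works around (toy model and hyperparameters are assumed for illustration): under the previous layout the BatchNorm2d weight group was the one passed to the optimizer constructor, so a model with no BatchNorm2d layers handed the constructor an empty list, which PyTorch rejects.

import torch.nn as nn
from torch.optim import SGD

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 4, 3))  # no BatchNorm2d layers

bn_weights = [v.weight for v in model.modules() if isinstance(v, nn.BatchNorm2d)]  # empty list

try:
    SGD(bn_weights, lr=0.01, momentum=0.937, nesterov=True)  # old strategy: this group went to the constructor
except ValueError as e:
    print(e)  # PyTorch raises on an empty parameter list in the optimizer constructor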