glenn-jocher committed on
Commit
f1c63e2
·
1 Parent(s): 806e75f

add mosaic and warmup to hyperparameters (#931)

Browse files
data/hyp.finetune.yaml CHANGED
@@ -12,6 +12,9 @@ lr0: 0.0032
12
  lrf: 0.12
13
  momentum: 0.843
14
  weight_decay: 0.00036
 
 
 
15
  giou: 0.0296
16
  cls: 0.243
17
  cls_pw: 0.631
@@ -31,4 +34,5 @@ shear: 0.602
31
  perspective: 0.0
32
  flipud: 0.00856
33
  fliplr: 0.5
 
34
  mixup: 0.243
 
12
  lrf: 0.12
13
  momentum: 0.843
14
  weight_decay: 0.00036
15
+ warmup_epochs: 2.0
16
+ warmup_momentum: 0.5
17
+ warmup_bias_lr: 0.05
18
  giou: 0.0296
19
  cls: 0.243
20
  cls_pw: 0.631
 
34
  perspective: 0.0
35
  flipud: 0.00856
36
  fliplr: 0.5
37
+ mosaic: 1.0
38
  mixup: 0.243
data/hyp.scratch.yaml CHANGED
@@ -7,6 +7,9 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
7
  lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
8
  momentum: 0.937 # SGD momentum/Adam beta1
9
  weight_decay: 0.0005 # optimizer weight decay 5e-4
 
 
 
10
  giou: 0.05 # box loss gain
11
  cls: 0.5 # cls loss gain
12
  cls_pw: 1.0 # cls BCELoss positive_weight
@@ -26,4 +29,5 @@ shear: 0.0 # image shear (+/- deg)
26
  perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
27
  flipud: 0.0 # image flip up-down (probability)
28
  fliplr: 0.5 # image flip left-right (probability)
 
29
  mixup: 0.0 # image mixup (probability)
 
7
  lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
8
  momentum: 0.937 # SGD momentum/Adam beta1
9
  weight_decay: 0.0005 # optimizer weight decay 5e-4
10
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
11
+ warmup_momentum: 0.8 # warmup initial momentum
12
+ warmup_bias_lr: 0.1 # warmup initial bias lr
13
  giou: 0.05 # box loss gain
14
  cls: 0.5 # cls loss gain
15
  cls_pw: 1.0 # cls BCELoss positive_weight
 
29
  perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
30
  flipud: 0.0 # image flip up-down (probability)
31
  fliplr: 0.5 # image flip left-right (probability)
32
+ mosaic: 1.0 # image mosaic (probability)
33
  mixup: 0.0 # image mixup (probability)
train.py CHANGED
@@ -202,7 +202,7 @@ def train(hyp, opt, device, tb_writer=None):
202
 
203
  # Start training
204
  t0 = time.time()
205
- nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
206
  # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
207
  maps = np.zeros(nc) # mAP per class
208
  results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
@@ -250,9 +250,9 @@ def train(hyp, opt, device, tb_writer=None):
250
  accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
251
  for j, x in enumerate(optimizer.param_groups):
252
  # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
253
- x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
254
  if 'momentum' in x:
255
- x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])
256
 
257
  # Multi-scale
258
  if opt.multi_scale:
@@ -460,8 +460,11 @@ if __name__ == '__main__':
460
  # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
461
  meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
462
  'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
463
- 'momentum': (0.1, 0.6, 0.98), # SGD momentum/Adam beta1
464
  'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
 
 
 
465
  'giou': (1, 0.02, 0.2), # GIoU loss gain
466
  'cls': (1, 0.2, 4.0), # cls loss gain
467
  'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
@@ -469,7 +472,7 @@ if __name__ == '__main__':
469
  'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
470
  'iou_t': (0, 0.1, 0.7), # IoU training threshold
471
  'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
472
- 'anchors': (1, 2.0, 10.0), # anchors per output grid (0 to ignore)
473
  'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
474
  'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
475
  'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
@@ -481,6 +484,7 @@ if __name__ == '__main__':
481
  'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
482
  'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
483
  'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
 
484
  'mixup': (1, 0.0, 1.0)} # image mixup (probability)
485
 
486
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
@@ -490,7 +494,7 @@ if __name__ == '__main__':
490
  if opt.bucket:
491
  os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
492
 
493
- for _ in range(1): # generations to evolve
494
  if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
495
  # Select parent(s)
496
  parent = 'single' # parent selection method: 'single' or 'weighted'
@@ -505,7 +509,7 @@ if __name__ == '__main__':
505
  x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
506
 
507
  # Mutate
508
- mp, s = 0.9, 0.2 # mutation probability, sigma
509
  npr = np.random
510
  npr.seed(int(time.time()))
511
  g = np.array([x[0] for x in meta.values()]) # gains 0-1
 
202
 
203
  # Start training
204
  t0 = time.time()
205
+ nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(warmup_epochs epochs, 1k iterations)
206
  # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
207
  maps = np.zeros(nc) # mAP per class
208
  results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
 
250
  accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
251
  for j, x in enumerate(optimizer.param_groups):
252
  # bias lr falls from hyp['warmup_bias_lr'] to lr0, all other lrs rise from 0.0 to lr0
253
+ x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
254
  if 'momentum' in x:
255
+ x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
256
 
257
  # Multi-scale
258
  if opt.multi_scale:
 
460
  # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
461
  meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
462
  'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
463
+ 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
464
  'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
465
+ 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
466
+ 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum
467
+ 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr
468
  'giou': (1, 0.02, 0.2), # GIoU loss gain
469
  'cls': (1, 0.2, 4.0), # cls loss gain
470
  'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
 
472
  'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
473
  'iou_t': (0, 0.1, 0.7), # IoU training threshold
474
  'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
475
+ 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
476
  'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
477
  'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
478
  'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
 
484
  'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
485
  'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
486
  'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
487
+ 'mosaic': (1, 0.0, 1.0), # image mosaic (probability)
488
  'mixup': (1, 0.0, 1.0)} # image mixup (probability)
489
 
490
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
 
494
  if opt.bucket:
495
  os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
496
 
497
+ for _ in range(300): # generations to evolve
498
  if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
499
  # Select parent(s)
500
  parent = 'single' # parent selection method: 'single' or 'weighted'
 
509
  x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
510
 
511
  # Mutate
512
+ mp, s = 0.8, 0.2 # mutation probability, sigma
513
  npr = np.random
514
  npr.seed(int(time.time()))
515
  g = np.array([x[0] for x in meta.values()]) # gains 0-1
utils/datasets.py CHANGED
@@ -516,7 +516,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing
516
  index = self.indices[index]
517
 
518
  hyp = self.hyp
519
- if self.mosaic:
 
520
  # Load mosaic
521
  img, labels = load_mosaic(self, index)
522
  shapes = None
@@ -550,7 +551,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
550
 
551
  if self.augment:
552
  # Augment imagespace
553
- if not self.mosaic:
554
  img, labels = random_perspective(img, labels,
555
  degrees=hyp['degrees'],
556
  translate=hyp['translate'],
 
516
  index = self.indices[index]
517
 
518
  hyp = self.hyp
519
+ mosaic = self.mosaic and random.random() < hyp['mosaic']
520
+ if mosaic:
521
  # Load mosaic
522
  img, labels = load_mosaic(self, index)
523
  shapes = None
 
551
 
552
  if self.augment:
553
  # Augment imagespace
554
+ if not mosaic:
555
  img, labels = random_perspective(img, labels,
556
  degrees=hyp['degrees'],
557
  translate=hyp['translate'],