Commit f1c63e2 · parent 806e75f
add mosaic and warmup to hyperparameters (#931)

Files changed:
- data/hyp.finetune.yaml  +4 -0
- data/hyp.scratch.yaml   +4 -0
- train.py                +11 -7
- utils/datasets.py       +3 -2
data/hyp.finetune.yaml CHANGED
@@ -12,6 +12,9 @@ lr0: 0.0032
 lrf: 0.12
 momentum: 0.843
 weight_decay: 0.00036
+warmup_epochs: 2.0
+warmup_momentum: 0.5
+warmup_bias_lr: 0.05
 giou: 0.0296
 cls: 0.243
 cls_pw: 0.631
@@ -31,4 +34,5 @@ shear: 0.602
 perspective: 0.0
 flipud: 0.00856
 fliplr: 0.5
+mosaic: 1.0
 mixup: 0.243
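The four new keys only take effect once the YAML is parsed into the hyp dict that train.py and the dataloader read from. A minimal sketch of loading this preset (assuming PyYAML is available and the repo-relative path above; train.py does the equivalent when it reads --hyp):

    import yaml  # PyYAML

    # Parse the fine-tuning preset into the plain dict the training code consumes.
    with open('data/hyp.finetune.yaml') as f:
        hyp = yaml.safe_load(f)

    # Keys introduced by this commit:
    print(hyp['warmup_epochs'], hyp['warmup_momentum'], hyp['warmup_bias_lr'])  # 2.0 0.5 0.05
    print(hyp['mosaic'])  # 1.0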
data/hyp.scratch.yaml CHANGED
@@ -7,6 +7,9 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937 # SGD momentum/Adam beta1
 weight_decay: 0.0005 # optimizer weight decay 5e-4
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.1 # warmup initial bias lr
 giou: 0.05 # box loss gain
 cls: 0.5 # cls loss gain
 cls_pw: 1.0 # cls BCELoss positive_weight
@@ -26,4 +29,5 @@ shear: 0.0 # image shear (+/- deg)
 perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
 flipud: 0.0 # image flip up-down (probability)
 fliplr: 0.5 # image flip left-right (probability)
+mosaic: 1.0 # image mosaic (probability)
 mixup: 0.0 # image mixup (probability)
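The scratch preset gets the same four keys but with a longer, stronger warmup (3.0 epochs, warmup momentum 0.8, warmup bias lr 0.1) than the finetune preset; both enable mosaic with probability 1.0. A quick sketch for listing every value that differs between the two presets (assumes both files are read from data/ as above):

    import yaml

    with open('data/hyp.scratch.yaml') as f:
        scratch = yaml.safe_load(f)
    with open('data/hyp.finetune.yaml') as f:
        finetune = yaml.safe_load(f)

    # Report every hyperparameter whose value differs between the two presets.
    for k in sorted(set(scratch) | set(finetune)):
        if scratch.get(k) != finetune.get(k):
            print(f'{k:15} scratch={scratch.get(k)}  finetune={finetune.get(k)}')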
train.py CHANGED
@@ -202,7 +202,7 @@ def train(hyp, opt, device, tb_writer=None):
 
     # Start training
     t0 = time.time()
-    nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
+    nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
     # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
     maps = np.zeros(nc) # mAP per class
     results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
@@ -250,9 +250,9 @@ def train(hyp, opt, device, tb_writer=None):
                 accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                 for j, x in enumerate(optimizer.param_groups):
                     # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
-                    x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
+                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                     if 'momentum' in x:
-                        x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])
+                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
 
             # Multi-scale
             if opt.multi_scale:
@@ -460,8 +460,11 @@ if __name__ == '__main__':
         # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
         meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
                 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
-                'momentum': (0.
+                'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
                 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
+                'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
+                'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum
+                'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr
                 'giou': (1, 0.02, 0.2), # GIoU loss gain
                 'cls': (1, 0.2, 4.0), # cls loss gain
                 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
@@ -469,7 +472,7 @@ if __name__ == '__main__':
                 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
                 'iou_t': (0, 0.1, 0.7), # IoU training threshold
                 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
-                'anchors': (
+                'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
                 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
                 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
                 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
@@ -481,6 +484,7 @@ if __name__ == '__main__':
                 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
                 'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
                 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
+                'mosaic': (1, 0.0, 1.0), # image mixup (probability)
                 'mixup': (1, 0.0, 1.0)} # image mixup (probability)
 
         assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
@@ -490,7 +494,7 @@ if __name__ == '__main__':
         if opt.bucket:
             os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
 
-        for _ in range(
+        for _ in range(300): # generations to evolve
            if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
                 # Select parent(s)
                 parent = 'single' # parent selection method: 'single' or 'weighted'
@@ -505,7 +509,7 @@ if __name__ == '__main__':
                     x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
 
                 # Mutate
-                mp, s = 0.
+                mp, s = 0.8, 0.2 # mutation probability, sigma
                 npr = np.random
                 npr.seed(int(time.time()))
                 g = np.array([x[0] for x in meta.values()]) # gains 0-1
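The train.py change replaces the hard-coded warmup constants with the new hyp entries while keeping the same linear ramp: over the first nw iterations the bias group's lr (j == 2) falls from warmup_bias_lr toward the scheduled lr, the other groups' lr rises from 0, and momentum climbs from warmup_momentum to the final momentum. A standalone sketch of that ramp follows; nb, the flat schedule lf, and the use of lr0 in place of each group's initial_lr are assumptions made purely for illustration:

    import numpy as np

    hyp = {'warmup_epochs': 3.0, 'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1,
           'lr0': 0.01, 'momentum': 0.937}  # values from hyp.scratch.yaml
    nb = 500                 # assumed number of batches per epoch
    lf = lambda epoch: 1.0   # assumed lr schedule factor (flat, for clarity)

    nw = max(round(hyp['warmup_epochs'] * nb), 1e3)  # warmup iterations, at least 1k
    xi = [0, nw]

    for ni in (0, int(nw) // 2, int(nw)):   # start, middle, end of warmup
        for j in range(3):                  # param groups: 0/1 weights, 2 biases
            start = hyp['warmup_bias_lr'] if j == 2 else 0.0
            lr = np.interp(ni, xi, [start, hyp['lr0'] * lf(0)])
            print(f'ni={ni:4d}  group {j}  lr={lr:.4f}')
        momentum = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
        print(f'ni={ni:4d}  momentum={momentum:.3f}')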
utils/datasets.py CHANGED
@@ -516,7 +516,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing
         index = self.indices[index]
 
         hyp = self.hyp
-        if self.mosaic:
+        mosaic = self.mosaic and random.random() < hyp['mosaic']
+        if mosaic:
             # Load mosaic
             img, labels = load_mosaic(self, index)
             shapes = None
@@ -550,7 +551,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
 
         if self.augment:
             # Augment imagespace
-            if not self.mosaic:
+            if not mosaic:
                 img, labels = random_perspective(img, labels,
                                                  degrees=hyp['degrees'],
                                                  translate=hyp['translate'],
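In utils/datasets.py, mosaic is now drawn per sample: __getitem__ compares a uniform random number against hyp['mosaic'] and only builds a 4-image mosaic when the draw succeeds, otherwise taking the single-image path, which is why random_perspective is now guarded by `if not mosaic` instead of `if not self.mosaic`. A minimal sketch of that gating, with the actual image loading mocked out and choose_augmentation used as a hypothetical stand-in for __getitem__:

    import random

    def choose_augmentation(index, hyp, mosaic_enabled=True):
        """Sketch of the per-sample choice made in LoadImagesAndLabels.__getitem__."""
        mosaic = mosaic_enabled and random.random() < hyp['mosaic']
        if mosaic:
            return f'sample {index}: load_mosaic (4-image mosaic)'
        # Single-image path; random_perspective runs here instead,
        # hence the commit's `if not mosaic` guard.
        return f'sample {index}: single image + random_perspective'

    random.seed(0)
    hyp = {'mosaic': 0.75}  # assumed probability for illustration; the shipped presets use 1.0
    for i in range(5):
        print(choose_augmentation(i, hyp))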