Copy-Paste augmentation for YOLOv5 (#3845)
* Copy-paste augmentation initial commit
* if any segments
* Add obscuration rejection
* Add copy_paste hyperparameter
* Update comments
- data/hyps/hyp.finetune.yaml +1 -0
- data/hyps/hyp.finetune_objects365.yaml +1 -0
- data/hyps/hyp.scratch-p6.yaml +1 -0
- data/hyps/hyp.scratch.yaml +1 -0
- train.py +3 -2
- utils/datasets.py +26 -18
- utils/metrics.py +25 -1
data/hyps/hyp.finetune.yaml
CHANGED
@@ -36,3 +36,4 @@ flipud: 0.00856
 fliplr: 0.5
 mosaic: 1.0
 mixup: 0.243
+copy_paste: 0.0
data/hyps/hyp.finetune_objects365.yaml
CHANGED
@@ -26,3 +26,4 @@ flipud: 0.0
 fliplr: 0.5
 mosaic: 1.0
 mixup: 0.0
+copy_paste: 0.0
data/hyps/hyp.scratch-p6.yaml
CHANGED
@@ -31,3 +31,4 @@ flipud: 0.0  # image flip up-down (probability)
 fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
+copy_paste: 0.0  # segment copy-paste (probability)
data/hyps/hyp.scratch.yaml
CHANGED
@@ -31,3 +31,4 @@ flipud: 0.0  # image flip up-down (probability)
 fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
+copy_paste: 0.0  # segment copy-paste (probability)
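Note: the new `copy_paste` key is added to all four shipped hyp files with a default of 0.0, i.e. segment copy-paste is disabled unless raised. A minimal sketch of reading it the same way train.py loads a hyp file (the standalone script and the print are illustrative only, not part of this diff):

    import yaml

    # Load a hyperparameter file the same way train.py does (yaml.safe_load on opt.hyp)
    with open('data/hyps/hyp.scratch.yaml') as f:
        hyp = yaml.safe_load(f)

    print(hyp['copy_paste'])  # 0.0 by default -> segment copy-paste disabled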
train.py
CHANGED
@@ -6,7 +6,6 @@ Usage:
 
 import argparse
 import logging
-import math
 import os
 import random
 import sys
@@ -16,6 +15,7 @@ from copy import deepcopy
 from pathlib import Path
 from threading import Thread
 
+import math
 import numpy as np
 import torch.distributed as dist
 import torch.nn as nn
@@ -591,7 +591,8 @@ def main(opt):
                 'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
                 'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
                 'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
-                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)
+                'mixup': (1, 0.0, 1.0),  # image mixup (probability)
+                'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)
 
         with open(opt.hyp) as f:
             hyp = yaml.safe_load(f)  # load hyps dict
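For reviewers unfamiliar with the evolve meta dict: each entry is (gain, lower_limit, upper_limit), so 'copy_paste': (1, 0.0, 1.0) lets --evolve mutate the new probability anywhere in [0, 1]. A rough sketch of what those bounds do; the actual mutation code in train.py is untouched by this PR, and the toy mutation factor below is made up for illustration:

    import numpy as np

    # meta entries are (gain, lower_limit, upper_limit); gain == 0 freezes a value during --evolve
    meta = {'mixup': (1, 0.0, 1.0),
            'copy_paste': (1, 0.0, 1.0)}
    hyp = {'mixup': 0.243, 'copy_paste': 0.2}

    rng = np.random.default_rng(0)
    for k, (gain, lo, hi) in meta.items():
        if gain:
            mutated = hyp[k] * (1 + 0.2 * gain * rng.standard_normal())  # toy mutation, not train.py's
            hyp[k] = float(np.clip(mutated, lo, hi))  # keep the value inside its declared limits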
utils/datasets.py
CHANGED
@@ -25,6 +25,7 @@ from tqdm import tqdm
 
 from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \
     xyn2xy, segment2box, segments2boxes, resample_segments, clean_str
+from utils.metrics import bbox_ioa
 from utils.torch_utils import torch_distributed_zero_first
 
 # Parameters
@@ -683,6 +684,7 @@ def load_mosaic(self, index):
     # img4, labels4 = replicate(img4, labels4)  # replicate
 
     # Augment
+    img4, labels4, segments4 = copy_paste(img4, labels4, segments4, probability=self.hyp['copy_paste'])
     img4, labels4 = random_perspective(img4, labels4, segments4,
                                        degrees=self.hyp['degrees'],
                                        translate=self.hyp['translate'],
@@ -907,6 +909,30 @@ def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, s
     return img, targets
 
 
+def copy_paste(img, labels, segments, probability=0.5):
+    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+    n = len(segments)
+    if probability and n:
+        h, w, c = img.shape  # height, width, channels
+        im_new = np.zeros(img.shape, np.uint8)
+        for j in random.sample(range(n), k=round(probability * n)):
+            l, s = labels[j], segments[j]
+            box = w - l[3], l[2], w - l[1], l[4]
+            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
+            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
+                labels = np.concatenate((labels, [[l[0], *box]]), 0)
+                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
+                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
+
+        result = cv2.bitwise_and(src1=img, src2=im_new)
+        result = cv2.flip(result, 1)  # augment segments (flip left-right)
+        i = result > 0  # pixels to replace
+        # i[:, :] = result.max(2).reshape(h, w, 1)  # act over ch
+        img[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
+
+    return img, labels, segments
+
+
 def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
     # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
     w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
@@ -919,24 +945,6 @@ def cutout(image, labels):
     # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
     h, w = image.shape[:2]
 
-    def bbox_ioa(box1, box2):
-        # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
-        box2 = box2.transpose()
-
-        # Get the coordinates of bounding boxes
-        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
-        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
-
-        # Intersection area
-        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
-                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
-
-        # box2 area
-        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
-
-        # Intersection over box2 area
-        return inter_area / box2_area
-
     # create random masks
     scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
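Quick way to exercise the new function outside load_mosaic (the dummy image, polygon and label below are made up for illustration; per the code above, labels is an nx5 array of (cls, x1, y1, x2, y2) and segments is a list of nx2 polygon arrays):

    import numpy as np
    from utils.datasets import copy_paste  # where this PR defines the function

    img = np.zeros((640, 640, 3), dtype=np.uint8)                                  # dummy HxWx3 image
    labels = np.array([[0, 100, 100, 200, 200]], dtype=np.float32)                 # cls, x1, y1, x2, y2
    segments = [np.array([[100, 100], [200, 100], [150, 200]], dtype=np.float32)]  # one nx2 polygon

    img, labels, segments = copy_paste(img, labels, segments, probability=1.0)
    print(labels.shape, len(segments))  # a mirrored copy is appended when obscuration stays under 30%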
utils/metrics.py
CHANGED
@@ -1,9 +1,9 @@
 # Model validation metrics
 
-import math
 import warnings
 from pathlib import Path
 
+import math
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
@@ -253,6 +253,30 @@ def box_iou(box1, box2):
     return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
 
 
+def bbox_ioa(box1, box2, eps=1E-7):
+    """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
+    box1:       np.array of shape(4)
+    box2:       np.array of shape(nx4)
+    returns:    np.array of shape(n)
+    """
+
+    box2 = box2.transpose()
+
+    # Get the coordinates of bounding boxes
+    b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+    b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+
+    # Intersection area
+    inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
+                 (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
+
+    # box2 area
+    box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps
+
+    # Intersection over box2 area
+    return inter_area / box2_area
+
+
 def wh_iou(wh1, wh2):
     # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
     wh1 = wh1[:, None]  # [N,1,2]