Commit 93a6765 · update mosaic border
Parent: 77fb8ee

utils/datasets.py CHANGED (+17 −13)
@@ -62,7 +62,7 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
 
 
 class LoadImages:  # for inference
-    def __init__(self, path, img_size=
+    def __init__(self, path, img_size=640):
         path = str(Path(path))  # os-agnostic
         files = []
         if os.path.isdir(path):
@@ -139,7 +139,7 @@ class LoadImages:  # for inference
 
 
 class LoadWebcam:  # for inference
-    def __init__(self, pipe=0, img_size=
+    def __init__(self, pipe=0, img_size=640):
         self.img_size = img_size
 
         if pipe == '0':
@@ -204,7 +204,7 @@ class LoadWebcam:  # for inference
 
 
 class LoadStreams:  # multiple IP or RTSP cameras
-    def __init__(self, sources='streams.txt', img_size=
+    def __init__(self, sources='streams.txt', img_size=640):
         self.mode = 'images'
         self.img_size = img_size
 
@@ -277,7 +277,7 @@ class LoadStreams:  # multiple IP or RTSP cameras
 
 
 class LoadImagesAndLabels(Dataset):  # for training/testing
-    def __init__(self, path, img_size=
+    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                  cache_images=False, single_cls=False, stride=32, pad=0.0):
         try:
             path = str(Path(path))  # os-agnostic
@@ -307,6 +307,9 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         self.image_weights = image_weights
         self.rect = False if image_weights else rect
         self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
+        self.mosaic_border = None
+        self.stride = stride
+
 
         # Define labels
         self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
@@ -585,7 +588,8 @@ def load_mosaic(self, index):
 
     labels4 = []
    s = self.img_size
-
+    border = [-s // 2, -s // 2]  # self.mosaic_border
+    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in border]  # mosaic center x, y
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
@@ -633,12 +637,12 @@ def load_mosaic(self, index):
                                  translate=self.hyp['translate'],
                                  scale=self.hyp['scale'],
                                  shear=self.hyp['shear'],
-                                 border
+                                 border=border)  # border to remove
 
    return img4, labels4
 
 
-def letterbox(img, new_shape=(
+def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
@@ -671,13 +675,13 @@ def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scale
    return img, ratio, (dw, dh)
 
 
-def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
+def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
    # targets = [cls, xyxy]
 
-   height = img.shape[0] + border * 2
-   width = img.shape[1] + border * 2
+   height = img.shape[0] + border[0] * 2  # shape(h,w,c)
+   width = img.shape[1] + border[1] * 2
 
    # Rotation and Scale
    R = np.eye(3)
@@ -689,8 +693,8 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
 
    # Translation
    T = np.eye(3)
-   T[0, 2] = random.uniform(-translate, translate) * img.shape[
-   T[1, 2] = random.uniform(-translate, translate) * img.shape[
+   T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1]  # x translation (pixels)
+   T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0]  # y translation (pixels)
 
    # Shear
    S = np.eye(3)
@@ -699,7 +703,7 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
 
    # Combined rotation matrix
    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
-   if (border != 0) or (M != np.eye(3)).any():  # image changed
+   if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
 
    # Transform label coordinates
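The arithmetic behind the new mosaic border can be sanity-checked on its own. The sketch below is a standalone approximation, not code from this commit; s = 640 is simply the new default img_size. It reproduces the two relationships the diff relies on: with border = [-s // 2, -s // 2] the mosaic center (xc, yc) is drawn from [s/2, 3s/2] inside the 2s x 2s four-image canvas, and passing the same negative border to random_affine brings the output back to s x s.

import random

s = 640  # assumed: the new default img_size from this commit
border = [-s // 2, -s // 2]  # the value hard-coded in load_mosaic above

# Mosaic center: uniform(-x, 2 * s + x) with x = -s // 2 is uniform(s / 2, 3 * s / 2),
# so the center always lands in the middle region of the 2s x 2s canvas.
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in border]
assert s // 2 <= yc <= 2 * s + border[0] and s // 2 <= xc <= 2 * s + border[1]

# random_affine output size: input size plus twice the (negative) border per axis,
# i.e. the 2s x 2s mosaic is cropped back to the training resolution s x s.
mosaic_h = mosaic_w = 2 * s
out_h = mosaic_h + border[0] * 2  # 1280 - 640 = 640
out_w = mosaic_w + border[1] * 2
print((yc, xc), (out_h, out_w))  # e.g. (734, 512) (640, 640)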
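End to end, the tuple-valued border argument can be exercised against a dummy image. This is only a usage sketch under assumptions: it presumes the repository's utils.datasets module is importable, and the 1280-pixel canvas, empty label array, and -320 border are placeholder values matching s = 640. With the default border=(0, 0) the output keeps the input size; with border=(-320, -320) a 1280 x 1280 mosaic is cropped down to 640 x 640.

import numpy as np

from utils.datasets import random_affine  # assumes the repo root is on PYTHONPATH

mosaic = np.full((1280, 1280, 3), 114, dtype=np.uint8)  # dummy 2s x 2s grey canvas
labels = np.zeros((0, 5), dtype=np.float32)             # no boxes, only checking shapes

out, _ = random_affine(mosaic, labels)  # default border=(0, 0): size unchanged
assert out.shape[:2] == (1280, 1280)

out, _ = random_affine(mosaic, labels, border=(-320, -320))  # crop back to s x s
assert out.shape[:2] == (640, 640)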