Spaces:

xiang-wuu
/

yolov5

Runtime error

App Files Files Community

glenn-jocher committed on Oct 15, 2021

Commit

991c654

unverified ·

1 Parent(s): fc36064

Autofix duplicate label handling (#5210)

Browse files

* Autofix duplicate labels

This PR changes duplicate-label handling: instead of reporting an error and ignoring the image-label pair, it now reports a warning and automatically fixes the pair by removing the duplicate label rows.

This should fix this common issue for users and allow everyone to get started and get a model trained faster and easier than before.

* sign fix

* Cleanup

* Increment cache version

* all to any fix

Files changed (1) hide show

utils/datasets.py +12 -8

utils/datasets.py CHANGED Viewed

@@ -375,7 +375,7 @@ def img2label_paths(img_paths):
 class LoadImagesAndLabels(Dataset):
     # YOLOv5 train_loader/val_loader, loads images and labels for training and validation
-    cache_version = 0.5  # dataset labels *.cache version
     def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                  cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
@@ -897,7 +897,7 @@ def verify_image_label(args):
                 f.seek(-2, 2)
                 if f.read() != b'\xff\xd9':  # corrupt JPEG
                     Image.open(im_file).save(im_file, format='JPEG', subsampling=0, quality=100)  # re-save image
-                    msg = f'{prefix}WARNING: corrupt JPEG restored and saved {im_file}'
         # verify labels
         if os.path.isfile(lb_file):
@@ -909,11 +909,15 @@ def verify_image_label(args):
                     segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l]  # (cls, xy1...)
                     l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                 l = np.array(l, dtype=np.float32)
-            if len(l):
-                assert l.shape[1] == 5, 'labels require 5 columns each'
-                assert (l >= 0).all(), 'negative labels'
-                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
-                assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
             else:
                 ne = 1  # label empty
                 l = np.zeros((0, 5), dtype=np.float32)
@@ -923,7 +927,7 @@ def verify_image_label(args):
         return im_file, l, shape, segments, nm, nf, ne, nc, msg
     except Exception as e:
         nc = 1
-        msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
         return [None, None, None, None, nm, nf, ne, nc, msg]

 class LoadImagesAndLabels(Dataset):
     # YOLOv5 train_loader/val_loader, loads images and labels for training and validation
+    cache_version = 0.6  # dataset labels *.cache version
     def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                  cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
                 f.seek(-2, 2)
                 if f.read() != b'\xff\xd9':  # corrupt JPEG
                     Image.open(im_file).save(im_file, format='JPEG', subsampling=0, quality=100)  # re-save image
+                    msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved'
         # verify labels
         if os.path.isfile(lb_file):
                     segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l]  # (cls, xy1...)
                     l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                 l = np.array(l, dtype=np.float32)
+            nl = len(l)
+            if nl:
+                assert l.shape[1] == 5, f'labels require 5 columns, {l.shape[1]} columns detected'
+                assert (l >= 0).all(), f'negative label values {l[l < 0]}'
+                assert (l[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}'
+                l = np.unique(l, axis=0)  # remove duplicate rows
+                if len(l) < nl:
+                    segments = np.unique(segments, axis=0)
+                    msg = f'{prefix}WARNING: {im_file}: {nl - len(l)} duplicate labels removed'
             else:
                 ne = 1  # label empty
                 l = np.zeros((0, 5), dtype=np.float32)
         return im_file, l, shape, segments, nm, nf, ne, nc, msg
     except Exception as e:
         nc = 1
+        msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}'
         return [None, None, None, None, nm, nf, ne, nc, msg]