glenn-jocher committed on
Commit
47233e1
·
unverified ·
1 Parent(s): 7cef03d

Improved dataset error introspection (#8091)

Browse files

* Improved dataset error introspection

Intended to help https://github.com/ultralytics/yolov5/issues/8090

* Update general.py

Files changed (1) hide show
  1. utils/general.py +8 -5
utils/general.py CHANGED
@@ -448,8 +448,7 @@ def check_font(font=FONT, progress=False):
448
 
449
 
450
  def check_dataset(data, autodownload=True):
451
- # Download and/or unzip dataset if not found locally
452
- # Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip
453
 
454
  # Download (optional)
455
  extract_dir = ''
@@ -463,6 +462,13 @@ def check_dataset(data, autodownload=True):
463
  with open(data, errors='ignore') as f:
464
  data = yaml.safe_load(f) # dictionary
465
 
 
 
 
 
 
 
 
466
  # Resolve paths
467
  path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
468
  if not path.is_absolute():
@@ -472,9 +478,6 @@ def check_dataset(data, autodownload=True):
472
  data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
473
 
474
  # Parse yaml
475
- assert 'nc' in data, "Dataset 'nc' key missing."
476
- if 'names' not in data:
477
- data['names'] = [f'class{i}' for i in range(data['nc'])] # assign class names if missing
478
  train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
479
  if val:
480
  val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
 
448
 
449
 
450
  def check_dataset(data, autodownload=True):
451
+ # Download, check and/or unzip dataset if not found locally
 
452
 
453
  # Download (optional)
454
  extract_dir = ''
 
462
  with open(data, errors='ignore') as f:
463
  data = yaml.safe_load(f) # dictionary
464
 
465
+ # Checks
466
+ for k in 'train', 'val', 'nc':
467
+ assert k in data, emojis(f"data.yaml '{k}:' field missing ❌")
468
+ if 'names' not in data:
469
+ LOGGER.warning(emojis("data.yaml 'names:' field missing ⚠, assigning default names 'class0', 'class1', etc."))
470
+ data['names'] = [f'class{i}' for i in range(data['nc'])] # default names
471
+
472
  # Resolve paths
473
  path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
474
  if not path.is_absolute():
 
478
  data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
479
 
480
  # Parse yaml
 
 
 
481
  train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
482
  if val:
483
  val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path