Improved dataset error introspection (#8091)
Browse files
* Improved dataset error introspection
Intended to help with https://github.com/ultralytics/yolov5/issues/8090
* Update general.py
- utils/general.py +8 -5
utils/general.py
CHANGED
@@ -448,8 +448,7 @@ def check_font(font=FONT, progress=False):
|
|
448 |
|
449 |
|
450 |
def check_dataset(data, autodownload=True):
|
451 |
-
# Download and/or unzip dataset if not found locally
|
452 |
-
# Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip
|
453 |
|
454 |
# Download (optional)
|
455 |
extract_dir = ''
|
@@ -463,6 +462,13 @@ def check_dataset(data, autodownload=True):
|
|
463 |
with open(data, errors='ignore') as f:
|
464 |
data = yaml.safe_load(f) # dictionary
|
465 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
# Resolve paths
|
467 |
path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
|
468 |
if not path.is_absolute():
|
@@ -472,9 +478,6 @@ def check_dataset(data, autodownload=True):
|
|
472 |
data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
|
473 |
|
474 |
# Parse yaml
|
475 |
-
assert 'nc' in data, "Dataset 'nc' key missing."
|
476 |
-
if 'names' not in data:
|
477 |
-
data['names'] = [f'class{i}' for i in range(data['nc'])] # assign class names if missing
|
478 |
train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
|
479 |
if val:
|
480 |
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
|
|
448 |
|
449 |
|
450 |
def check_dataset(data, autodownload=True):
|
451 |
+
# Download, check and/or unzip dataset if not found locally
|
|
|
452 |
|
453 |
# Download (optional)
|
454 |
extract_dir = ''
|
|
|
462 |
with open(data, errors='ignore') as f:
|
463 |
data = yaml.safe_load(f) # dictionary
|
464 |
|
465 |
+
# Checks
|
466 |
+
for k in 'train', 'val', 'nc':
|
467 |
+
assert k in data, emojis(f"data.yaml '{k}:' field missing ❌")
|
468 |
+
if 'names' not in data:
|
469 |
+
LOGGER.warning(emojis("data.yaml 'names:' field missing ⚠, assigning default names 'class0', 'class1', etc."))
|
470 |
+
data['names'] = [f'class{i}' for i in range(data['nc'])] # default names
|
471 |
+
|
472 |
# Resolve paths
|
473 |
path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
|
474 |
if not path.is_absolute():
|
|
|
478 |
data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
|
479 |
|
480 |
# Parse yaml
|
|
|
|
|
|
|
481 |
train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
|
482 |
if val:
|
483 |
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|