Update `dataset_stats()` for HUB (#3536)
Browse files
* Update `dataset_stats()` for HUB
Cleanup of b6fdd2e
* autodownload flag
* Update general.py
* cleanup
- utils/datasets.py +5 -6
- utils/general.py +3 -3
utils/datasets.py
CHANGED
@@ -1086,18 +1086,17 @@ def verify_image_label(params):
|
|
1086 |
return [None] * 4 + [nm, nf, ne, nc]
|
1087 |
|
1088 |
|
1089 |
-
def dataset_stats(path='
|
1090 |
""" Return dataset statistics dictionary with images and instances counts per split per class
|
1091 |
-
Usage: from utils.datasets import *; dataset_stats('
|
1092 |
Arguments
|
1093 |
path: Path to data.yaml
|
|
|
1094 |
verbose: Print stats dictionary
|
1095 |
"""
|
1096 |
-
|
1097 |
-
with open(path) as f:
|
1098 |
data = yaml.safe_load(f) # data dict
|
1099 |
-
check_dataset(data) # download dataset if missing
|
1100 |
-
|
1101 |
nc = data['nc'] # number of classes
|
1102 |
stats = {'nc': nc, 'names': data['names']} # statistics dictionary
|
1103 |
for split in 'train', 'val', 'test':
|
|
|
1086 |
return [None] * 4 + [nm, nf, ne, nc]
|
1087 |
|
1088 |
|
1089 |
+
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
|
1090 |
""" Return dataset statistics dictionary with images and instances counts per split per class
|
1091 |
+
Usage: from utils.datasets import *; dataset_stats('coco128.yaml', verbose=True)
|
1092 |
Arguments
|
1093 |
path: Path to data.yaml
|
1094 |
+
autodownload: Attempt to download dataset if not found locally
|
1095 |
verbose: Print stats dictionary
|
1096 |
"""
|
1097 |
+
with open(check_file(Path(path))) as f:
|
|
|
1098 |
data = yaml.safe_load(f) # data dict
|
1099 |
+
check_dataset(data, autodownload) # download dataset if missing
|
|
|
1100 |
nc = data['nc'] # number of classes
|
1101 |
stats = {'nc': nc, 'names': data['names']} # statistics dictionary
|
1102 |
for split in 'train', 'val', 'test':
|
utils/general.py
CHANGED
@@ -220,14 +220,14 @@ def check_file(file):
|
|
220 |
return files[0] # return file
|
221 |
|
222 |
|
223 |
-
def check_dataset(
|
224 |
# Download dataset if not found locally
|
225 |
-
val, s =
|
226 |
if val and len(val):
|
227 |
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
228 |
if not all(x.exists() for x in val):
|
229 |
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
|
230 |
-
if s and len(s): # download script
|
231 |
if s.startswith('http') and s.endswith('.zip'): # URL
|
232 |
f = Path(s).name # filename
|
233 |
print(f'Downloading {s} ...')
|
|
|
220 |
return files[0] # return file
|
221 |
|
222 |
|
223 |
+
def check_dataset(data, autodownload=True):
|
224 |
# Download dataset if not found locally
|
225 |
+
val, s = data.get('val'), data.get('download')
|
226 |
if val and len(val):
|
227 |
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
228 |
if not all(x.exists() for x in val):
|
229 |
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
|
230 |
+
if s and len(s) and autodownload: # download script
|
231 |
if s.startswith('http') and s.endswith('.zip'): # URL
|
232 |
f = Path(s).name # filename
|
233 |
print(f'Downloading {s} ...')
|