glenn-jocher committed on
Commit
1b5edb6
·
unverified ·
1 Parent(s): b6fdd2e

Update `dataset_stats()` for HUB (#3536)

Browse files

* Update `dataset_stats()` for HUB

Cleanup of b6fdd2e

* autodownload flag

* Update general.py

* cleanup

Files changed (2) hide show
  1. utils/datasets.py +5 -6
  2. utils/general.py +3 -3
utils/datasets.py CHANGED
@@ -1086,18 +1086,17 @@ def verify_image_label(params):
1086
  return [None] * 4 + [nm, nf, ne, nc]
1087
 
1088
 
1089
- def dataset_stats(path='data/coco128.yaml', verbose=False):
1090
  """ Return dataset statistics dictionary with images and instances counts per split per class
1091
- Usage: from utils.datasets import *; dataset_stats('data/coco128.yaml')
1092
  Arguments
1093
  path: Path to data.yaml
 
1094
  verbose: Print stats dictionary
1095
  """
1096
- path = check_file(Path(path))
1097
- with open(path) as f:
1098
  data = yaml.safe_load(f) # data dict
1099
- check_dataset(data) # download dataset if missing
1100
-
1101
  nc = data['nc'] # number of classes
1102
  stats = {'nc': nc, 'names': data['names']} # statistics dictionary
1103
  for split in 'train', 'val', 'test':
 
1086
  return [None] * 4 + [nm, nf, ne, nc]
1087
 
1088
 
1089
+ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
1090
  """ Return dataset statistics dictionary with images and instances counts per split per class
1091
+ Usage: from utils.datasets import *; dataset_stats('coco128.yaml', verbose=True)
1092
  Arguments
1093
  path: Path to data.yaml
1094
+ autodownload: Attempt to download dataset if not found locally
1095
  verbose: Print stats dictionary
1096
  """
1097
+ with open(check_file(Path(path))) as f:
 
1098
  data = yaml.safe_load(f) # data dict
1099
+ check_dataset(data, autodownload) # download dataset if missing
 
1100
  nc = data['nc'] # number of classes
1101
  stats = {'nc': nc, 'names': data['names']} # statistics dictionary
1102
  for split in 'train', 'val', 'test':
utils/general.py CHANGED
@@ -220,14 +220,14 @@ def check_file(file):
220
  return files[0] # return file
221
 
222
 
223
- def check_dataset(dict):
224
  # Download dataset if not found locally
225
- val, s = dict.get('val'), dict.get('download')
226
  if val and len(val):
227
  val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
228
  if not all(x.exists() for x in val):
229
  print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
230
- if s and len(s): # download script
231
  if s.startswith('http') and s.endswith('.zip'): # URL
232
  f = Path(s).name # filename
233
  print(f'Downloading {s} ...')
 
220
  return files[0] # return file
221
 
222
 
223
+ def check_dataset(data, autodownload=True):
224
  # Download dataset if not found locally
225
+ val, s = data.get('val'), data.get('download')
226
  if val and len(val):
227
  val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
228
  if not all(x.exists() for x in val):
229
  print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
230
+ if s and len(s) and autodownload: # download script
231
  if s.startswith('http') and s.endswith('.zip'): # URL
232
  f = Path(s).name # filename
233
  print(f'Downloading {s} ...')