glenn-jocher committed
Commit c6b5bfc · unverified · Parent: 1f8d716

Updated cache v0.2 with `hashlib` (#3350)


* Update cache v0.2 to include parent hash

Possible fix for https://github.com/ultralytics/yolov5/issues/3349

* Update datasets.py
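
For context, a minimal, hypothetical sketch (not part of the commit; paths and contents are invented) of why folding the parent paths into the hash matters: the old size-sum hash cannot tell that a dataset has been moved or renamed, while the new `hashlib`-based hash changes as soon as any path changes, which forces a re-cache.

```python
# Hypothetical illustration only: two label files with identical sizes
# under different parent directories.
import hashlib
import os
import tempfile


def old_hash(files):
    # pre-v0.2 behaviour: total byte size only
    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))


def new_hash(paths):
    # v0.2 behaviour: md5 of the total size, updated with the joined path strings
    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))
    h = hashlib.md5(str(size).encode())
    h.update(''.join(paths).encode())
    return h.hexdigest()


with tempfile.TemporaryDirectory() as root:
    a = os.path.join(root, 'coco', 'labels', 'im1.txt')
    b = os.path.join(root, 'coco_moved', 'labels', 'im1.txt')
    for f in (a, b):
        os.makedirs(os.path.dirname(f))
        with open(f, 'w') as fp:
            fp.write('0 0.5 0.5 0.1 0.1\n')  # same content, so same size

    print(old_hash([a]) == old_hash([b]))  # True: the size sum misses the move
    print(new_hash([a]) == new_hash([b]))  # False: the path change alters the md5
```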

Files changed (1)
  1. utils/datasets.py +10 -6
utils/datasets.py CHANGED
@@ -1,6 +1,7 @@
 # Dataset utils and dataloaders
 
 import glob
+import hashlib
 import logging
 import math
 import os
@@ -36,9 +37,12 @@ for orientation in ExifTags.TAGS.keys():
         break
 
 
-def get_hash(files):
-    # Returns a single hash value of a list of files
-    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
+def get_hash(paths):
+    # Returns a single hash value of a list of paths (files or dirs)
+    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
+    h = hashlib.md5(str(size).encode())  # hash sizes
+    h.update(''.join(paths).encode())  # hash paths
+    return h.hexdigest()  # return hash
 
 
 def exif_size(img):
@@ -383,7 +387,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')  # cached labels
         if cache_path.is_file():
             cache, exists = torch.load(cache_path), True  # load
-            if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache:  # changed
+            if cache['hash'] != get_hash(self.label_files + self.img_files):  # changed
                 cache, exists = self.cache_labels(cache_path, prefix), False  # re-cache
         else:
             cache, exists = self.cache_labels(cache_path, prefix), False  # cache
@@ -501,9 +505,9 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
 
         x['hash'] = get_hash(self.label_files + self.img_files)
         x['results'] = nf, nm, ne, nc, i + 1
-        x['version'] = 0.1  # cache version
+        x['version'] = 0.2  # cache version
         try:
-            torch.save(x, path)  # save for next time
+            torch.save(x, path)  # save cache for next time
             logging.info(f'{prefix}New cache created: {path}')
         except Exception as e:
             logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # path not writeable
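
One migration detail implied by the diff: caches written before this change stored the integer size sum under `'hash'`, while the new `get_hash()` returns an md5 hex string, so the comparison in `LoadImagesAndLabels` fails once and the labels are re-cached with `version` 0.2. A tiny sketch of that check, with hypothetical values:

```python
# Hypothetical values: the comparison that triggers the one-time re-cache.
import hashlib

old_cache = {'hash': 123456789, 'version': 0.1}  # shape of a pre-0.2 cache entry
new_style = hashlib.md5(str(123456789).encode()).hexdigest()  # new-style hash of the same size sum
print(old_cache['hash'] != new_style)  # True -> cache_labels() runs and saves version 0.2
```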