Updated cache v0.2 with `hashlib` (#3350)
* Update cache v0.2 to include parent hash
Possible fix for https://github.com/ultralytics/yolov5/issues/3349
* Update datasets.py
- utils/datasets.py +10 -6
utils/datasets.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# Dataset utils and dataloaders
|
2 |
|
3 |
import glob
|
|
|
4 |
import logging
|
5 |
import math
|
6 |
import os
|
@@ -36,9 +37,12 @@ for orientation in ExifTags.TAGS.keys():
|
|
36 |
break
|
37 |
|
38 |
|
39 |
-
def get_hash(
|
40 |
-
# Returns a single hash value of a list of files
|
41 |
-
|
|
|
|
|
|
|
42 |
|
43 |
|
44 |
def exif_size(img):
|
@@ -383,7 +387,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
|
383 |
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
|
384 |
if cache_path.is_file():
|
385 |
cache, exists = torch.load(cache_path), True # load
|
386 |
-
if cache['hash'] != get_hash(self.label_files + self.img_files)
|
387 |
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
|
388 |
else:
|
389 |
cache, exists = self.cache_labels(cache_path, prefix), False # cache
|
@@ -501,9 +505,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
|
501 |
|
502 |
x['hash'] = get_hash(self.label_files + self.img_files)
|
503 |
x['results'] = nf, nm, ne, nc, i + 1
|
504 |
-
x['version'] = 0.
|
505 |
try:
|
506 |
-
torch.save(x, path) # save for next time
|
507 |
logging.info(f'{prefix}New cache created: {path}')
|
508 |
except Exception as e:
|
509 |
logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable
|
|
|
1 |
# Dataset utils and dataloaders
|
2 |
|
3 |
import glob
|
4 |
+
import hashlib
|
5 |
import logging
|
6 |
import math
|
7 |
import os
|
|
|
37 |
break
|
38 |
|
39 |
|
40 |
+
def get_hash(paths):
|
41 |
+
# Returns a single hash value of a list of paths (files or dirs)
|
42 |
+
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
|
43 |
+
h = hashlib.md5(str(size).encode()) # hash sizes
|
44 |
+
h.update(''.join(paths).encode()) # hash paths
|
45 |
+
return h.hexdigest() # return hash
|
46 |
|
47 |
|
48 |
def exif_size(img):
|
|
|
387 |
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
|
388 |
if cache_path.is_file():
|
389 |
cache, exists = torch.load(cache_path), True # load
|
390 |
+
if cache['hash'] != get_hash(self.label_files + self.img_files): # changed
|
391 |
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
|
392 |
else:
|
393 |
cache, exists = self.cache_labels(cache_path, prefix), False # cache
|
|
|
505 |
|
506 |
x['hash'] = get_hash(self.label_files + self.img_files)
|
507 |
x['results'] = nf, nm, ne, nc, i + 1
|
508 |
+
x['version'] = 0.2 # cache version
|
509 |
try:
|
510 |
+
torch.save(x, path) # save cache for next time
|
511 |
logging.info(f'{prefix}New cache created: {path}')
|
512 |
except Exception as e:
|
513 |
logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable
|