glenn-jocher committed on
Commit
8cab907
·
unverified ·
1 Parent(s): 5189b3a

download() ThreadPool update (#3027)

Browse files

* download() ThreadPool update

* update train image count

* cid + 1

Files changed (2) hide show
  1. data/objects365.yaml +8 -3
  2. utils/general.py +4 -1
data/objects365.yaml CHANGED
@@ -6,7 +6,7 @@
6
  # /yolov5
7
 
8
  # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
9
- train: ../datasets/objects365/images/train # 1.7 Million images
10
  val: ../datasets/objects365/images/val # 5570 images
11
 
12
  # number of classes
@@ -72,17 +72,22 @@ download: |
72
 
73
  # Download
74
  url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
75
- download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir) # annotations json
76
  download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
77
  curl=True, delete=False, threads=8)
78
 
 
 
 
 
 
79
  # Labels
80
  coco = COCO(dir / 'zhiyuan_objv2_train.json')
81
  names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
82
  for cid, cat in enumerate(names):
83
  catIds = coco.getCatIds(catNms=[cat])
84
  imgIds = coco.getImgIds(catIds=catIds)
85
- for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid}/{len(names)} {cat}'):
86
  width, height = im["width"], im["height"]
87
  path = Path(im["file_name"]) # image filename
88
  try:
 
6
  # /yolov5
7
 
8
  # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
9
+ train: ../datasets/objects365/images/train # 1742289 images
10
  val: ../datasets/objects365/images/val # 5570 images
11
 
12
  # number of classes
 
72
 
73
  # Download
74
  url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
75
+ download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
76
  download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
77
  curl=True, delete=False, threads=8)
78
 
79
+ # Move
80
+ train = dir / 'images' / 'train'
81
+ for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
82
+ f.rename(train / f.name) # move to /images/train
83
+
84
  # Labels
85
  coco = COCO(dir / 'zhiyuan_objv2_train.json')
86
  names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
87
  for cid, cat in enumerate(names):
88
  catIds = coco.getCatIds(catNms=[cat])
89
  imgIds = coco.getImgIds(catIds=catIds)
90
+ for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
91
  width, height = im["width"], im["height"]
92
  path = Path(im["file_name"]) # image filename
93
  try:
utils/general.py CHANGED
@@ -217,7 +217,10 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
217
  dir = Path(dir)
218
  dir.mkdir(parents=True, exist_ok=True) # make directory
219
  if threads > 1:
220
- ThreadPool(threads).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
 
 
 
221
  else:
222
  for u in tuple(url) if isinstance(url, str) else url:
223
  download_one(u, dir)
 
217
  dir = Path(dir)
218
  dir.mkdir(parents=True, exist_ok=True) # make directory
219
  if threads > 1:
220
+ pool = ThreadPool(threads)
221
+ pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
222
+ pool.close()
223
+ pool.join()
224
  else:
225
  for u in tuple(url) if isinstance(url, str) else url:
226
  download_one(u, dir)