download() ThreadPool update (#3027)
Browse files
* download() ThreadPool update
* update train image count
* cid + 1
- data/objects365.yaml +8 -3
- utils/general.py +4 -1
data/objects365.yaml
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
# /yolov5
|
7 |
|
8 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
9 |
-
train: ../datasets/objects365/images/train #
|
10 |
val: ../datasets/objects365/images/val # 5570 images
|
11 |
|
12 |
# number of classes
|
@@ -72,17 +72,22 @@ download: |
|
|
72 |
|
73 |
# Download
|
74 |
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
|
75 |
-
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir) # annotations json
|
76 |
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
|
77 |
curl=True, delete=False, threads=8)
|
78 |
|
|
|
|
|
|
|
|
|
|
|
79 |
# Labels
|
80 |
coco = COCO(dir / 'zhiyuan_objv2_train.json')
|
81 |
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
82 |
for cid, cat in enumerate(names):
|
83 |
catIds = coco.getCatIds(catNms=[cat])
|
84 |
imgIds = coco.getImgIds(catIds=catIds)
|
85 |
-
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid}/{len(names)} {cat}'):
|
86 |
width, height = im["width"], im["height"]
|
87 |
path = Path(im["file_name"]) # image filename
|
88 |
try:
|
|
|
6 |
# /yolov5
|
7 |
|
8 |
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
9 |
+
train: ../datasets/objects365/images/train # 1742289 images
|
10 |
val: ../datasets/objects365/images/val # 5570 images
|
11 |
|
12 |
# number of classes
|
|
|
72 |
|
73 |
# Download
|
74 |
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
|
75 |
+
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
|
76 |
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
|
77 |
curl=True, delete=False, threads=8)
|
78 |
|
79 |
+
# Move
|
80 |
+
train = dir / 'images' / 'train'
|
81 |
+
for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
|
82 |
+
f.rename(train / f.name) # move to /images/train
|
83 |
+
|
84 |
# Labels
|
85 |
coco = COCO(dir / 'zhiyuan_objv2_train.json')
|
86 |
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
87 |
for cid, cat in enumerate(names):
|
88 |
catIds = coco.getCatIds(catNms=[cat])
|
89 |
imgIds = coco.getImgIds(catIds=catIds)
|
90 |
+
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
|
91 |
width, height = im["width"], im["height"]
|
92 |
path = Path(im["file_name"]) # image filename
|
93 |
try:
|
utils/general.py
CHANGED
@@ -217,7 +217,10 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
|
|
217 |
dir = Path(dir)
|
218 |
dir.mkdir(parents=True, exist_ok=True) # make directory
|
219 |
if threads > 1:
|
220 |
-
ThreadPool(threads)
|
|
|
|
|
|
|
221 |
else:
|
222 |
for u in tuple(url) if isinstance(url, str) else url:
|
223 |
download_one(u, dir)
|
|
|
217 |
dir = Path(dir)
|
218 |
dir.mkdir(parents=True, exist_ok=True) # make directory
|
219 |
if threads > 1:
|
220 |
+
pool = ThreadPool(threads)
|
221 |
+
pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
|
222 |
+
pool.close()
|
223 |
+
pool.join()
|
224 |
else:
|
225 |
for u in tuple(url) if isinstance(url, str) else url:
|
226 |
download_one(u, dir)
|