♻️ [Refactor] get_dataset to use Hydra
Browse files- utils/get_dataset.py +12 -7
utils/get_dataset.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
-
import requests
|
2 |
-
import zipfile
|
3 |
import os
|
4 |
-
|
|
|
|
|
5 |
from loguru import logger
|
|
|
|
|
6 |
|
7 |
|
8 |
def download_file(url, dest_path):
|
@@ -41,11 +43,14 @@ def check_files(directory, expected_count):
|
|
41 |
return num_files == expected_count
|
42 |
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
47 |
|
48 |
-
for
|
|
|
49 |
url = f"{base_url}{file_name}"
|
50 |
local_zip_path = os.path.join(data_dir, file_name)
|
51 |
extract_to = os.path.join(data_dir, dataset_type, "images")
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import zipfile
|
3 |
+
|
4 |
+
import hydra
|
5 |
from loguru import logger
|
6 |
+
import requests
|
7 |
+
from tqdm.rich import tqdm
|
8 |
|
9 |
|
10 |
def download_file(url, dest_path):
|
|
|
43 |
return num_files == expected_count
|
44 |
|
45 |
|
46 |
+
@hydra.main(config_path="../config/data", config_name="download", version_base=None)
|
47 |
+
def download_coco_dataset(download_cfg):
|
48 |
+
data_dir = download_cfg.path
|
49 |
+
base_url = download_cfg.images.base_url
|
50 |
+
datasets = download_cfg.images.datasets
|
51 |
|
52 |
+
for dataset_type in datasets:
|
53 |
+
file_name, expected_files = datasets[dataset_type].values()
|
54 |
url = f"{base_url}{file_name}"
|
55 |
local_zip_path = os.path.join(data_dir, file_name)
|
56 |
extract_to = os.path.join(data_dir, dataset_type, "images")
|