henry000 committed on
Commit
5002339
·
1 Parent(s): 58bac2b

♻️ [Refactor] get_dataset to use hydra

Browse files
Files changed (1) hide show
  1. utils/get_dataset.py +12 -7
utils/get_dataset.py CHANGED
@@ -1,8 +1,10 @@
1
- import requests
2
- import zipfile
3
  import os
4
- from tqdm.rich import tqdm
 
 
5
  from loguru import logger
 
 
6
 
7
 
8
  def download_file(url, dest_path):
@@ -41,11 +43,14 @@ def check_files(directory, expected_count):
41
  return num_files == expected_count
42
 
43
 
44
- def download_coco_dataset(data_dir: str = "./data/coco"):
45
- base_url = "http://images.cocodataset.org/zips/"
46
- datasets = {"train2017.zip": ("train", 118287), "test2017.zip": ("test", 40670), "val2017.zip": ("val", 5000)}
 
 
47
 
48
- for file_name, (dataset_type, expected_files) in datasets.items():
 
49
  url = f"{base_url}{file_name}"
50
  local_zip_path = os.path.join(data_dir, file_name)
51
  extract_to = os.path.join(data_dir, dataset_type, "images")
 
 
 
1
  import os
2
+ import zipfile
3
+
4
+ import hydra
5
  from loguru import logger
6
+ import requests
7
+ from tqdm.rich import tqdm
8
 
9
 
10
  def download_file(url, dest_path):
 
43
  return num_files == expected_count
44
 
45
 
46
+ @hydra.main(config_path="../config/data", config_name="download", version_base=None)
47
+ def download_coco_dataset(download_cfg):
48
+ data_dir = download_cfg.path
49
+ base_url = download_cfg.images.base_url
50
+ datasets = download_cfg.images.datasets
51
 
52
+ for dataset_type in datasets:
53
+ file_name, expected_files = datasets[dataset_type].values()
54
  url = f"{base_url}{file_name}"
55
  local_zip_path = os.path.join(data_dir, file_name)
56
  extract_to = os.path.join(data_dir, dataset_type, "images")