Spaces:
Sleeping
Sleeping
File size: 1,939 Bytes
d9272c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""
import os
from pathlib import Path
from omegaconf import OmegaConf
from lavis.common.utils import (
cleanup_dir,
get_abs_path,
get_cache_path,
)
import opendatasets as od
# Kaggle dataset page that the download step of this script fetches from.
DATA_URL = "https://www.kaggle.com/datasets/hsankesara/flickr-image-dataset"

# Credentials reminder, printed as soon as the module is executed.
_KAGGLE_NOTICE = """
To download the dataset, you need to have a Kaggle account and the associated key.
See https://www.kaggle.com/docs/api to create account and a new API token.
"""
print(_KAGGLE_NOTICE)
def move_directory(src_dir, dst_dir):
    """
    Move every entry found directly inside ``src_dir`` into ``dst_dir``.

    ``dst_dir`` is created (including missing parents) before anything is
    moved. ``src_dir`` itself is left in place, empty, afterwards.

    Args:
        src_dir: directory whose immediate children (files and
            sub-directories) are moved.
        dst_dir: directory that receives them; entries keep their names.

    Raises:
        FileNotFoundError: if ``src_dir`` does not exist.
    """
    # Imported here so the function does not depend on a file-level import.
    import shutil

    print("Moving to {}".format(dst_dir))
    os.makedirs(dst_dir, exist_ok=True)

    for file_name in os.listdir(src_dir):
        # os.rename raises OSError when source and destination live on
        # different filesystems (e.g. the cache is on a separate mount).
        # shutil.move uses rename when it can and otherwise copies the
        # entry and removes the original.
        shutil.move(
            os.path.join(src_dir, file_name),
            os.path.join(dst_dir, file_name),
        )
if __name__ == "__main__":
    # Resolve where the images must end up from the dataset's default config.
    config_path = get_abs_path("configs/datasets/flickr30k/defaults.yaml")
    dataset_config = OmegaConf.load(config_path)
    storage_dir = dataset_config.datasets.flickr30k.build_info.images.storage

    storage_dir = Path(get_cache_path(storage_dir))
    # Scratch directory for the raw download, a sibling of the storage dir.
    download_dir = storage_dir.parent / "download"

    if storage_dir.exists():
        print(f"Dataset already exists at {storage_dir}. Aborting.")
        # SystemExit is raised directly: the bare ``exit`` helper is injected
        # by the ``site`` module and is not guaranteed to be defined.
        raise SystemExit(0)

    # An interrupted earlier run (e.g. Ctrl-C, which the handler below does
    # not catch) can leave the scratch directory behind. Remove it so the
    # script neither crashes with FileExistsError nor reuses a partial
    # download.
    if download_dir.exists():
        cleanup_dir(download_dir)
    os.makedirs(download_dir, exist_ok=True)

    try:
        print("Downloading {} to {}".format(DATA_URL, download_dir))
        od.download(DATA_URL, download_dir)
    except Exception as e:
        print(e)
        # remove download dir if failed
        cleanup_dir(download_dir)
        raise SystemExit(1)

    # Move the images out of the nested folders of the download into the
    # storage location, then drop the scratch directory.
    move_directory(
        download_dir / "flickr-image-dataset" / "flickr30k_images" / "flickr30k_images",
        storage_dir / "flickr30k-images",
    )
    cleanup_dir(download_dir)
|