CRSArena / download_external_data.py
Nolwenn
Change to docker space
dbb98e1
raw
history blame
1.93 kB
"""Script to download external data for the project at build time."""
import logging
import os
import tarfile
import wget
def download_and_extract_models() -> None:
    """Downloads the models folder from the server and extracts it.

    The archive URL is read from the ``MODELS_FOLDER_URL`` environment
    variable. The archive is saved as ``models.tar.gz`` in the current
    working directory, extracted into ``data/models/`` and then deleted.

    This is a best-effort operation: failures are logged, never raised.
    """
    logging.debug("Downloading models folder.")
    models_url = os.environ.get("MODELS_FOLDER_URL")
    models_targz = "models.tar.gz"
    models_folder = "data/models/"

    # Fail with a clear message instead of handing None to the downloader.
    if not models_url:
        logging.error(
            "Error downloading models folder: "
            "MODELS_FOLDER_URL environment variable is not set."
        )
        return

    try:
        wget.download(models_url, models_targz)
        logging.debug("Extracting models folder.")
        with tarfile.open(models_targz, "r:gz") as tar:
            # Prefer the "data" extraction filter when this Python version
            # provides it, so archive members are validated before being
            # written below the destination folder.
            if hasattr(tarfile, "data_filter"):
                tar.extractall(models_folder, filter="data")
            else:
                tar.extractall(models_folder)
        logging.debug("Models folder downloaded and extracted.")
    except Exception as e:
        logging.error(f"Error downloading models folder: {e}")
    finally:
        # Always clean up the archive, including after a failed download or
        # extraction, so a stale or partial file is never left behind.
        try:
            os.remove(models_targz)
        except FileNotFoundError:
            pass
        except OSError as e:
            logging.error(f"Error removing {models_targz}: {e}")
def download_and_extract_item_embeddings() -> None:
    """Downloads the item embeddings folder from the server and extracts it.

    The archive URL is read from the ``ITEM_EMBEDDINGS_URL`` environment
    variable. The archive is saved as ``item_embeddings.tar.bz2`` in the
    current working directory, extracted into ``data/`` and then deleted.

    This is a best-effort operation: failures are logged, never raised.
    """
    logging.debug("Downloading item embeddings folder.")
    item_embeddings_url = os.environ.get("ITEM_EMBEDDINGS_URL")
    item_embeddings_tarbz = "item_embeddings.tar.bz2"
    item_embeddings_folder = "data/"

    # Fail with a clear message instead of handing None to the downloader.
    if not item_embeddings_url:
        logging.error(
            "Error downloading item embeddings folder: "
            "ITEM_EMBEDDINGS_URL environment variable is not set."
        )
        return

    try:
        wget.download(item_embeddings_url, item_embeddings_tarbz)
        logging.debug("Extracting item embeddings folder.")
        with tarfile.open(item_embeddings_tarbz, "r:bz2") as tar:
            # Prefer the "data" extraction filter when this Python version
            # provides it, so archive members are validated before being
            # written below the destination folder.
            if hasattr(tarfile, "data_filter"):
                tar.extractall(item_embeddings_folder, filter="data")
            else:
                tar.extractall(item_embeddings_folder)
        logging.debug("Item embeddings folder downloaded and extracted.")
    except Exception as e:
        logging.error(f"Error downloading item embeddings folder: {e}")
    finally:
        # Always clean up the archive, including after a failed download or
        # extraction, so a stale or partial file is never left behind.
        try:
            os.remove(item_embeddings_tarbz)
        except FileNotFoundError:
            pass
        except OSError as e:
            logging.error(f"Error removing {item_embeddings_tarbz}: {e}")
if __name__ == "__main__":
    # Script entry point: fetch each external data set only when its target
    # directory does not exist yet.

    # Without explicit configuration the root logger only emits WARNING and
    # above, so the progress messages below would never be shown.
    logging.basicConfig(level=logging.INFO)

    if not os.path.exists("data/models"):
        logging.info("Downloading models...")
        download_and_extract_models()

    if not os.path.exists("data/embed_items"):
        logging.info("Downloading item embeddings...")
        download_and_extract_item_embeddings()