"""Script to download external data for the project at build time.""" import logging import os import tarfile import wget def download_and_extract_models() -> None: """Downloads the models folder from the server and extracts it.""" logging.debug("Downloading models folder.") models_url = os.environ.get("MODELS_FOLDER_URL") models_targz = "models.tar.gz" models_folder = "data/models/" try: wget.download(models_url, models_targz) logging.debug("Extracting models folder.") with tarfile.open(models_targz, "r:gz") as tar: tar.extractall(models_folder) os.remove(models_targz) logging.debug("Models folder downloaded and extracted.") except Exception as e: logging.error(f"Error downloading models folder: {e}") def download_and_extract_item_embeddings() -> None: """Downloads the item embeddings folder from the server and extracts it.""" logging.debug("Downloading item embeddings folder.") item_embeddings_url = os.environ.get("ITEM_EMBEDDINGS_URL") item_embeddings_tarbz = "item_embeddings.tar.bz2" item_embeddings_folder = "data/" try: wget.download(item_embeddings_url, item_embeddings_tarbz) logging.debug("Extracting item embeddings folder.") with tarfile.open(item_embeddings_tarbz, "r:bz2") as tar: tar.extractall(item_embeddings_folder) os.remove(item_embeddings_tarbz) logging.debug("Item embeddings folder downloaded and extracted.") except Exception as e: logging.error(f"Error downloading item embeddings folder: {e}") if __name__ == "__main__": if not os.path.exists("data/models"): logging.info("Downloading models...") download_and_extract_models() if not os.path.exists("data/embed_items"): logging.info("Downloading item embeddings...") download_and_extract_item_embeddings()