import json from loguru import logger try: from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements except ModuleNotFoundError: logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.") from llm_engineering.settings import settings hugging_face_deploy_config = { "HF_MODEL_ID": settings.HF_MODEL_ID, "HUGGING_FACE_HUB_TOKEN": settings.HUGGINGFACE_ACCESS_TOKEN, "SM_NUM_GPUS": json.dumps(settings.SM_NUM_GPUS), # Number of GPU used per replica "MAX_INPUT_LENGTH": json.dumps(settings.MAX_INPUT_LENGTH), # Max length of input text "MAX_TOTAL_TOKENS": json.dumps(settings.MAX_TOTAL_TOKENS), # Max length of the generation (including input text) "MAX_BATCH_TOTAL_TOKENS": json.dumps(settings.MAX_BATCH_TOTAL_TOKENS), "MAX_BATCH_PREFILL_TOKENS": json.dumps(settings.MAX_BATCH_TOTAL_TOKENS), "HF_MODEL_QUANTIZE": "bitsandbytes", } model_resource_config = ResourceRequirements( requests={ "copies": settings.COPIES, # Number of replicas. "num_accelerators": settings.GPUS, # Number of GPUs required. "num_cpus": settings.CPUS, # Number of CPU cores required. "memory": 5 * 1024, # Minimum memory required in Mb (required) }, )