Spaces:
Build error
Build error
from loguru import logger | |
try: | |
from sagemaker.enums import EndpointType | |
from sagemaker.huggingface import get_huggingface_llm_image_uri | |
except ModuleNotFoundError: | |
logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.") | |
from llm_engineering.model.utils import ResourceManager | |
from llm_engineering.settings import settings | |
from .config import hugging_face_deploy_config, model_resource_config | |
from .sagemaker_huggingface import DeploymentService, SagemakerHuggingfaceStrategy | |
def create_endpoint(endpoint_type: "EndpointType | None" = None) -> None:
    """Deploy the configured Hugging Face model to a SageMaker endpoint.

    Looks up the Hugging Face LLM image URI, wraps a fresh
    ``ResourceManager`` in a ``DeploymentService``, and delegates the
    deployment to ``SagemakerHuggingfaceStrategy.deploy`` using values read
    from ``settings`` and the module-level config objects.

    Args:
        endpoint_type: SageMaker ``EndpointType`` to deploy with. ``None``
            (the default) selects ``EndpointType.INFERENCE_COMPONENT_BASED``.

    Raises:
        ValueError: If ``settings.AWS_ARN_ROLE`` is not set.
    """
    # Explicit check rather than `assert`: assertions are stripped under
    # `python -O`, which would let a missing role ARN reach the deploy call.
    if settings.AWS_ARN_ROLE is None:
        raise ValueError("AWS_ARN_ROLE is not set in the .env file.")

    # Resolve the default at call time, not at definition time. The SageMaker
    # import at the top of the module is optional; a definition-time default
    # would raise NameError on import whenever that import was skipped.
    if endpoint_type is None:
        endpoint_type = EndpointType.INFERENCE_COMPONENT_BASED

    logger.info(f"Creating endpoint with endpoint_type = {endpoint_type} and model_id = {settings.HF_MODEL_ID}")

    llm_image = get_huggingface_llm_image_uri("huggingface", version="2.2.0")

    resource_manager = ResourceManager()
    deployment_service = DeploymentService(resource_manager=resource_manager)

    SagemakerHuggingfaceStrategy(deployment_service).deploy(
        role_arn=settings.AWS_ARN_ROLE,
        llm_image=llm_image,
        config=hugging_face_deploy_config,
        endpoint_name=settings.SAGEMAKER_ENDPOINT_INFERENCE,
        endpoint_config_name=settings.SAGEMAKER_ENDPOINT_CONFIG_INFERENCE,
        gpu_instance_type=settings.GPU_INSTANCE_TYPE,
        resources=model_resource_config,
        endpoint_type=endpoint_type,
    )
if __name__ == "__main__":
    # Running this module as a script deploys with the model-based endpoint
    # type instead of create_endpoint's default.
    script_endpoint_type = EndpointType.MODEL_BASED
    create_endpoint(endpoint_type=script_endpoint_type)