Spaces:

SkazuHD
/

docker-test

Build error

File size: 7,755 Bytes

d660b02

import enum
from typing import Optional

from loguru import logger

try:
    import boto3
    from sagemaker.enums import EndpointType
    from sagemaker.huggingface import HuggingFaceModel
except ModuleNotFoundError:
    logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.")

from llm_engineering.domain.inference import DeploymentStrategy
from llm_engineering.settings import settings


class SagemakerHuggingfaceStrategy(DeploymentStrategy):
    """Deployment strategy that publishes a HuggingFace model to AWS SageMaker.

    The strategy itself only logs and delegates: the actual work is done by the
    ``deploy`` method of the injected deployment service.
    """

    def __init__(self, deployment_service) -> None:
        """
        Initializes the deployment strategy with the necessary services.

        :param deployment_service: The service handling the deployment details.
            Its ``deploy`` method is called with the same keyword arguments
            that :meth:`deploy` receives.
        """
        self.deployment_service = deployment_service

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: Optional[enum.Enum] = None,
    ) -> None:
        """
        Initiates the deployment process for a HuggingFace model on AWS SageMaker.

        :param role_arn: AWS role ARN with permissions for SageMaker deployment.
        :param llm_image: URI for the HuggingFace model Docker image.
        :param config: Configuration settings for the model environment.
        :param endpoint_name: Name of the SageMaker endpoint.
        :param endpoint_config_name: Name of the SageMaker endpoint configuration.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints)
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
                or EndpointType.INFERENCE_COMPONENT (with inference component).
                Defaults to EndpointType.MODEL_BASED when omitted or None.
        :raises Exception: Any error raised by the deployment service is logged
                and re-raised unchanged.
        """
        # The default is resolved here rather than in the signature so that this
        # module can still be imported when the optional AWS/SageMaker packages
        # are missing (the import guard at the top of the file only warns).
        if endpoint_type is None:
            endpoint_type = EndpointType.MODEL_BASED

        logger.info("Starting deployment using Sagemaker Huggingface Strategy...")
        # Report the instance type that is actually deployed (the argument),
        # not the value stored in settings, which may differ.
        logger.info(
            f"Deployment parameters: nb of replicas: {settings.COPIES}, nb of gpus:{settings.GPUS}, instance_type:{gpu_instance_type}"
        )
        try:
            # Delegate to the deployment service to handle the actual deployment details
            self.deployment_service.deploy(
                role_arn=role_arn,
                llm_image=llm_image,
                config=config,
                endpoint_name=endpoint_name,
                endpoint_config_name=endpoint_config_name,
                gpu_instance_type=gpu_instance_type,
                resources=resources,
                endpoint_type=endpoint_type,
            )
            logger.info("Deployment completed successfully.")
        except Exception as e:
            logger.error(f"Error during deployment: {e}")
            raise


class DeploymentService:
    """Deploys HuggingFace models to SageMaker endpoints via the SageMaker SDK."""

    def __init__(self, resource_manager):
        """
        Initializes the DeploymentService with necessary dependencies.

        A boto3 SageMaker client is created from the AWS region and credentials
        found in the module-level ``settings`` object.

        :param resource_manager: Manages resources and configurations for deployments.
            Only its ``endpoint_config_exists`` method is used by this class.
        """
        self.sagemaker_client = boto3.client(
            "sagemaker",
            region_name=settings.AWS_REGION,
            aws_access_key_id=settings.AWS_ACCESS_KEY,
            aws_secret_access_key=settings.AWS_SECRET_KEY,
        )
        self.resource_manager = resource_manager

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: Optional[enum.Enum] = None,
    ) -> None:
        """
        Handles the deployment of a model to SageMaker.

        The endpoint configuration is looked up only to log whether it already
        exists; the model is then deployed through
        :meth:`prepare_and_deploy_model` in either case.

        :param role_arn: The ARN of the IAM role for SageMaker to access resources.
        :param llm_image: URI of the Docker image in ECR for the HuggingFace model.
        :param config: Configuration dictionary for the environment variables of the model.
        :param endpoint_name: The name for the SageMaker endpoint.
        :param endpoint_config_name: The name for the SageMaker endpoint configuration.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints)
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
                or EndpointType.INFERENCE_COMPONENT (with inference component).
                Defaults to EndpointType.MODEL_BASED when omitted or None.
        :raises Exception: Any error raised during the check or the deployment
                is logged and re-raised unchanged.
        """
        # Resolved lazily so that importing this module does not require the
        # optional SageMaker dependency to be installed.
        if endpoint_type is None:
            endpoint_type = EndpointType.MODEL_BASED

        try:
            # Check if the endpoint configuration exists (informational only)
            if self.resource_manager.endpoint_config_exists(endpoint_config_name=endpoint_config_name):
                logger.info(f"Endpoint configuration {endpoint_config_name} exists. Using existing configuration...")
            else:
                logger.info(f"Endpoint configuration {endpoint_config_name} does not exist.")

            # Prepare and deploy the HuggingFace model.
            # NOTE: update_endpoint is hard-coded to False here.
            self.prepare_and_deploy_model(
                role_arn=role_arn,
                llm_image=llm_image,
                config=config,
                endpoint_name=endpoint_name,
                update_endpoint=False,
                resources=resources,
                endpoint_type=endpoint_type,
                gpu_instance_type=gpu_instance_type,
            )

            logger.info(f"Successfully deployed/updated model to endpoint {endpoint_name}.")
        except Exception as e:
            logger.error(f"Failed to deploy model to SageMaker: {e}")

            raise

    @staticmethod
    def prepare_and_deploy_model(
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        update_endpoint: bool,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: Optional[enum.Enum] = None,
    ) -> None:
        """
        Prepares and deploys/updates the HuggingFace model on SageMaker.

        Builds a ``HuggingFaceModel`` from the role, image and environment, then
        calls its ``deploy`` method with a single initial instance, a fixed
        ``task=model_task`` tag and a container startup health-check timeout
        of 900.

        :param role_arn: The ARN of the IAM role.
        :param llm_image: The Docker image URI for the HuggingFace model.
        :param config: Configuration settings for the model (passed as ``env``).
        :param endpoint_name: The name of the endpoint.
        :param update_endpoint: Boolean flag to update an existing endpoint.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints)
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
                or EndpointType.INFERENCE_COMPONENT (with inference component).
                Defaults to EndpointType.MODEL_BASED when omitted or None.
        """
        # Resolved lazily so that importing this module does not require the
        # optional SageMaker dependency to be installed.
        if endpoint_type is None:
            endpoint_type = EndpointType.MODEL_BASED

        huggingface_model = HuggingFaceModel(
            role=role_arn,
            image_uri=llm_image,
            env=config,
        )

        # Deploy or update the model, depending on the update_endpoint flag
        huggingface_model.deploy(
            instance_type=gpu_instance_type,
            initial_instance_count=1,
            endpoint_name=endpoint_name,
            update_endpoint=update_endpoint,
            resources=resources,
            tags=[{"Key": "task", "Value": "model_task"}],
            endpoint_type=endpoint_type,
            container_startup_health_check_timeout=900,
        )