import enum
from typing import Optional
from loguru import logger
try:
import boto3
from sagemaker.enums import EndpointType
from sagemaker.huggingface import HuggingFaceModel
except ModuleNotFoundError:
logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.")
from llm_engineering.domain.inference import DeploymentStrategy
from llm_engineering.settings import settings
class SagemakerHuggingfaceStrategy(DeploymentStrategy):
    def __init__(self, deployment_service) -> None:
        """
        Initializes the deployment strategy with the necessary services.

        :param deployment_service: The service handling the deployment details
            (must expose a ``deploy(...)`` method with the same keyword arguments
            as :meth:`deploy`).
        """

        self.deployment_service = deployment_service

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Initiates the deployment process for a HuggingFace model on AWS SageMaker.

        :param role_arn: AWS role ARN with permissions for SageMaker deployment.
        :param llm_image: URI for the HuggingFace model Docker image.
        :param config: Configuration settings for the model environment.
        :param endpoint_name: Name of the SageMaker endpoint.
        :param endpoint_config_name: Name of the SageMaker endpoint configuration.
        :param gpu_instance_type: The GPU instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints).
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
            or EndpointType.INFERENCE_COMPONENT (with inference component)
        :raises Exception: re-raises anything the underlying deployment service raises,
            after logging it.
        """

        logger.info("Starting deployment using Sagemaker Huggingface Strategy...")
        logger.info(
            f"Deployment parameters: nb of replicas: {settings.COPIES}, nb of gpus: {settings.GPUS}, "
            f"instance_type: {settings.GPU_INSTANCE_TYPE}"
        )
        try:
            # Delegate to the deployment service to handle the actual deployment details.
            self.deployment_service.deploy(
                role_arn=role_arn,
                llm_image=llm_image,
                config=config,
                endpoint_name=endpoint_name,
                endpoint_config_name=endpoint_config_name,
                gpu_instance_type=gpu_instance_type,
                resources=resources,
                endpoint_type=endpoint_type,
            )
            logger.info("Deployment completed successfully.")
        except Exception as e:
            # Log and re-raise so callers can still handle/abort on failure.
            logger.error(f"Error during deployment: {e}")
            raise
class DeploymentService:
    def __init__(self, resource_manager):
        """
        Initializes the DeploymentService with necessary dependencies.

        :param resource_manager: Manages resources and configurations for deployments
            (must expose ``endpoint_config_exists(endpoint_config_name=...)``).
        """

        # Credentials and region come from the application settings module.
        self.sagemaker_client = boto3.client(
            "sagemaker",
            region_name=settings.AWS_REGION,
            aws_access_key_id=settings.AWS_ACCESS_KEY,
            aws_secret_access_key=settings.AWS_SECRET_KEY,
        )
        self.resource_manager = resource_manager

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Handles the deployment of a model to SageMaker, including checking and creating
        configurations and endpoints as necessary.

        :param role_arn: The ARN of the IAM role for SageMaker to access resources.
        :param llm_image: URI of the Docker image in ECR for the HuggingFace model.
        :param config: Configuration dictionary for the environment variables of the model.
        :param endpoint_name: The name for the SageMaker endpoint.
        :param endpoint_config_name: The name for the SageMaker endpoint configuration.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints).
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
            or EndpointType.INFERENCE_COMPONENT (with inference component)
        :raises Exception: re-raises anything raised while checking the configuration
            or deploying the model, after logging it.
        """

        try:
            # NOTE(review): the existence check currently only affects the log message —
            # deployment proceeds identically in both branches (update_endpoint=False).
            # Confirm whether an existing configuration should set update_endpoint=True.
            if self.resource_manager.endpoint_config_exists(endpoint_config_name=endpoint_config_name):
                logger.info(f"Endpoint configuration {endpoint_config_name} exists. Using existing configuration...")
            else:
                logger.info(f"Endpoint configuration {endpoint_config_name} does not exist.")

            # Prepare and deploy the HuggingFace model.
            self.prepare_and_deploy_model(
                role_arn=role_arn,
                llm_image=llm_image,
                config=config,
                endpoint_name=endpoint_name,
                update_endpoint=False,
                resources=resources,
                endpoint_type=endpoint_type,
                gpu_instance_type=gpu_instance_type,
            )
            logger.info(f"Successfully deployed/updated model to endpoint {endpoint_name}.")
        except Exception as e:
            logger.error(f"Failed to deploy model to SageMaker: {e}")
            raise

    @staticmethod
    def prepare_and_deploy_model(
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        update_endpoint: bool,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Prepares and deploys/updates the HuggingFace model on SageMaker.

        :param role_arn: The ARN of the IAM role.
        :param llm_image: The Docker image URI for the HuggingFace model.
        :param config: Configuration settings for the model.
        :param endpoint_name: The name of the endpoint.
        :param update_endpoint: Boolean flag to update an existing endpoint.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints).
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
            or EndpointType.INFERENCE_COMPONENT (with inference component)
        """

        huggingface_model = HuggingFaceModel(
            role=role_arn,
            image_uri=llm_image,
            env=config,
        )

        # Deploy or update the model based on the endpoint existence.
        # A generous startup health-check timeout (15 min) is needed because large
        # LLM containers can take a long time to download and load weights.
        huggingface_model.deploy(
            instance_type=gpu_instance_type,
            initial_instance_count=1,
            endpoint_name=endpoint_name,
            update_endpoint=update_endpoint,
            resources=resources,
            tags=[{"Key": "task", "Value": "model_task"}],
            endpoint_type=endpoint_type,
            container_startup_health_check_timeout=900,
        )