import enum
from typing import Optional

from loguru import logger

try:
    import boto3
    from sagemaker.enums import EndpointType
    from sagemaker.huggingface import HuggingFaceModel
except ModuleNotFoundError:
    logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.")

from llm_engineering.domain.inference import DeploymentStrategy
from llm_engineering.settings import settings


class SagemakerHuggingfaceStrategy(DeploymentStrategy):
    def __init__(self, deployment_service) -> None:
        """
        Initializes the deployment strategy with the necessary services.

        :param deployment_service: The service handling the deployment details.
        """

        self.deployment_service = deployment_service

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Initiates the deployment process for a HuggingFace model on AWS SageMaker.

        :param role_arn: AWS role ARN with permissions for SageMaker deployment.
        :param llm_image: URI for the HuggingFace model Docker image.
        :param config: Configuration settings for the model environment.
        :param endpoint_name: Name of the SageMaker endpoint.
        :param endpoint_config_name: Name of the SageMaker endpoint configuration.
        :param gpu_instance_type: Instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi-model endpoints).
        :param endpoint_type: Either EndpointType.MODEL_BASED (without an inference component)
            or EndpointType.INFERENCE_COMPONENT_BASED (with an inference component).
        """

        logger.info("Starting deployment using Sagemaker Huggingface Strategy...")
        logger.info(
            f"Deployment parameters: nb of replicas: {settings.COPIES}, nb of gpus: {settings.GPUS}, instance type: {settings.GPU_INSTANCE_TYPE}"
        )

        try:
            # Delegate to the deployment service to handle the actual deployment details.
            self.deployment_service.deploy(
                role_arn=role_arn,
                llm_image=llm_image,
                config=config,
                endpoint_name=endpoint_name,
                endpoint_config_name=endpoint_config_name,
                gpu_instance_type=gpu_instance_type,
                resources=resources,
                endpoint_type=endpoint_type,
            )
            logger.info("Deployment completed successfully.")
        except Exception as e:
            logger.error(f"Error during deployment: {e}")
            raise
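

# A minimal usage sketch, not part of the original module: it assumes `deployment_service` is an
# already constructed DeploymentService (defined below) and that the role ARN, image URI, endpoint
# names, and container environment are illustrative placeholders rather than values from this repo.
def _example_strategy_usage(deployment_service) -> None:
    strategy = SagemakerHuggingfaceStrategy(deployment_service)
    strategy.deploy(
        role_arn="arn:aws:iam::123456789012:role/sagemaker-execution-role",  # placeholder ARN
        llm_image="<ecr-uri-of-huggingface-llm-image>",  # placeholder image URI
        config={"HF_MODEL_ID": "<model-id>"},  # placeholder container environment
        endpoint_name="example-endpoint",
        endpoint_config_name="example-endpoint-config",
        gpu_instance_type=settings.GPU_INSTANCE_TYPE,
    )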


class DeploymentService:
    def __init__(self, resource_manager):
        """
        Initializes the DeploymentService with necessary dependencies.

        :param resource_manager: Manages resources and configurations for deployments.
        """

        self.sagemaker_client = boto3.client(
            "sagemaker",
            region_name=settings.AWS_REGION,
            aws_access_key_id=settings.AWS_ACCESS_KEY,
            aws_secret_access_key=settings.AWS_SECRET_KEY,
        )
        self.resource_manager = resource_manager

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Handles the deployment of a model to SageMaker, including checking and creating
        configurations and endpoints as necessary.

        :param role_arn: The ARN of the IAM role for SageMaker to access resources.
        :param llm_image: URI of the Docker image in ECR for the HuggingFace model.
        :param config: Configuration dictionary for the environment variables of the model.
        :param endpoint_name: The name for the SageMaker endpoint.
        :param endpoint_config_name: The name for the SageMaker endpoint configuration.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi-model endpoints).
        :param endpoint_type: Either EndpointType.MODEL_BASED (without an inference component)
            or EndpointType.INFERENCE_COMPONENT_BASED (with an inference component).
        """

        try:
            # Check whether the endpoint configuration already exists.
            if self.resource_manager.endpoint_config_exists(endpoint_config_name=endpoint_config_name):
                logger.info(f"Endpoint configuration {endpoint_config_name} exists. Using existing configuration...")
            else:
                logger.info(f"Endpoint configuration {endpoint_config_name} does not exist.")

            # Prepare and deploy the HuggingFace model.
            self.prepare_and_deploy_model(
                role_arn=role_arn,
                llm_image=llm_image,
                config=config,
                endpoint_name=endpoint_name,
                update_endpoint=False,
                resources=resources,
                endpoint_type=endpoint_type,
                gpu_instance_type=gpu_instance_type,
            )
            logger.info(f"Successfully deployed/updated model to endpoint {endpoint_name}.")
        except Exception as e:
            logger.error(f"Failed to deploy model to SageMaker: {e}")
            raise

    @staticmethod
    def prepare_and_deploy_model(
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        update_endpoint: bool,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Prepares and deploys/updates the HuggingFace model on SageMaker.

        :param role_arn: The ARN of the IAM role.
        :param llm_image: The Docker image URI for the HuggingFace model.
        :param config: Configuration settings for the model.
        :param endpoint_name: The name of the endpoint.
        :param update_endpoint: Boolean flag to update an existing endpoint.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi-model endpoints).
        :param endpoint_type: Either EndpointType.MODEL_BASED (without an inference component)
            or EndpointType.INFERENCE_COMPONENT_BASED (with an inference component).
        """

        huggingface_model = HuggingFaceModel(
            role=role_arn,
            image_uri=llm_image,
            env=config,
        )

        # Deploy or update the model based on the endpoint existence.
        huggingface_model.deploy(
            instance_type=gpu_instance_type,
            initial_instance_count=1,
            endpoint_name=endpoint_name,
            update_endpoint=update_endpoint,
            resources=resources,
            tags=[{"Key": "task", "Value": "model_task"}],
            endpoint_type=endpoint_type,
            container_startup_health_check_timeout=900,
        )
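

# A minimal sketch of a multi-model (inference component) deployment, not part of the original
# module. It assumes the SageMaker SDK's ResourceRequirements class is importable from
# sagemaker.compute_resource_requirements and that `resource_manager` provides the
# endpoint_config_exists() check used above; every literal value below is a placeholder.
def _example_inference_component_deploy(resource_manager) -> None:
    from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements

    # Per-copy compute budget for the inference component (keys and values are assumptions).
    model_resources = ResourceRequirements(
        requests={
            "copies": settings.COPIES,
            "num_accelerators": settings.GPUS,
            "memory": 5 * 1024,  # MB, placeholder value
        },
    )

    service = DeploymentService(resource_manager=resource_manager)
    service.deploy(
        role_arn="arn:aws:iam::123456789012:role/sagemaker-execution-role",  # placeholder ARN
        llm_image="<ecr-uri-of-huggingface-llm-image>",  # placeholder image URI
        config={"HF_MODEL_ID": "<model-id>"},  # placeholder container environment
        endpoint_name="example-endpoint",
        endpoint_config_name="example-endpoint-config",
        gpu_instance_type=settings.GPU_INSTANCE_TYPE,
        resources=model_resources,  # passed through to HuggingFaceModel.deploy()
        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
    )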