SkazuHD's picture
init space
d660b02
from loguru import logger
try:
from sagemaker.enums import EndpointType
from sagemaker.huggingface import get_huggingface_llm_image_uri
except ModuleNotFoundError:
logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.")
from llm_engineering.model.utils import ResourceManager
from llm_engineering.settings import settings
from .config import hugging_face_deploy_config, model_resource_config
from .sagemaker_huggingface import DeploymentService, SagemakerHuggingfaceStrategy
def create_endpoint(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED) -> None:
    """Create the SageMaker inference endpoint via ``SagemakerHuggingfaceStrategy``.

    Looks up the Hugging Face LLM container image URI, builds a
    ``DeploymentService`` backed by a fresh ``ResourceManager``, and calls
    ``SagemakerHuggingfaceStrategy.deploy`` with the role, endpoint names and
    GPU instance type read from ``settings`` plus the deploy/resource configs
    imported from ``.config``.

    Args:
        endpoint_type: Endpoint type forwarded unchanged to ``deploy``.
            Defaults to ``EndpointType.INFERENCE_COMPONENT_BASED``.

    Raises:
        AssertionError: If ``settings.AWS_ARN_ROLE`` is ``None``.
    """
    # Explicit check instead of an `assert` statement: asserts are stripped
    # when Python runs with -O, which would let a missing role reach `deploy`.
    # AssertionError is kept so callers observe the same exception type.
    if settings.AWS_ARN_ROLE is None:
        raise AssertionError("AWS_ARN_ROLE is not set in the .env file.")

    logger.info(f"Creating endpoint with endpoint_type = {endpoint_type} and model_id = {settings.HF_MODEL_ID}")

    # Container image for the Hugging Face LLM backend, pinned to version 2.2.0.
    llm_image = get_huggingface_llm_image_uri("huggingface", version="2.2.0")

    resource_manager = ResourceManager()
    deployment_service = DeploymentService(resource_manager=resource_manager)

    SagemakerHuggingfaceStrategy(deployment_service).deploy(
        role_arn=settings.AWS_ARN_ROLE,
        llm_image=llm_image,
        config=hugging_face_deploy_config,
        endpoint_name=settings.SAGEMAKER_ENDPOINT_INFERENCE,
        endpoint_config_name=settings.SAGEMAKER_ENDPOINT_CONFIG_INFERENCE,
        gpu_instance_type=settings.GPU_INSTANCE_TYPE,
        resources=model_resource_config,
        endpoint_type=endpoint_type,
    )
if __name__ == "__main__":
    # Script entry point: request a model-based endpoint explicitly instead of
    # relying on create_endpoint's inference-component-based default.
    script_endpoint_type = EndpointType.MODEL_BASED
    create_endpoint(endpoint_type=script_endpoint_type)