# AmmarFahmy
# adding all files
# 105b369
from typing import Optional, Any, Dict, List, Union, cast
from typing_extensions import Literal
from textwrap import dedent
from phi.aws.api_client import AwsApiClient
from phi.aws.resource.base import AwsResource
from phi.aws.resource.ec2.subnet import Subnet
from phi.aws.resource.eks.cluster import EksCluster
from phi.aws.resource.iam.role import IamRole
from phi.cli.console import print_info
from phi.utils.log import logger
class EksNodeGroup(AwsResource):
    """
    An Amazon EKS managed node group is an Amazon EC2 Auto Scaling group and associated EC2
    instances that are managed by Amazon Web Services for an Amazon EKS cluster.

    An Auto Scaling group is a group of EC2 instances that are combined into one management unit.
    When you set up an auto-scaling group, you specify a scaling policy and AWS will apply that policy to make sure
    that a certain number of instances is automatically running in your group. If the number of instances drops below a
    certain value, or if the load increases (depending on the policy),
    then AWS will automatically spin up new instances for you.

    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.create_nodegroup
    """

    resource_type: Optional[str] = "EksNodeGroup"
    service_name: str = "eks"

    # Name for the node group
    name: str
    # The cluster to create the EksNodeGroup in
    eks_cluster: EksCluster

    # The IAM role to associate with your node group.
    # The Amazon EKS worker node kubelet daemon makes calls to Amazon Web Services APIs on your behalf.
    # Nodes receive permissions for these API calls through an IAM instance profile and associated policies.
    # Before you can launch nodes and register them into a cluster,
    # you must create an IAM role for those nodes to use when they are launched.

    # ARN for the node group IAM role to use
    node_role_arn: Optional[str] = None
    # If node_role_arn is None, a default role is created if create_role is True
    create_role: bool = True
    # If node_role is None, a default node_role is created using node_role_name
    node_role: Optional[IamRole] = None
    # Name for the default node_role when role is None, use "name-iam-role" if not provided
    node_role_name: Optional[str] = None
    # Provide a list of policy ARNs to attach to the node group role
    add_policy_arns: Optional[List[str]] = None

    # The scaling configuration details for the Auto Scaling group
    # Users can provide a dict for scaling config or use min/max/desired values below
    scaling_config: Optional[Dict[str, Union[str, int]]] = None
    # The minimum number of nodes that the managed node group can scale in to.
    min_size: Optional[int] = None
    # The maximum number of nodes that the managed node group can scale out to.
    max_size: Optional[int] = None
    # The current number of nodes that the managed node group should maintain.
    # WARNING: If you use Cluster Autoscaler, you shouldn't change the desired_size value directly,
    # as this can cause the Cluster Autoscaler to suddenly scale up or scale down.
    # Whenever this parameter changes, the number of worker nodes in the node group is updated to
    # the specified size. If this parameter is given a value that is smaller than the current number of
    # running worker nodes, the necessary number of worker nodes are terminated to match the given value.
    desired_size: Optional[int] = None

    # The root device disk size (in GiB) for your node group instances.
    # The default disk size is 20 GiB. If you specify launchTemplate,
    # then don't specify diskSize, or the node group deployment will fail.
    disk_size: Optional[int] = None

    # The subnets to use for the Auto Scaling group that is created for your node group.
    # If you specify launchTemplate, then don't specify SubnetId in your launch template,
    # or the node group deployment will fail.
    # For more information about using launch templates with Amazon EKS,
    # see Launch template support in the Amazon EKS User Guide.
    subnets: Optional[List[str]] = None
    # Filter subnets using availability zones
    subnet_az: Optional[Union[str, List[str]]] = None

    # Specify the instance types for a node group.
    # If you specify a GPU instance type, be sure to specify AL2_x86_64_GPU with the amiType parameter.
    # If you specify launchTemplate, then you can specify zero or one instance type in your launch template
    # or you can specify 0-20 instance types for instanceTypes.
    # If however, you specify an instance type in your launch template and specify any instanceTypes,
    # the node group deployment will fail. If you don't specify an instance type in a launch template
    # or for instance_types, then t3.medium is used, by default. If you specify Spot for capacityType,
    # then we recommend specifying multiple values for instanceTypes.
    instance_types: Optional[List[str]] = None

    # The AMI type for your node group. GPU instance types should use the AL2_x86_64_GPU AMI type.
    # Non-GPU instances should use the AL2_x86_64 AMI type.
    # Arm instances should use the AL2_ARM_64 AMI type.
    # All types use the Amazon EKS optimized Amazon Linux 2 AMI.
    # If you specify launchTemplate, and your launch template uses a custom AMI,
    # then don't specify amiType, or the node group deployment will fail.
    ami_type: Optional[
        Literal[
            "AL2_x86_64",
            "AL2_x86_64_GPU",
            "AL2_ARM_64",
            "CUSTOM",
            "BOTTLEROCKET_ARM_64",
            "BOTTLEROCKET_x86_64",
        ]
    ] = None

    # The remote access (SSH) configuration to use with your node group.
    # If you specify launchTemplate, then don't specify remoteAccess, or the node group deployment will fail.
    # Keys:
    #   ec2SshKey (string) -- The Amazon EC2 SSH key that provides access for SSH communication with the nodes
    #       in the managed node group. For more information, see Amazon EC2 key pairs and Linux instances in the
    #       Amazon Elastic Compute Cloud User Guide for Linux Instances.
    #   sourceSecurityGroups (list) -- The security groups that are allowed SSH access (port 22) to the nodes.
    #       If you specify an Amazon EC2 SSH key but do not specify a source security group when you create
    #       a managed node group, then port 22 on the nodes is opened to the internet (0.0.0.0/0).
    #       For more information, see Security Groups for Your VPC in the Amazon Virtual Private Cloud User Guide.
    # Values are typed as Any because sourceSecurityGroups is a list, not a string.
    remote_access: Optional[Dict[str, Any]] = None

    # The Kubernetes labels to be applied to the nodes in the node group when they are created.
    labels: Optional[Dict[str, str]] = None
    # The Kubernetes taints to be applied to the nodes in the node group.
    taints: Optional[List[dict]] = None
    # The metadata to apply to the node group to assist with categorization and organization.
    # Each tag consists of a key and an optional value. You define both.
    # Node group tags do not propagate to any other resources associated with the node group,
    # such as the Amazon EC2 instances or subnets.
    tags: Optional[Dict[str, str]] = None
    # Unique, case-sensitive identifier that you provide to ensure the idempotency of the request.
    # This field is autopopulated if not provided.
    client_request_token: Optional[str] = None
    # An object representing a node group's launch template specification.
    # If specified, then do not specify instanceTypes, diskSize, or remoteAccess and make sure that the launch template
    # meets the requirements in launchTemplateSpecification.
    launch_template: Optional[Dict[str, str]] = None
    # The node group update configuration.
    update_config: Optional[Dict[str, int]] = None
    # The capacity type for your node group.
    capacity_type: Optional[Literal["ON_DEMAND", "SPOT"]] = None
    # The Kubernetes version to use for your managed nodes.
    # By default, the Kubernetes version of the cluster is used, and this is the only accepted specified value.
    # If you specify launchTemplate, and your launch template uses a custom AMI,
    # then don't specify version, or the node group deployment will fail.
    version: Optional[str] = None
    # The AMI version of the Amazon EKS optimized AMI to use with your node group.
    # By default, the latest available AMI version for the node group's current Kubernetes version is used.
    release_version: Optional[str] = None

    # provided by api on create
    created_at: Optional[str] = None
    nodegroup_status: Optional[str] = None

    # provided by api on update
    update_id: Optional[str] = None
    update_status: Optional[str] = None

    # bump the wait time for Eks to 30 seconds
    waiter_delay: int = 30

    def _build_scaling_config(self) -> Optional[Dict[str, Union[str, int]]]:
        """Returns the scalingConfig to send to the EKS API, or None if nothing is configured.

        An explicit `scaling_config` dict is returned untouched. Otherwise the
        config is assembled from `min_size`, `max_size` and `desired_size`.
        """
        if self.scaling_config is not None:
            return self.scaling_config

        scaling_config: Dict[str, Union[str, int]] = {}
        if self.min_size is not None:
            scaling_config["minSize"] = self.min_size
            # use min_size as the default for maxSize/desiredSize in case they are not provided
            scaling_config["maxSize"] = self.min_size
            scaling_config["desiredSize"] = self.min_size
        if self.max_size is not None:
            scaling_config["maxSize"] = self.max_size
        if self.desired_size is not None:
            scaling_config["desiredSize"] = self.desired_size
        # An empty dict means no size was provided: report "not configured"
        return scaling_config or None

    def _create(self, aws_client: AwsApiClient) -> bool:
        """Creates a NodeGroup for your Amazon EKS cluster.

        Args:
            aws_client: The AwsApiClient for the current cluster

        Returns:
            True if the NodeGroup was created or already exists, False otherwise.
        """
        print_info(f"Creating {self.get_resource_type()}: {self.get_resource_name()}")

        # Step 1: Get NodeGroup IamRole
        nodegroup_iam_role_arn = self.node_role_arn
        if nodegroup_iam_role_arn is None and self.create_role:
            # Create NodeGroup IamRole and get nodegroup_iam_role_arn
            nodegroup_iam_role = self.get_nodegroup_iam_role()
            try:
                nodegroup_iam_role.create(aws_client)
                nodegroup_iam_role_arn = nodegroup_iam_role.read(aws_client).arn
                print_info(f"ARN for {nodegroup_iam_role.name}: {nodegroup_iam_role_arn}")
            except Exception as e:
                logger.error("NodeGroup IamRole creation failed, please fix and try again")
                logger.error(e)
                return False
        if nodegroup_iam_role_arn is None:
            logger.error("IamRole ARN not available, please fix and try again")
            return False

        # Step 2: Get the subnets
        subnets: Optional[List[str]] = self.subnets
        if subnets is None:
            # Use subnets from EKSCluster if subnets not provided
            subnets = self.eks_cluster.get_subnets(aws_client=aws_client)
        # Filter subnets using availability zones
        if self.subnet_az is not None and subnets:
            azs_filter: List[str] = []
            if isinstance(self.subnet_az, str):
                azs_filter.append(self.subnet_az)
            elif isinstance(self.subnet_az, list):
                azs_filter.extend(self.subnet_az)
            subnets = [
                subnet_id
                for subnet_id in subnets
                if Subnet(name=subnet_id).get_availability_zone(aws_client=aws_client) in azs_filter
            ]
        logger.debug(f"Using subnets from EKSCluster: {subnets}")
        # `subnets` is a required argument of create_nodegroup: fail early with a clear message
        # instead of iterating None or sending an empty list to the API.
        if not subnets:
            logger.error("No subnets available for the NodeGroup, please fix and try again")
            return False
        # cast for type checker
        subnets = cast(List[str], subnets)

        # Step 3: Collect the optional arguments.
        # Only args which are not null are sent, otherwise aws type validation fails
        optional_args: Dict[str, Any] = {
            "scalingConfig": self._build_scaling_config(),
            "diskSize": self.disk_size,
            "instanceTypes": self.instance_types,
            "amiType": self.ami_type,
            "remoteAccess": self.remote_access,
            "labels": self.labels,
            "taints": self.taints,
            "tags": self.tags,
            "clientRequestToken": self.client_request_token,
            "launchTemplate": self.launch_template,
            "updateConfig": self.update_config,
            "capacityType": self.capacity_type,
            "version": self.version,
            # the API parameter is camelCase: `releaseVersion`
            "releaseVersion": self.release_version,
        }
        not_null_args: Dict[str, Any] = {key: value for key, value in optional_args.items() if value is not None}

        # Step 4: Create EksNodeGroup
        service_client = self.get_service_client(aws_client)
        try:
            create_response = service_client.create_nodegroup(
                clusterName=self.eks_cluster.name,
                nodegroupName=self.name,
                subnets=subnets,
                nodeRole=nodegroup_iam_role_arn,
                **not_null_args,
            )
            logger.debug(f"EksNodeGroup: {create_response}")
            nodegroup_dict = create_response.get("nodegroup", {})

            # Validate EksNodeGroup creation
            self.created_at = nodegroup_dict.get("createdAt", None)
            self.nodegroup_status = nodegroup_dict.get("status", None)
            logger.debug(f"created_at: {self.created_at}")
            logger.debug(f"nodegroup_status: {self.nodegroup_status}")
            if self.created_at is not None:
                print_info(f"EksNodeGroup created: {self.name}")
                self.active_resource = create_response
                return True
        except service_client.exceptions.ResourceInUseException:
            # The NodeGroup already exists, which is treated as success
            print_info(f"EksNodeGroup already exists: {self.name}")
            return True
        except Exception as e:
            logger.error(f"{self.get_resource_type()} could not be created.")
            logger.error(e)
        return False

    def post_create(self, aws_client: AwsApiClient) -> bool:
        """Waits for the EksNodeGroup to become active if `wait_for_create` is set.

        Args:
            aws_client: The AwsApiClient for the current cluster

        Returns:
            Always True: a failed waiter is logged but not treated as a failure.
        """
        # Wait for EksNodeGroup to be created
        if self.wait_for_create:
            try:
                print_info(f"Waiting for {self.get_resource_type()} to be created.")
                waiter = self.get_service_client(aws_client).get_waiter("nodegroup_active")
                waiter.wait(
                    clusterName=self.eks_cluster.name,
                    nodegroupName=self.name,
                    WaiterConfig={
                        "Delay": self.waiter_delay,
                        "MaxAttempts": self.waiter_max_attempts,
                    },
                )
            except Exception as e:
                logger.error("Waiter failed.")
                logger.error(e)
        return True

    def _read(self, aws_client: AwsApiClient) -> Optional[Any]:
        """Returns the EksNodeGroup

        Args:
            aws_client: The AwsApiClient for the current cluster

        Returns:
            The describe_nodegroup response if the NodeGroup was found,
            otherwise whatever `active_resource` currently holds.
        """
        logger.debug(f"Reading {self.get_resource_type()}: {self.get_resource_name()}")

        from botocore.exceptions import ClientError

        service_client = self.get_service_client(aws_client)
        try:
            describe_response = service_client.describe_nodegroup(
                clusterName=self.eks_cluster.name,
                nodegroupName=self.name,
            )
            # logger.debug(f"describe_response: {describe_response}")
            nodegroup_dict = describe_response.get("nodegroup", {})

            self.created_at = nodegroup_dict.get("createdAt", None)
            self.nodegroup_status = nodegroup_dict.get("status", None)
            logger.debug(f"NodeGroup created_at: {self.created_at}")
            logger.debug(f"NodeGroup status: {self.nodegroup_status}")
            if self.created_at is not None:
                logger.debug(f"EksNodeGroup found: {self.name}")
                self.active_resource = describe_response
        except ClientError as ce:
            # Logged at debug level only; presumably covers the "not found" case
            logger.debug(f"ClientError: {ce}")
        except Exception as e:
            logger.error(f"Error reading {self.get_resource_type()}.")
            logger.error(e)
        return self.active_resource

    def _delete(self, aws_client: AwsApiClient) -> bool:
        """Deletes the EksNodeGroup

        Args:
            aws_client: The AwsApiClient for the current cluster

        Returns:
            True if the delete request was accepted, False otherwise.
        """
        print_info(f"Deleting {self.get_resource_type()}: {self.get_resource_name()}")

        # Step 1: Delete the IamRole
        # NOTE(review): the role is removed before the node group that uses it - confirm this order is intended.
        if self.node_role_arn is None and self.create_role:
            nodegroup_iam_role = self.get_nodegroup_iam_role()
            try:
                nodegroup_iam_role.delete(aws_client)
            except Exception as e:
                # Best effort: a failed role deletion does not stop the NodeGroup deletion
                logger.error("IamRole deletion failed, please try again or delete manually")
                logger.error(e)

        # Step 2: Delete the NodeGroup
        service_client = self.get_service_client(aws_client)
        self.active_resource = None
        try:
            delete_response = service_client.delete_nodegroup(
                clusterName=self.eks_cluster.name,
                nodegroupName=self.name,
            )
            logger.debug(f"EksNodeGroup: {delete_response}")
            return True
        except Exception as e:
            logger.error(f"{self.get_resource_type()} could not be deleted.")
            logger.error("Please try again or delete resources manually.")
            logger.error(e)
        return False

    def post_delete(self, aws_client: AwsApiClient) -> bool:
        """Waits for the EksNodeGroup to be deleted if `wait_for_delete` is set.

        Args:
            aws_client: The AwsApiClient for the current cluster

        Returns:
            Always True: a failed waiter is logged but not treated as a failure.
        """
        # Wait for EksNodeGroup to be deleted
        if self.wait_for_delete:
            try:
                print_info(f"Waiting for {self.get_resource_type()} to be deleted.")
                waiter = self.get_service_client(aws_client).get_waiter("nodegroup_deleted")
                waiter.wait(
                    clusterName=self.eks_cluster.name,
                    nodegroupName=self.name,
                    WaiterConfig={
                        "Delay": self.waiter_delay,
                        "MaxAttempts": self.waiter_max_attempts,
                    },
                )
            except Exception as e:
                logger.error("Waiter failed.")
                logger.error(e)
        return True

    def get_nodegroup_iam_role(self) -> IamRole:
        """
        Returns the IamRole for the node group.

        Uses `node_role` when provided. Otherwise builds a default IamRole which can be
        assumed by EC2 and carries the default managed policies plus `add_policy_arns`.
        """
        if self.node_role is not None:
            return self.node_role

        policy_arns = [
            "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
            "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
            "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
            "arn:aws:iam::aws:policy/AmazonS3FullAccess",
            "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy",
            "arn:aws:iam::aws:policy/service-role/AmazonEFSCSIDriverPolicy",
        ]
        if self.add_policy_arns is not None and isinstance(self.add_policy_arns, list):
            policy_arns.extend(self.add_policy_arns)

        return IamRole(
            name=self.node_role_name or f"{self.name}-iam-role",
            # The JSON lines share one indent level so the dedented document is unchanged
            assume_role_policy_document=dedent(
                """\
                {
                "Version": "2012-10-17",
                "Statement": [
                {
                "Effect": "Allow",
                "Principal": {
                "Service": "ec2.amazonaws.com"
                },
                "Action": "sts:AssumeRole"
                }
                ]
                }
                """
            ),
            policy_arns=policy_arns,
        )

    def _update(self, aws_client: AwsApiClient) -> bool:
        """Updates the EksNodeGroup configuration (scaling config and update config).

        Args:
            aws_client: The AwsApiClient for the current cluster

        Returns:
            True if the API returned an update id, False otherwise.
        """
        print_info(f"Updating {self.get_resource_type()}: {self.get_resource_name()}")

        # TODO: Add logic to calculate updated_labels and updated_taints
        updated_labels = None
        updated_taints = None

        # Only args which are not null are sent, otherwise aws type validation fails
        optional_args: Dict[str, Any] = {
            "scalingConfig": self._build_scaling_config(),
            "labels": updated_labels,
            "taints": updated_taints,
            "updateConfig": self.update_config,
        }
        not_null_args: Dict[str, Any] = {key: value for key, value in optional_args.items() if value is not None}

        # Update EksNodeGroup
        service_client = self.get_service_client(aws_client)
        try:
            update_response = service_client.update_nodegroup_config(
                clusterName=self.eks_cluster.name,
                nodegroupName=self.name,
                **not_null_args,
            )
            logger.debug(f"EksNodeGroup: {update_response}")
            nodegroup_dict = update_response.get("update", {})

            # Validate EksNodeGroup update
            self.update_id = nodegroup_dict.get("id", None)
            self.update_status = nodegroup_dict.get("status", None)
            logger.debug(f"update_id: {self.update_id}")
            logger.debug(f"update_status: {self.update_status}")
            if self.update_id is not None:
                print_info(f"EksNodeGroup updated: {self.name}")
                return True
        except Exception as e:
            logger.error(f"{self.get_resource_type()} could not be updated.")
            logger.error(e)
        return False