Skip to content

Instantly share code, notes, and snippets.

@kevinmehall
Last active February 20, 2022 23:24
Show Gist options
  • Select an option

  • Save kevinmehall/f3e53a0be9d59f0759d8fbba872bc8cf to your computer and use it in GitHub Desktop.

Select an option

Save kevinmehall/f3e53a0be9d59f0759d8fbba872bc8cf to your computer and use it in GitHub Desktop.

Revisions

  1. kevinmehall revised this gist Aug 3, 2020. 1 changed file with 8 additions and 5 deletions.
    13 changes: 8 additions & 5 deletions replace_ecs_cluster_instances.py
    Original file line number Diff line number Diff line change
    @@ -48,16 +48,19 @@
    assert(describe_group['AutoScalingGroupName'] == group_name)

    desired_capacity = describe_group['DesiredCapacity']
    target_launch_configuration = describe_group['LaunchConfigurationName']
    target_launch_template = describe_group['LaunchTemplate']
    asg_instances = [instance['InstanceId'] for instance in describe_group['Instances']]
    prev_desired_count = describe_group['DesiredCapacity']

    print(f"Target launch configuration {target_launch_configuration}")
    print(f"Target launch template {target_launch_template['LaunchTemplateId']} {target_launch_template['Version']}")

    describe_launch_configuration = autoscaling.describe_launch_configurations(LaunchConfigurationNames=[target_launch_configuration])['LaunchConfigurations'][0]
    describe_launch_template = ec2.describe_launch_template_versions(
    LaunchTemplateId = target_launch_template['LaunchTemplateId'],
    Versions = [target_launch_template['Version']]
    )['LaunchTemplateVersions'][0]

    target_ami = describe_launch_configuration['ImageId']
    target_instance_type = describe_launch_configuration['InstanceType']
    target_ami = describe_launch_template['LaunchTemplateData']['ImageId']
    target_instance_type = describe_launch_template['LaunchTemplateData']['InstanceType']

    print(f"Target AMI {target_ami} on {target_instance_type}")

  2. kevinmehall revised this gist Apr 1, 2019. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion replace_ecs_cluster_instances.py
    Original file line number Diff line number Diff line change
    @@ -123,7 +123,10 @@
    time.sleep(10)

    for instance_id in instances_to_replace:
    if input(f"Terminate instance {instance_id}? ") == "y":
    if input(f"Terminate instance {instance_id}? (y/n) ") == "y":
    autoscaling.terminate_instance_in_auto_scaling_group(InstanceId=instance_id, ShouldDecrementDesiredCapacity=True)
    print("Terminated instance")
    else:
    print("Not terminating this instance")

    print("Done")
  3. kevinmehall created this gist Feb 19, 2019.
    129 changes: 129 additions & 0 deletions replace_ecs_cluster_instances.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,129 @@
    #!/usr/bin/env python3
    #
    # Script to replace EC2 instances in an ECS cluster's auto-scaling group after
    # changing the AMI or instance type in the launch configuration. It
    # checks for instances with the incorrect AMI or type, scales up the
    # auto-scaling group with replacement instances, then drains the tasks
    # from the old instances.
    #
    # Usage: aws-vault exec profile-name -- python3 replace_ecs_cluster_instances.py --group=asg-name --cluster=ecs-cluster-name --count=3
    #
    # The count is specified so that it knows what the "real" desired count is in
    # case it is interrupted and restarted after increasing the desired count.
    #
    # License: ISC
    # Copyright 2019 3D Robotics
    # Permission to use, copy, modify, and/or distribute this software for any
    # purpose with or without fee is hereby granted, provided that the above
    # copyright notice and this permission notice appear in all copies.
    #
    # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
    # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
    # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
    # NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
    # USE OR PERFORMANCE OF THIS SOFTWARE.
    #

    import boto3
    import argparse
    import time

    # Rolling replacement of the EC2 instances behind an ECS cluster.
    #
    # Flow:
    #   1. Read the auto-scaling group and work out the target AMI / instance
    #      type from its launch template or launch configuration.
    #   2. Mark every group instance whose AMI or type differs for replacement.
    #   3. Scale the group up so replacements boot alongside the old instances.
    #   4. Once enough replacement container instances are ACTIVE, set the old
    #      ones to DRAINING and poll until they run no tasks.
    #   5. Ask for confirmation before terminating each old instance.

    # Per-call limits of the ECS API operations used below.
    ECS_DESCRIBE_BATCH_SIZE = 100
    ECS_UPDATE_STATE_BATCH_SIZE = 10
    POLL_INTERVAL_SECONDS = 10


    def _chunks(items, size):
        """Yield consecutive slices of ``items`` holding at most ``size`` elements."""
        for start in range(0, len(items), size):
            yield items[start:start + size]


    autoscaling = boto3.client('autoscaling')
    ec2 = boto3.client('ec2')
    ecs = boto3.client('ecs')

    parser = argparse.ArgumentParser(description='Update the instances in the autoscaling group.')
    parser.add_argument('--group', metavar='NAME', required=True, help='Autoscaling group name')
    parser.add_argument('--cluster', metavar='NAME', required=True, help='ECS cluster name')
    parser.add_argument('--count', metavar='N', type=int, required=True, help='Desired count of instances in autoscaling group')
    args = parser.parse_args()

    group_name = args.group
    cluster_name = args.cluster
    desired_count = args.count

    # An unknown group name yields an empty list rather than an API error, so
    # check explicitly instead of letting [0] raise IndexError.
    matching_groups = autoscaling.describe_auto_scaling_groups(AutoScalingGroupNames=[group_name])['AutoScalingGroups']
    if not matching_groups:
        raise SystemExit(f"Auto-scaling group {group_name!r} not found")
    describe_group = matching_groups[0]
    if describe_group['AutoScalingGroupName'] != group_name:
        raise SystemExit(f"Expected auto-scaling group {group_name!r}, got {describe_group['AutoScalingGroupName']!r}")

    asg_instances = [instance['InstanceId'] for instance in describe_group['Instances']]
    prev_desired_count = describe_group['DesiredCapacity']

    # A group is configured with either a launch template or a launch
    # configuration; read the target AMI and instance type from whichever is set.
    target_launch_template = describe_group.get('LaunchTemplate')
    target_launch_configuration = describe_group.get('LaunchConfigurationName')

    if target_launch_template:
        template_version = target_launch_template.get('Version', '$Default')
        print(f"Target launch template {target_launch_template['LaunchTemplateId']} {template_version}")

        describe_launch_template = ec2.describe_launch_template_versions(
            LaunchTemplateId=target_launch_template['LaunchTemplateId'],
            Versions=[template_version],
        )['LaunchTemplateVersions'][0]

        target_ami = describe_launch_template['LaunchTemplateData']['ImageId']
        target_instance_type = describe_launch_template['LaunchTemplateData']['InstanceType']
    elif target_launch_configuration:
        print(f"Target launch configuration {target_launch_configuration}")

        describe_launch_configuration = autoscaling.describe_launch_configurations(
            LaunchConfigurationNames=[target_launch_configuration],
        )['LaunchConfigurations'][0]

        target_ami = describe_launch_configuration['ImageId']
        target_instance_type = describe_launch_configuration['InstanceType']
    else:
        raise SystemExit(f"Auto-scaling group {group_name!r} has neither a launch template nor a launch configuration")

    print(f"Target AMI {target_ami} on {target_instance_type}")

    # describe_instances with an empty InstanceIds list applies no filter and
    # returns every instance in the account, so skip the call for an empty group.
    if asg_instances:
        describe_instances_reservations = ec2.describe_instances(InstanceIds=asg_instances)['Reservations']
    else:
        describe_instances_reservations = []

    instances_to_replace = []

    for reservation in describe_instances_reservations:
        for instance in reservation['Instances']:
            instance_id = instance['InstanceId']
            instance_ami = instance['ImageId']
            instance_type = instance['InstanceType']
            instance_launched = instance['LaunchTime']

            needs_replace = instance_ami != target_ami or instance_type != target_instance_type

            if needs_replace:
                instances_to_replace.append(instance_id)

            print(f"Instance {instance_id}, created {instance_launched.ctime()}, type {instance_type}, AMI {instance_ami} -- {'REPLACE' if needs_replace else 'OK'}")

    # max() keeps an already-raised desired capacity when the script is re-run
    # after being interrupted part-way through a replacement.
    new_desired_count = max(prev_desired_count, desired_count + len(instances_to_replace))

    max_size = describe_group['MaxSize']
    if new_desired_count > max_size:
        raise SystemExit(
            f"Need {new_desired_count} instances during replacement but the group's MaxSize is {max_size}; "
            "raise MaxSize and re-run"
        )

    print(f"Temporarily scaling cluster from {prev_desired_count} to {new_desired_count} instances")
    autoscaling.set_desired_capacity(AutoScalingGroupName=group_name, DesiredCapacity=new_desired_count)

    # Built once; used for membership tests on every poll.
    replace_ids = set(instances_to_replace)
    list_paginator = ecs.get_paginator('list_container_instances')

    while True:
        print('\n----\n')

        # Page through the listing so clusters larger than one page are fully seen.
        container_instance_arns = []
        for page in list_paginator.paginate(cluster=cluster_name):
            container_instance_arns.extend(page['containerInstanceArns'])

        # Describe in batches; an empty cluster produces no batches and no call.
        container_instances = []
        for batch in _chunks(container_instance_arns, ECS_DESCRIBE_BATCH_SIZE):
            container_instances.extend(
                ecs.describe_container_instances(cluster=cluster_name, containerInstances=batch)['containerInstances']
            )

        container_instances.sort(key=lambda ci: ci['registeredAt'])

        available_instances = 0  # ACTIVE container instances that are not being replaced
        remaining_tasks = 0      # tasks still running on instances being replaced
        to_drain = []            # old container instances not yet set to DRAINING

        for ci in container_instances:
            ci_ec2_id = ci['ec2InstanceId']
            ci_arn = ci['containerInstanceArn']
            running_tasks = ci['runningTasksCount']
            status = ci['status']

            print(f"{ci_ec2_id} {status}, {running_tasks} tasks")

            if ci_ec2_id in replace_ids:
                remaining_tasks += running_tasks
                if status == 'ACTIVE':
                    to_drain.append(ci_arn)
            elif status == 'ACTIVE':
                available_instances += 1

        if available_instances < desired_count:
            print("Waiting for new instances to boot")
        elif to_drain:
            print("Draining instances:", to_drain)
            # The API accepts a limited number of container instances per call.
            for batch in _chunks(to_drain, ECS_UPDATE_STATE_BATCH_SIZE):
                ecs.update_container_instances_state(cluster=cluster_name, containerInstances=batch, status='DRAINING')
        elif remaining_tasks == 0:
            break
        else:
            print("Waiting for instances to drain")

        time.sleep(POLL_INTERVAL_SECONDS)

    for instance_id in instances_to_replace:
        answer = input(f"Terminate instance {instance_id}? (y/n) ").strip().lower()
        if answer == "y":
            # Decrement the desired capacity so the group shrinks back instead
            # of launching yet another replacement.
            autoscaling.terminate_instance_in_auto_scaling_group(InstanceId=instance_id, ShouldDecrementDesiredCapacity=True)
            print("Terminated instance")
        else:
            print("Not terminating this instance")

    print("Done")