#!/usr/bin/env python3
#
# Script to replace EC2 instances in an ECS cluster's auto-scaling group after
# changing the AMI or instance type in the launch template. It
# checks for instances with the incorrect AMI or type, scales up the
# auto-scaling group with replacement instances, then drains the tasks
# from the old instances.
#
# Usage: aws-vault exec profile-name -- python3 replace_ecs_cluster_instances.py --group=asg-name --cluster=ecs-cluster-name --count=3
#
# The count is specified so that it knows what the "real" desired count is in
# case it is interrupted and restarted after increasing the desired count.
#
# License: ISC
# Copyright 2019 3D Robotics
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
# USE OR PERFORMANCE OF THIS SOFTWARE.
#

import argparse
import time

import boto3

autoscaling = boto3.client('autoscaling')
ec2 = boto3.client('ec2')
ecs = boto3.client('ecs')

# Per-call limits of the ECS API: DescribeContainerInstances accepts up to
# 100 container instances, UpdateContainerInstancesState up to 10.
DESCRIBE_CONTAINER_INSTANCES_LIMIT = 100
UPDATE_CONTAINER_INSTANCES_STATE_LIMIT = 10

# Seconds to wait between two polls of the ECS cluster state.
POLL_INTERVAL_SECONDS = 10


def _chunks(items, size):
    """Yield consecutive slices of *items* holding at most *size* elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def _parse_args():
    """Parse the command line and return the argparse namespace."""
    parser = argparse.ArgumentParser(description='Update the instances in the autoscaling group.')
    parser.add_argument('--group', metavar='NAME', required=True, help='Autoscaling group name')
    parser.add_argument('--cluster', metavar='NAME', required=True, help='ECS cluster name')
    # The value is an integer instance count, so the placeholder is N (the
    # previous 'ARN' placeholder was misleading in --help output).
    parser.add_argument('--count', metavar='N', type=int, required=True,
                        help='Desired count of instances in autoscaling group')
    return parser.parse_args()


def _describe_group(group_name):
    """Return the description of the auto-scaling group named *group_name*.

    Exits with an error message when the group does not exist or is not
    configured with a plain launch template, since the rest of the script
    reads the target AMI and instance type from that template.
    """
    groups = autoscaling.describe_auto_scaling_groups(
        AutoScalingGroupNames=[group_name]
    )['AutoScalingGroups']
    if not groups:
        raise SystemExit(f"Auto-scaling group {group_name!r} not found")

    group = groups[0]
    if group['AutoScalingGroupName'] != group_name:
        raise SystemExit(
            f"Expected auto-scaling group {group_name!r}, got {group['AutoScalingGroupName']!r}"
        )
    if 'LaunchTemplate' not in group:
        raise SystemExit(
            f"Auto-scaling group {group_name!r} has no launch template; cannot determine the target AMI"
        )
    return group


def _find_instances_to_replace(asg_instances, target_ami, target_instance_type):
    """Return the IDs of the instances whose AMI or type differs from the target.

    Prints one status line per inspected instance.
    """
    # An empty ID list is not sent as a filter, so the call would describe
    # every instance visible to the credentials instead of none.
    if not asg_instances:
        return []

    reservations = ec2.describe_instances(InstanceIds=asg_instances)['Reservations']

    instances_to_replace = []
    for reservation in reservations:
        for instance in reservation['Instances']:
            instance_id = instance['InstanceId']
            instance_ami = instance['ImageId']
            instance_type = instance['InstanceType']
            instance_launched = instance['LaunchTime']

            needs_replace = instance_ami != target_ami or instance_type != target_instance_type
            if needs_replace:
                instances_to_replace.append(instance_id)

            print(f"Instance {instance_id}, created {instance_launched.ctime()}, type {instance_type}, AMI {instance_ami} -- {'REPLACE' if needs_replace else 'OK'}")
    return instances_to_replace


def _list_container_instances(cluster_name):
    """Return every container instance of the cluster, oldest registration first."""
    arns = []
    # The listing is paginated; a single call only returns the first page.
    paginator = ecs.get_paginator('list_container_instances')
    for page in paginator.paginate(cluster=cluster_name):
        arns.extend(page['containerInstanceArns'])

    container_instances = []
    # Batching keeps each call within the API limit and also avoids calling
    # the API with an empty list, which it rejects.
    for batch in _chunks(arns, DESCRIBE_CONTAINER_INSTANCES_LIMIT):
        container_instances.extend(
            ecs.describe_container_instances(
                cluster=cluster_name, containerInstances=batch
            )['containerInstances']
        )

    container_instances.sort(key=lambda ci: ci['registeredAt'])
    return container_instances


def _drain_replaced_instances(cluster_name, desired_count, instances_to_replace):
    """Poll the cluster until the old instances run no tasks any more.

    Waits until at least *desired_count* ACTIVE container instances that are
    not being replaced are registered, then sets the still-ACTIVE old
    instances to DRAINING and keeps polling until their running task count
    reaches zero.
    """
    replace_ids = set(instances_to_replace)

    while True:
        print('\n----\n')

        available_instances = 0
        remaining_tasks = 0
        to_drain = []

        for ci in _list_container_instances(cluster_name):
            ci_ec2_id = ci['ec2InstanceId']
            ci_arn = ci['containerInstanceArn']
            running_tasks = ci['runningTasksCount']
            status = ci['status']

            print(f"{ci_ec2_id} {status}, {running_tasks} tasks")

            if ci_ec2_id in replace_ids:
                remaining_tasks += running_tasks
                if status == 'ACTIVE':
                    to_drain.append(ci_arn)
            elif status == 'ACTIVE':
                available_instances += 1

        if available_instances < desired_count:
            print("Waiting for new instances to boot")
        elif to_drain:
            print("Draining instances:", to_drain)
            for batch in _chunks(to_drain, UPDATE_CONTAINER_INSTANCES_STATE_LIMIT):
                ecs.update_container_instances_state(
                    cluster=cluster_name, containerInstances=batch, status='DRAINING'
                )
        elif remaining_tasks == 0:
            break
        else:
            print("Waiting for instances to drain")

        time.sleep(POLL_INTERVAL_SECONDS)


def main():
    """Replace the outdated instances of an ECS cluster's auto-scaling group.

    Steps: read the target AMI and instance type from the group's launch
    template, find the group's instances that do not match, temporarily raise
    the desired capacity so replacements boot, drain the old instances, and
    finally ask for confirmation before terminating each old instance.
    """
    args = _parse_args()
    group_name = args.group
    cluster_name = args.cluster
    desired_count = args.count

    group = _describe_group(group_name)
    target_launch_template = group['LaunchTemplate']
    asg_instances = [instance['InstanceId'] for instance in group['Instances']]
    prev_desired_count = group['DesiredCapacity']

    print(f"Target launch template {target_launch_template['LaunchTemplateId']} {target_launch_template['Version']}")

    launch_template_version = ec2.describe_launch_template_versions(
        LaunchTemplateId=target_launch_template['LaunchTemplateId'],
        Versions=[target_launch_template['Version']],
    )['LaunchTemplateVersions'][0]
    target_ami = launch_template_version['LaunchTemplateData']['ImageId']
    target_instance_type = launch_template_version['LaunchTemplateData']['InstanceType']

    print(f"Target AMI {target_ami} on {target_instance_type}")

    instances_to_replace = _find_instances_to_replace(asg_instances, target_ami, target_instance_type)

    # max() keeps an already-raised capacity when the script is restarted
    # after a previous, interrupted run increased it.
    new_desired_count = max(prev_desired_count, desired_count + len(instances_to_replace))

    # Fail early with a readable message instead of an API validation error.
    if new_desired_count > group['MaxSize']:
        raise SystemExit(
            f"Need {new_desired_count} instances temporarily, but the group's maximum size is {group['MaxSize']}"
        )

    print(f"Temporarily scaling cluster from {prev_desired_count} to {new_desired_count} instances")
    autoscaling.set_desired_capacity(AutoScalingGroupName=group_name, DesiredCapacity=new_desired_count)

    _drain_replaced_instances(cluster_name, desired_count, instances_to_replace)

    for instance_id in instances_to_replace:
        answer = input(f"Terminate instance {instance_id}? (y/n) ")
        if answer.strip().lower() == "y":
            # Decrementing the desired capacity brings the group back down
            # from the temporarily raised size as old instances go away.
            autoscaling.terminate_instance_in_auto_scaling_group(
                InstanceId=instance_id, ShouldDecrementDesiredCapacity=True
            )
            print("Terminated instance")
        else:
            print("Not terminating this instance")

    print("Done")


if __name__ == "__main__":
    main()