"""Poll a load balancer and take an AMI of instances that stop responding.

Every few seconds the health of the instances registered with the load
balancer named by ``elb`` is fetched.  Any instance that is not reported as
``InService`` and that was launched more than five minutes ago gets an AMI
created from it (once per instance per process lifetime), so that its state
can be inspected later.
"""
import datetime
import time

import boto3
from botocore.exceptions import ClientError

elb_client = boto3.client('elb')
ec2_client = boto3.client('ec2')

# Name of the load balancer to watch; must be filled in before running.
elb = 'FILL THIS OUT'

# Instance ids for which an AMI has already been requested, mapped to True.
snapshots = {}

# Instances launched more recently than this are skipped (still booting).
MIN_INSTANCE_AGE = datetime.timedelta(minutes=5)
# Pause after a successful AMI request before polling resumes.
SNAPSHOT_COOLDOWN_SECONDS = 300
# Delay between two consecutive health checks.
POLL_INTERVAL_SECONDS = 5


def _timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def maybeSnapshotInstance(instance_id):
    """Create an AMI of ``instance_id`` unless it was launched very recently.

    The instance is looked up through ``describe_instances``.  For each
    matching instance older than ``MIN_INSTANCE_AGE`` an AMI is requested;
    on success the id is recorded in ``snapshots`` and the function sleeps
    for ``SNAPSHOT_COOLDOWN_SECONDS``.

    Failures of the ``create_image`` call are logged and otherwise ignored
    (best effort): nothing is recorded, so the instance is retried on a
    later poll.  Errors from ``describe_instances`` propagate to the caller.

    Args:
        instance_id: EC2 instance id to inspect.
    """
    description = ec2_client.describe_instances(InstanceIds=[instance_id])
    for reservation in description['Reservations']:
        for instance in reservation['Instances']:
            launch_time = instance['LaunchTime']
            # Compare in the launch time's own timezone so that aware and
            # naive datetimes are never mixed.
            now = datetime.datetime.now(launch_time.tzinfo)
            if launch_time >= now - MIN_INSTANCE_AGE:
                # Not older than 5 minutes: don't take a snapshot.
                continue

            current_id = instance['InstanceId']
            print("{} Taking AMI of non responding box with id {}".format(
                _timestamp(), current_id))
            try:
                # NOTE(review): NoReboot is not passed, so the API default
                # applies -- confirm that is the intended behaviour.
                ec2_client.create_image(
                    Description="AMI of non responding box: {} check the logs".format(current_id),
                    Name="non-respondingbox-{}".format(current_id),
                    InstanceId=current_id,
                )
            except Exception as err:
                # Stay best-effort, but do not hide the failure, and do not
                # catch KeyboardInterrupt/SystemExit as a bare except would.
                print("{} Failed to create AMI for {}: {}".format(
                    _timestamp(), current_id, err))
                continue

            snapshots[instance_id] = True
            # Cooldown period; kept outside the try block so that Ctrl-C
            # during the wait is not swallowed.
            time.sleep(SNAPSHOT_COOLDOWN_SECONDS)


def checkInstances():
    """Check load balancer instance health and snapshot unhealthy instances.

    Instances already present in ``snapshots`` are skipped; every other
    instance whose state is not ``InService`` is handed to
    ``maybeSnapshotInstance``.
    """
    resp = elb_client.describe_instance_health(LoadBalancerName=elb)
    for instance in resp['InstanceStates']:
        instance_id = instance['InstanceId']
        if snapshots.get(instance_id):
            continue  # we already snapshotted this instance
        if instance['State'] != 'InService':
            maybeSnapshotInstance(instance_id)


def main():
    """Run the health check forever, pausing between polls."""
    while True:
        checkInstances()
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == '__main__':
    main()