"""Delete objects older than a configurable number of days from an S3 bucket.

Configuration is read from environment variables:

* ``AWS_PROFILE`` -- name of the AWS profile used to build the boto3 session.
* ``S3_BUCKET``   -- name of the bucket to clean up.
* ``DAYS``        -- positive integer; objects whose ``LastModified`` is more
  than this many days in the past are deleted.
"""

import os
from datetime import datetime, timedelta, timezone

import boto3

# S3 DeleteObjects accepts at most this many keys per request.
MAX_KEYS_PER_DELETE = 1000


def _require_env(name):
    """Return the value of environment variable *name*, or raise ValueError."""
    value = os.getenv(name)
    if not value:
        raise ValueError(f"Missing variable {name}")
    return value


def _delete_keys(s3_client, bucket_name, keys):
    """Issue one DeleteObjects request for *keys* and report per-key failures."""
    response = s3_client.delete_objects(
        Bucket=bucket_name,
        Delete={
            "Objects": keys,
        },
    )
    # DeleteObjects can succeed as a request while individual keys fail;
    # surface those failures instead of silently dropping them.
    for error in response.get("Errors", []):
        print(
            f"Failed to delete {error.get('Key')}: "
            f"{error.get('Code')} {error.get('Message')}"
        )


def main():
    """Delete every object in ``S3_BUCKET`` older than ``DAYS`` days.

    Raises:
        ValueError: if a required environment variable is missing or empty,
            if ``DAYS`` is not a positive integer, or if the bucket has no
            creation date (reported as an unknown bucket).
    """
    aws_profile = _require_env("AWS_PROFILE")
    bucket_name = _require_env("S3_BUCKET")

    days = int(_require_env("DAYS"))
    # Explicit check rather than ``assert``: assertions are stripped under
    # ``python -O``, and a non-positive value would match every object.
    if days <= 0:
        raise ValueError(f"DAYS must be a positive integer, got {days}")

    date_threshold = datetime.now(timezone.utc) - timedelta(days=days)

    session = boto3.session.Session(profile_name=aws_profile)

    s3_resource = session.resource("s3")
    if not s3_resource.Bucket(bucket_name).creation_date:
        raise ValueError(f"Unknown bucket {bucket_name}")

    keys = []
    s3_client = session.client("s3")
    s3_paginator = s3_client.get_paginator("list_objects_v2")
    s3_pages = s3_paginator.paginate(Bucket=bucket_name)

    for s3_page in s3_pages:
        if "Contents" not in s3_page:
            continue

        print(f"Found {len(s3_page['Contents'])} objects")
        for key_object in s3_page["Contents"]:
            if key_object["LastModified"] >= date_threshold:
                continue

            keys.append({
                "Key": key_object["Key"],
            })

            # Flush as soon as the batch is full so a single request never
            # carries more keys than the API accepts.
            if len(keys) >= MAX_KEYS_PER_DELETE:
                print(f"Will delete {len(keys)} objects")
                _delete_keys(s3_client, bucket_name, keys)
                keys = []

    # Flush whatever is left, including a batch holding a single key.
    if keys:
        print(f"Will delete {len(keys)} keys")
        _delete_keys(s3_client, bucket_name, keys)


if __name__ == "__main__":
    main()