Last active
September 28, 2023 17:18
-
-
Save dlinsley/19632c38f660b236ba3f8656b76b2496 to your computer and use it in GitHub Desktop.
Revisions
-
dlinsley revised this gist
Dec 31, 2019 . 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,4 @@ #!/usr/bin/env python3 import boto3 import argparse import string -
dlinsley revised this gist
Oct 20, 2018 . 1 changed file with 8 additions and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,13 @@ import boto3 import argparse import string parser = argparse.ArgumentParser('Find duplicate objects in an aws s3 bucket') parser.add_argument('--bucket', dest='myBucket', default='yourBucketName', help='S3 Bucket to search') cliArgs = parser.parse_args() myBucket = cliArgs.myBucket # each list_objects_v2 request will return up to 1000 objects. # We will loop for every 1000, make another list_objects_v2 until end of bucket is reached -
dlinsley revised this gist
Oct 17, 2017 . 1 changed file with 5 additions and 5 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -13,8 +13,8 @@ s3 = boto3.client('s3') print('searching for duplicate objects') print('') while lastReqLength == 1000: if (lastKey == ""): @@ -28,10 +28,10 @@ thisSize = obj['Size'] thisEtag = obj['ETag'] if thisSize > 0: if thisEtag in existing: #duplicate found: print('!!Duplicate: - %s - %s' % (existing[thisEtag], thisKey)) else: existing[thisEtag] = thisKey print('... The End.') -
dlinsley created this gist
Oct 14, 2017 . There are no files selected for viewing
"""Report objects in an S3 bucket that share an ETag with an earlier object.

The bucket is listed page by page and every non-empty object's ETag is
remembered together with the first key it was seen under.  Any later object
carrying an already-seen ETag is printed as a duplicate of that first key.
"""

# Name of the bucket to scan; edit before running.
myBucket = 'yourBucketName'


def find_duplicates(objects):
    """Yield ``(first_key, duplicate_key)`` for objects whose ETag repeats.

    Args:
        objects: Iterable of mappings with ``'Key'``, ``'Size'`` and
            ``'ETag'`` entries (the shape of the items in the ``'Contents'``
            list of a ``list_objects_v2`` response).

    Yields:
        A ``(first_key, duplicate_key)`` tuple each time an object's ETag
        matches one seen earlier, in the order the objects are consumed.
        Objects whose size is not greater than zero are ignored, so empty
        placeholder objects are never reported as duplicates of each other.
    """
    # NOTE(review): equal ETags are treated as "same content".  ETags of
    # multipart or KMS-encrypted uploads are presumably not plain MD5
    # digests, so identical data uploaded differently may go unreported --
    # confirm against the S3 documentation if that matters.
    first_key_by_etag = {}
    for obj in objects:
        if obj['Size'] > 0:
            etag = obj['ETag']
            key = obj['Key']
            if etag in first_key_by_etag:
                yield first_key_by_etag[etag], key
            else:
                first_key_by_etag[etag] = key


def _iter_bucket_objects(client, bucket):
    """Yield every object entry of *bucket*, following pagination to the end."""
    paginator = client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket):
        # A page without any keys (e.g. an empty bucket) carries no
        # 'Contents' entry at all, so fall back to an empty list instead of
        # raising KeyError.
        yield from page.get('Contents', [])


def main():
    """List ``myBucket`` and print each duplicate pair as it is found."""
    # Imported here so that find_duplicates() can be imported and used
    # without the AWS SDK being installed.
    import boto3

    s3 = boto3.client('s3')

    print('searching for duplicate objects')
    print()

    for first_key, duplicate_key in find_duplicates(
            _iter_bucket_objects(s3, myBucket)):
        # duplicate found:
        print('!!Duplicate: -', first_key, ' - ', duplicate_key)

    print('... The End.')


if __name__ == '__main__':
    main()