Last active: March 4, 2025 21:55
Revisions
- teasherm revised this gist Aug 9, 2017. 1 changed file with 32 additions and 16 deletions.

This revision adds an argparse CLI and AWS profile support, raises the default part size to 15 MB, defaults the region to eu-west-1, and qualifies the bare `path`/`bucket`/`key` references in `upload()` and `complete()` with `self.`. The full file after the revision:

```python
import argparse
import os

import boto3


class S3MultipartUpload(object):
    # AWS throws EntityTooSmall error for parts smaller than 5 MB
    PART_MINIMUM = int(5e6)

    def __init__(self,
                 bucket,
                 key,
                 local_path,
                 part_size=int(15e6),
                 profile_name=None,
                 region_name="eu-west-1",
                 verbose=False):
        self.bucket = bucket
        self.key = key
        self.path = local_path
        self.total_bytes = os.stat(local_path).st_size
        self.part_bytes = part_size
        assert part_size > self.PART_MINIMUM
        # the remainder after the last full part must be zero (file divides
        # evenly) or itself a valid, above-minimum final part
        assert (self.total_bytes % part_size == 0
                or self.total_bytes % part_size > self.PART_MINIMUM)
        self.s3 = boto3.session.Session(
            profile_name=profile_name, region_name=region_name).client("s3")
        if verbose:
            boto3.set_stream_logger(name="botocore")

    def abort_all(self):
        mpus = self.s3.list_multipart_uploads(Bucket=self.bucket)
        aborted = []
        # count the pending uploads, not the keys of the response dict
        # (the gist printed len(mpus), the number of response fields)
        uploads = mpus.get("Uploads", [])
        print("Aborting", len(uploads), "uploads")
        for u in uploads:
            aborted.append(
                self.s3.abort_multipart_upload(
                    # abort each upload under its own key; the gist passed
                    # self.key, which fails for uploads targeting other keys
                    Bucket=self.bucket, Key=u["Key"], UploadId=u["UploadId"]))
        return aborted

    def create(self):
        mpu = self.s3.create_multipart_upload(Bucket=self.bucket, Key=self.key)
        mpu_id = mpu["UploadId"]
        return mpu_id

    def upload(self, mpu_id):
        parts = []
        uploaded_bytes = 0
        with open(self.path, "rb") as f:
            i = 1
            while True:
                data = f.read(self.part_bytes)
                if not len(data):
                    break
                part = self.s3.upload_part(
                    Body=data, Bucket=self.bucket, Key=self.key,
                    UploadId=mpu_id, PartNumber=i)
                parts.append({"PartNumber": i, "ETag": part["ETag"]})
                uploaded_bytes += len(data)
                print("{0} of {1} uploaded ({2:.3f}%)".format(
                    uploaded_bytes, self.total_bytes,
                    as_percent(uploaded_bytes, self.total_bytes)))
                i += 1
        return parts

    def complete(self, mpu_id, parts):
        result = self.s3.complete_multipart_upload(
            Bucket=self.bucket,
            Key=self.key,
            UploadId=mpu_id,
            MultipartUpload={"Parts": parts})
        return result


# Helper
def as_percent(num, denom):
    return float(num) / float(denom) * 100.0


def parse_args():
    parser = argparse.ArgumentParser(description='Multipart upload')
    parser.add_argument('--bucket', required=True)
    parser.add_argument('--key', required=True)
    parser.add_argument('--path', required=True)
    parser.add_argument('--region', default="eu-west-1")
    parser.add_argument('--profile', default=None)
    return parser.parse_args()


def main():
    args = parse_args()
    mpu = S3MultipartUpload(
        args.bucket,
        args.key,
        args.path,
        profile_name=args.profile,
        region_name=args.region)
    # abort all multipart uploads for this bucket (optional, for starting over)
    mpu.abort_all()
    # create new multipart upload
    mpu_id = mpu.create()
    # upload parts (the diff view cuts off here; the remaining lines are
    # unchanged from the original version below)
    parts = mpu.upload(mpu_id)
    # complete multipart upload
    print(mpu.complete(mpu_id, parts))


if __name__ == "__main__":
    main()
```
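The two assertions in `__init__` are easy to trip on arbitrary file sizes. As a quick sanity check, here is a standalone sketch of the same rule (the `check_part_size` helper is hypothetical, not part of the gist):

```python
import os


def check_part_size(local_path, part_size, minimum=int(5e6)):
    """Mirror the gist's __init__ assertions: the part size must exceed
    the 5 MB minimum, and the final remainder part must be either empty
    (the file divides evenly) or itself above the minimum."""
    total_bytes = os.stat(local_path).st_size
    remainder = total_bytes % part_size
    return part_size > minimum and (remainder == 0 or remainder > minimum)
```

One caveat: the gist's `PART_MINIMUM` is `int(5e6)` (5,000,000 bytes), while S3's documented floor for non-final parts is 5 MiB (5,242,880 bytes), so part sizes between the two values can still fail with EntityTooSmall.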
- teasherm revised this gist Mar 1, 2017. No changes.
- teasherm created this gist Mar 1, 2017.

The initial version: hard-coded arguments in `main()`, a 10 MB default part size, and a plain `boto3.client` defaulting to us-east-1. As created, `upload()` and `complete()` referenced bare names (`path`, `part_bytes`, `bucket`, `key`, `total_bytes`) and `as_percent()` took a stray `self` parameter, all of which would raise at runtime; the self-qualified forms below reflect the fix the Aug 9, 2017 revision made:

```python
import os

import boto3


class S3MultipartUpload(object):
    # AWS throws EntityTooSmall error for parts smaller than 5 MB
    PART_MINIMUM = int(5e6)

    def __init__(self,
                 bucket,
                 key,
                 local_path,
                 part_size=int(10e6),
                 region_name="us-east-1",
                 verbose=False):
        self.bucket = bucket
        self.key = key
        self.path = local_path
        self.total_bytes = os.stat(local_path).st_size
        self.part_bytes = part_size
        assert part_size > self.PART_MINIMUM
        assert (self.total_bytes % part_size == 0
                or self.total_bytes % part_size > self.PART_MINIMUM)
        self.s3 = boto3.client("s3", region_name=region_name)
        if verbose:
            boto3.set_stream_logger(name="botocore")

    def abort_all(self):
        mpus = self.s3.list_multipart_uploads(Bucket=self.bucket)
        aborted = []
        if "Uploads" in mpus:
            for u in mpus["Uploads"]:
                upload_id = u["UploadId"]
                aborted.append(
                    self.s3.abort_multipart_upload(
                        Bucket=self.bucket, Key=self.key, UploadId=upload_id))
        return aborted

    def create(self):
        mpu = self.s3.create_multipart_upload(Bucket=self.bucket, Key=self.key)
        mpu_id = mpu["UploadId"]
        return mpu_id

    def upload(self, mpu_id):
        parts = []
        uploaded_bytes = 0
        with open(self.path, "rb") as f:
            i = 1
            while True:
                data = f.read(self.part_bytes)
                if not len(data):
                    break
                part = self.s3.upload_part(
                    Body=data, Bucket=self.bucket, Key=self.key,
                    UploadId=mpu_id, PartNumber=i)
                parts.append({"PartNumber": i, "ETag": part["ETag"]})
                uploaded_bytes += len(data)
                print("{0} of {1} uploaded ({2:.3f}%)".format(
                    uploaded_bytes, self.total_bytes,
                    as_percent(uploaded_bytes, self.total_bytes)))
                i += 1
        return parts

    def complete(self, mpu_id, parts):
        result = self.s3.complete_multipart_upload(
            Bucket=self.bucket,
            Key=self.key,
            UploadId=mpu_id,
            MultipartUpload={"Parts": parts})
        return result


# Helper
def as_percent(num, denom):
    return float(num) / float(denom) * 100.0


def main():
    mpu = S3MultipartUpload(
        "bucket", "key", "path/to/file", region_name="eu-west-1")
    # abort all multipart uploads for this bucket (optional, for starting over)
    # mpu.abort_all()
    # create new multipart upload
    mpu_id = mpu.create()
    # upload parts
    parts = mpu.upload(mpu_id)
    # complete multipart upload
    print(mpu.complete(mpu_id, parts))


if __name__ == "__main__":
    main()
```
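One thing the gist does not cover is resuming: because `abort_all` throws away progress, an interrupted transfer restarts from zero. A minimal sketch, assuming the upload ID was kept, of recovering already-uploaded parts with boto3's `list_parts` (the `resume_parts` helper name is hypothetical, not part of the gist):

```python
import boto3


def resume_parts(bucket, key, upload_id, region_name="eu-west-1"):
    """Return parts already stored for an in-progress multipart upload,
    in the {"PartNumber", "ETag"} shape complete_multipart_upload expects.
    Note: list_parts pages at 1,000 parts; very large uploads would need
    to follow IsTruncated/NextPartNumberMarker."""
    s3 = boto3.client("s3", region_name=region_name)
    resp = s3.list_parts(Bucket=bucket, Key=key, UploadId=upload_id)
    return [{"PartNumber": p["PartNumber"], "ETag": p["ETag"]}
            for p in resp.get("Parts", [])]
```

With that list, the remaining parts could be sent with `upload_part` starting at the next part number, then finished with `complete(mpu_id, parts)` as usual.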