Skip to content

Instantly share code, notes, and snippets.

@teasherm
Last active March 4, 2025 21:55
Show Gist options
  • Save teasherm/bb73f21ed2f3b46bc1c2ca48ec2c1cf5 to your computer and use it in GitHub Desktop.

Revisions

  1. teasherm revised this gist Aug 9, 2017. 1 changed file with 32 additions and 16 deletions.
    48 changes: 32 additions & 16 deletions s3_multipart_upload.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@
    import argparse
    import os

    import boto3
    @@ -11,8 +12,9 @@ def __init__(self,
    bucket,
    key,
    local_path,
    part_size=int(10e6),
    region_name="us-east-1",
    part_size=int(15e6),
    profile_name=None,
    region_name="eu-west-1",
    verbose=False):
    self.bucket = bucket
    self.key = key
    @@ -22,13 +24,15 @@ def __init__(self,
    assert part_size > self.PART_MINIMUM
    assert (self.total_bytes % part_size == 0
    or self.total_bytes % part_size > self.PART_MINIMUM)
    self.s3 = boto3.client("s3", region_name=region_name)
    self.s3 = boto3.session.Session(
    profile_name=profile_name, region_name=region_name).client("s3")
    if verbose:
    boto3.set_stream_logger(name="botocore")

    def abort_all(self):
    mpus = self.s3.list_multipart_uploads(Bucket=self.bucket)
    aborted = []
    print("Aborting", len(mpus), "uploads")
    if "Uploads" in mpus:
    for u in mpus["Uploads"]:
    upload_id = u["UploadId"]
    @@ -45,44 +49,56 @@ def create(self):
    def upload(self, mpu_id):
    parts = []
    uploaded_bytes = 0
    with open(path, "rb") as f:
    with open(self.path, "rb") as f:
    i = 1
    while True:
    data = f.read(part_bytes)
    data = f.read(self.part_bytes)
    if not len(data):
    break
    part = self.s3.upload_part(
    Body=data, Bucket=bucket, Key=key, UploadId=mpu_id, PartNumber=i)
    Body=data, Bucket=self.bucket, Key=self.key, UploadId=mpu_id, PartNumber=i)
    parts.append({"PartNumber": i, "ETag": part["ETag"]})
    uploaded_bytes += len(data)
    print("{0} of {1} uploaded ({2:.3f}%)".format(
    uploaded_bytes, total_bytes,
    as_percent(uploaded_bytes, total_bytes)))
    uploaded_bytes, self.total_bytes,
    as_percent(uploaded_bytes, self.total_bytes)))
    i += 1
    return parts

    def complete(self, mpu_id, parts):
    result = self.s3.complete_multipart_upload(
    Bucket=bucket,
    Key=key,
    Bucket=self.bucket,
    Key=self.key,
    UploadId=mpu_id,
    MultipartUpload={"Parts": parts})
    return result


    # Helper
    def as_percent(self, num, denom):
    def as_percent(num, denom):
    return float(num) / float(denom) * 100.0


    def parse_args():
    parser = argparse.ArgumentParser(description='Multipart upload')
    parser.add_argument('--bucket', required=True)
    parser.add_argument('--key', required=True)
    parser.add_argument('--path', required=True)
    parser.add_argument('--region', default="eu-west-1")
    parser.add_argument('--profile', default=None)
    return parser.parse_args()


    def main():
    args = parse_args()
    mpu = S3MultipartUpload(
    "bucket",
    "key",
    "path/to/file",
    region_name="eu-west-1")
    args.bucket,
    args.key,
    args.path,
    profile_name=args.profile,
    region_name=args.region)
    # abort all multipart uploads for this bucket (optional, for starting over)
    # mpu.abort_all()
    mpu.abort_all()
    # create new multipart upload
    mpu_id = mpu.create()
    # upload parts
  2. teasherm revised this gist Mar 1, 2017. No changes.
  3. teasherm created this gist Mar 1, 2017.
    95 changes: 95 additions & 0 deletions s3_multipart_upload.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,95 @@
    import os

    import boto3


    class S3MultipartUpload(object):
        """Drive an S3 multipart upload of a local file via the boto3 client.

        Splits the file at ``local_path`` into ``part_size``-byte parts and
        uploads them to ``bucket``/``key`` using the low-level S3 API:
        create -> upload parts -> complete (with abort_all as a reset helper).
        """

        # AWS throws EntityTooSmall error for parts smaller than 5 MB
        PART_MINIMUM = int(5e6)

        def __init__(self,
                     bucket,
                     key,
                     local_path,
                     part_size=int(10e6),
                     region_name="us-east-1",
                     verbose=False):
            self.bucket = bucket
            self.key = key
            self.path = local_path
            self.total_bytes = os.stat(local_path).st_size
            self.part_bytes = part_size
            # Every part except possibly the last must exceed the AWS minimum,
            # otherwise complete_multipart_upload fails with EntityTooSmall.
            assert part_size > self.PART_MINIMUM
            assert (self.total_bytes % part_size == 0
                    or self.total_bytes % part_size > self.PART_MINIMUM)
            self.s3 = boto3.client("s3", region_name=region_name)
            if verbose:
                boto3.set_stream_logger(name="botocore")

        def abort_all(self):
            """Abort every in-progress multipart upload for this bucket.

            Returns the list of abort_multipart_upload responses (empty when
            nothing was pending).
            """
            mpus = self.s3.list_multipart_uploads(Bucket=self.bucket)
            aborted = []
            if "Uploads" in mpus:
                for u in mpus["Uploads"]:
                    upload_id = u["UploadId"]
                    aborted.append(
                        self.s3.abort_multipart_upload(
                            Bucket=self.bucket, Key=self.key, UploadId=upload_id))
            return aborted

        def create(self):
            """Start a new multipart upload and return its UploadId."""
            mpu = self.s3.create_multipart_upload(Bucket=self.bucket, Key=self.key)
            mpu_id = mpu["UploadId"]
            return mpu_id

        def upload(self, mpu_id):
            """Upload the file in parts; return the parts list for complete().

            BUG FIX: the original body read bare names (``path``,
            ``part_bytes``, ``bucket``, ``key``, ``total_bytes``) that do not
            exist at module level — they are instance attributes and must be
            accessed through ``self``, which raised NameError on first use.
            """
            parts = []
            uploaded_bytes = 0
            with open(self.path, "rb") as f:
                i = 1
                while True:
                    data = f.read(self.part_bytes)
                    if not len(data):
                        break
                    part = self.s3.upload_part(
                        Body=data, Bucket=self.bucket, Key=self.key,
                        UploadId=mpu_id, PartNumber=i)
                    # S3 requires each part's ETag to be echoed back on completion.
                    parts.append({"PartNumber": i, "ETag": part["ETag"]})
                    uploaded_bytes += len(data)
                    # Progress computed inline so this class does not depend on
                    # the module-level as_percent helper.
                    print("{0} of {1} uploaded ({2:.3f}%)".format(
                        uploaded_bytes, self.total_bytes,
                        float(uploaded_bytes) / float(self.total_bytes) * 100.0))
                    i += 1
            return parts

        def complete(self, mpu_id, parts):
            """Finish the multipart upload; return the S3 response.

            BUG FIX: ``bucket``/``key`` were read as bare names in the
            original (NameError); they are instance attributes.
            """
            result = self.s3.complete_multipart_upload(
                Bucket=self.bucket,
                Key=self.key,
                UploadId=mpu_id,
                MultipartUpload={"Parts": parts})
            return result


    # Helper
    def as_percent(num, denom):
        """Return ``num``/``denom`` expressed as a percentage (float).

        BUG FIX: the original declared a ``self`` parameter on this
        module-level function, so the two-argument calls made elsewhere in
        the file (e.g. ``as_percent(uploaded_bytes, total_bytes)``) raised
        TypeError. ``self`` is dropped to match the callers.
        """
        return float(num) / float(denom) * 100.0


    def main():
        """Demo driver: create, upload, and complete a multipart upload."""
        # Placeholder values — swap in a real bucket, key, and file path.
        uploader = S3MultipartUpload(
            "bucket",
            "key",
            "path/to/file",
            region_name="eu-west-1")
        # Optionally wipe any stale multipart uploads before starting over:
        # uploader.abort_all()
        # Begin a fresh multipart upload and push the parts.
        upload_id = uploader.create()
        part_list = uploader.upload(upload_id)
        # Stitch the parts together server-side and show the response.
        print(uploader.complete(upload_id, part_list))


    if __name__ == "__main__":
        main()