Skip to content

Instantly share code, notes, and snippets.

@veselosky
Last active August 29, 2024 11:32
Show Gist options
  • Save veselosky/9427faa38cee75cd8e27 to your computer and use it in GitHub Desktop.
Save veselosky/9427faa38cee75cd8e27 to your computer and use it in GitHub Desktop.

Revisions

  1. veselosky revised this gist Dec 1, 2015. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion s3gzip.py
    Original file line number Diff line number Diff line change
    @@ -45,7 +45,8 @@
    )
    retr = s3.get_object(Bucket=bucket, Key='gztest.txt')
    # Now the fun part. Reading it back requires this little dance, because
    # GzipFile insists that it's underlying file-like thing implement tell and
    # GzipFile insists that its underlying file-like thing implement tell and
    # seek, but boto3's io stream does not.
    bytestream = BytesIO(retr['Body'].read())
    got_text = GzipFile(None, 'rb', fileobj=bytestream).read().decode('utf-8')
    assert got_text == text_body
  2. veselosky created this gist Dec 1, 2015.
    51 changes: 51 additions & 0 deletions s3gzip.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,51 @@
    # vim: set fileencoding=utf-8 :
    #
    # How to store and retrieve gzip-compressed objects in AWS S3
    ###########################################################################
    #
    # Copyright 2015 Vince Veselosky and contributors
    #
    # Licensed under the Apache License, Version 2.0 (the "License");
    # you may not use this file except in compliance with the License.
    # You may obtain a copy of the License at
    #
    # http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    #
    from __future__ import absolute_import, print_function, unicode_literals
    from io import BytesIO
    from gzip import GzipFile

    import boto3
    s3 = boto3.client('s3')
    bucket = 'bluebucket.mindvessel.net'

    # Read in some example text, as unicode. The file is opened in binary
    # mode and decoded explicitly so the result is unicode text on both
    # Python 2 and Python 3; a text-mode read on Python 3 already returns
    # str, which has no .decode() method.
    with open("utext.txt", "rb") as fi:
        text_body = fi.read().decode("utf-8")

    # A GzipFile must wrap a real file or a file-like object. We do not want to
    # write to disk, so we use a BytesIO as a buffer.
    gz_body = BytesIO()
    # Leaving the with-block closes the GzipFile, which finishes the gzip
    # stream. Closing a GzipFile does not close the fileobj it wraps, so
    # gz_body stays readable afterwards.
    with GzipFile(None, 'wb', 9, gz_body) as gz:
        gz.write(text_body.encode('utf-8'))  # convert unicode to bytes!
    # GzipFile has written the compressed bytes into our gz_body
    s3.put_object(
        Bucket=bucket,
        Key='gztest.txt',  # Note: NO .gz extension!
        ContentType='text/plain',  # the original type
        ContentEncoding='gzip',  # MUST have or browsers will error
        Body=gz_body.getvalue(),
    )
    retr = s3.get_object(Bucket=bucket, Key='gztest.txt')
    # Now the fun part. Reading it back requires this little dance, because
    # GzipFile insists that its underlying file-like thing implement tell and
    # seek, but boto3's io stream does not.
    bytestream = BytesIO(retr['Body'].read())
    with GzipFile(None, 'rb', fileobj=bytestream) as gz_in:
        got_text = gz_in.read().decode('utf-8')
    # Round-trip check: what came back from S3 must equal what was uploaded.
    assert got_text == text_body