# vim: set fileencoding=utf-8 :
#
# How to store and retrieve gzip-compressed objects in AWS S3
###########################################################################
#
# Copyright 2015 Vince Veselosky and contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import, print_function, unicode_literals

from io import BytesIO
from gzip import GzipFile

import boto3

s3 = boto3.client('s3')
bucket = 'bluebucket.mindvessel.net'

# Read in some example text, as unicode. Open the file in binary mode and
# decode explicitly, which behaves the same on Python 2 and 3. (Calling
# .decode() on the result of a text-mode read fails on Python 3, where it
# is already a unicode str.)
with open("utext.txt", "rb") as fi:
    text_body = fi.read().decode("utf-8")

# A GzipFile must wrap a real file or a file-like object. We do not want to
# write to disk, so we use a BytesIO as a buffer.
gz_body = BytesIO()
gz = GzipFile(mode='wb', compresslevel=9, fileobj=gz_body)
gz.write(text_body.encode('utf-8'))  # convert unicode strings to bytes!
gz.close()

# GzipFile has written the compressed bytes into our gz_body buffer.
s3.put_object(
    Bucket=bucket,
    Key='gztest.txt',          # Note: NO .gz extension!
    ContentType='text/plain',  # the original type
    ContentEncoding='gzip',    # MUST be set, or clients get raw gzip bytes
    Body=gz_body.getvalue()
)

retr = s3.get_object(Bucket=bucket, Key='gztest.txt')
# Now the fun part. Reading it back requires this little dance, because
# GzipFile insists that its underlying file-like thing implement tell and
# seek, but boto3's streaming body does not. So we read the whole object
# into a BytesIO buffer first.
bytestream = BytesIO(retr['Body'].read())
got_text = GzipFile(mode='rb', fileobj=bytestream).read().decode('utf-8')
assert got_text == text_body
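
# Sanity check: the Content-Type and Content-Encoding set at upload time are
# stored as object metadata and come back on a HEAD request. A minimal check
# with boto3's head_object, assuming the same bucket and key as above:
resp = s3.head_object(Bucket=bucket, Key='gztest.txt')
assert resp['ContentType'] == 'text/plain'
assert resp['ContentEncoding'] == 'gzip'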
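
# On Python 3.2+, the stdlib offers one-shot helpers that skip the BytesIO
# dance on both sides. A sketch under that assumption (gzip.compress and
# gzip.decompress do not exist on Python 2, hence the version guard):
import sys
if sys.version_info >= (3, 2):
    import gzip
    s3.put_object(
        Bucket=bucket,
        Key='gztest.txt',
        ContentType='text/plain',
        ContentEncoding='gzip',
        Body=gzip.compress(text_body.encode('utf-8'), compresslevel=9)
    )
    retr = s3.get_object(Bucket=bucket, Key='gztest.txt')
    assert gzip.decompress(retr['Body'].read()).decode('utf-8') == text_body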