-
-
Save veselosky/9427faa38cee75cd8e27 to your computer and use it in GitHub Desktop.
| # vim: set fileencoding=utf-8 : | |
| # | |
| # How to store and retrieve gzip-compressed objects in AWS S3 | |
| ########################################################################### | |
| # | |
| # Copyright 2015 Vince Veselosky and contributors | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # | |
| from __future__ import absolute_import, print_function, unicode_literals | |
| from io import BytesIO | |
| from gzip import GzipFile | |
| import boto3 | |
# Demonstration script: gzip a unicode text in memory, upload it to S3 with
# Content-Encoding: gzip, download it again and verify the round trip.
# NOTE: both directions hold the entire compressed payload in memory
# (gz_body.getvalue() on upload, retr['Body'].read() on download).
s3 = boto3.client('s3')
bucket = 'bluebucket.mindvessel.net'

# Read in some example text, as unicode. The file is opened in binary mode so
# that read() returns bytes on both Python 2 and Python 3; a text-mode read()
# on Python 3 returns str, which has no decode() method.
with open("utext.txt", "rb") as fi:
    text_body = fi.read().decode("utf-8")

# A GzipFile must wrap a real file or a file-like object. We do not want to
# write to disk, so we use a BytesIO as a buffer.
gz_body = BytesIO()
# Leaving the with block closes the GzipFile, which writes the gzip trailer
# into gz_body even if write() raises. The wrapped buffer itself stays open.
with GzipFile(fileobj=gz_body, mode='wb', compresslevel=9) as gz:
    gz.write(text_body.encode('utf-8'))  # convert unicode strings to bytes!

# GzipFile has written the compressed bytes into our gz_body
s3.put_object(
    Bucket=bucket,
    Key='gztest.txt',  # Note: NO .gz extension!
    ContentType='text/plain',  # the original type
    ContentEncoding='gzip',  # MUST have or browsers will error
    Body=gz_body.getvalue(),
)

retr = s3.get_object(Bucket=bucket, Key='gztest.txt')

# Now the fun part. Reading it back requires this little dance, because
# GzipFile insists that its underlying file-like thing implement tell and
# seek, but boto3's io stream does not.
bytestream = BytesIO(retr['Body'].read())
with GzipFile(fileobj=bytestream, mode='rb') as gz_in:
    got_text = gz_in.read().decode('utf-8')

# Round-trip sanity check for this demo: what came back must equal what
# was sent.
assert got_text == text_body
Quite nice, however it has a really big issue: I have the feeling that you need to hold the compressed file in memory before sending it. It might work for something quite small, but it will definitely be a pain for very large files.
Quite nice, however it has a really big issue: I have the feeling that you need to hold the compressed file in memory before sending it. It might work for something quite small, but it will definitely be a pain for very large files.
If you have a use case that needs to handle a bigger size, I think you can update LN:50,51 to stream to a file.
Thank you so much, Saved my day !
Thank you, this was very helpful after many struggles and searches
@sanjayadroll Did you ever solve it? It looks like the source file has characters that could not be encoded ....
Thanks a lot !!