Created
July 29, 2016 09:49
-
-
Save bancek/7fe5fe0476e81495f99633cf91b6ee29 to your computer and use it in GitHub Desktop.
Revisions
-
bancek created this gist
Jul 29, 2016 .There are no files selected for viewing
"""Stream a directory as an uncompressed ZIP archive, chunk by chunk.

The archive is never held in memory or on disk as a whole: ``XZipFile``
writes into a ``ZipBuffer`` that the generator in ``_stream_folder`` drains
after every chunk.  ``stream_folder`` wraps that generator in an object with
a ``read()`` method and a ``name`` attribute.

The code runs on both Python 2.7 and Python 3.
"""
import os
import struct
import time
import zipfile
from binascii import crc32

try:
    # Python 2: walking a unicode path makes os.walk return unicode names.
    _to_text = unicode
except NameError:
    # Python 3: os.fsdecode accepts str, bytes and path-like objects.
    _to_text = os.fsdecode

# Number of bytes read from a source file per iteration.
_CHUNK_SIZE = 1024 * 8

# General-purpose flag bit 3: CRC and sizes follow the file data in a
# "data descriptor" instead of being stored in the local file header.
_USE_DATA_DESCRIPTOR = 0x08


def commonprefix(m):
    """Return the longest common leading part of the sequences in ``m``.

    Works element-wise on any sliceable, comparable sequences (strings or,
    as used by ``relpath``, lists of path components).  Returns ``''`` when
    ``m`` is empty.
    """
    if not m:
        return ''
    # The smallest and largest items differ the most, so comparing just
    # those two is enough to find the prefix shared by every item.
    s1 = min(m)
    s2 = max(m)
    for i, c in enumerate(s1):
        if c != s2[i]:
            return s1[:i]
    return s1


def relpath(path, start=os.path.curdir):
    """Return ``path`` expressed relative to the directory ``start``.

    Both arguments are made absolute first.  Raises ``ValueError`` when
    ``path`` is empty.
    """
    if not path:
        raise ValueError("no path specified")
    start_list = [x for x in os.path.abspath(start).split(os.path.sep) if x]
    path_list = [x for x in os.path.abspath(path).split(os.path.sep) if x]
    # Number of leading components the two paths share.
    i = len(commonprefix([start_list, path_list]))
    rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
    if not rel_list:
        return os.path.curdir
    return os.path.join(*rel_list)


class ZipBuffer(object):
    """A write-only, file-like sink for ``zipfile.ZipFile``.

    Written chunks are collected in a list until ``get_and_clear`` hands
    them over; ``pos`` keeps counting the total number of bytes written so
    ``tell()`` stays correct after the list has been emptied.
    """

    def __init__(self):
        self.data = []
        self.pos = 0

    def write(self, data):
        """Append ``data`` to the pending chunks and advance the position."""
        self.data.append(data)
        self.pos += len(data)

    def tell(self):
        """Return the total number of bytes written so far."""
        # zipfile calls this so we need it
        return self.pos

    def flush(self):
        """Do nothing; present only because zipfile calls it."""
        pass

    def get_and_clear(self):
        """Return the list of pending chunks and start a fresh one."""
        result = self.data
        self.data = []
        return result


class XZipFile(zipfile.ZipFile):
    """``ZipFile`` with a generator-based, uncompressed file writer."""

    def write_streaming(self, zinfo_or_arcname, filename, compress_type=None):
        """Copy the file at ``filename`` into the archive without compression.

        This is a generator.  It yields ``None`` after each chunk has been
        written to the underlying file object, so the caller can drain that
        object between chunks.  The entry is only registered in the archive
        once the generator has been exhausted.

        zinfo_or_arcname -- a ``ZipInfo`` instance, or the name the file
                            gets inside the archive.
        filename         -- path of the file on disk to read.
        compress_type    -- must be ``None``; anything else is rejected.

        Raises ``RuntimeError`` if the archive is closed or a compression
        type is requested, and ``zipfile.LargeZipFile`` if the file needs
        ZIP64 extensions that the archive was opened without.
        """
        if not isinstance(zinfo_or_arcname, zipfile.ZipInfo):
            zinfo = zipfile.ZipInfo(
                filename=zinfo_or_arcname,
                date_time=time.localtime(time.time())[:6])
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")
        if compress_type is not None:
            raise RuntimeError("Compression not supported!")

        # The bytes are copied verbatim, so the entry has to be labelled as
        # stored no matter what the archive default or a caller-supplied
        # ZipInfo says; otherwise readers would try to inflate raw data.
        zinfo.compress_type = zipfile.ZIP_STORED
        # CRC and size are only known after the data has been streamed, so
        # they must go into a trailing data descriptor.
        zinfo.flag_bits |= _USE_DATA_DESCRIPTOR
        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Open the source before writing anything, so an unreadable file
        # does not leave a local header without data in the archive.
        with open(filename, 'rb') as fp:
            # Decide on ZIP64 up front (same margin the stdlib uses): the
            # choice fixes the layout of both header and data descriptor.
            expected_size = os.fstat(fp.fileno()).st_size
            zip64 = expected_size * 1.05 > zipfile.ZIP64_LIMIT
            if zip64 and not self._allowZip64:
                raise zipfile.LargeZipFile(
                    "Filesize would require ZIP64 extensions")

            zinfo.CRC = CRC = 0
            zinfo.compress_size = 0
            zinfo.file_size = file_size = 0
            zinfo.header_offset = self.fp.tell()  # Start of header bytes

            self._writecheck(zinfo)
            self._didModify = True

            self.fp.write(zinfo.FileHeader(zip64))

            while True:
                buf = fp.read(_CHUNK_SIZE)
                if not buf:
                    break
                file_size += len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                self.fp.write(buf)
                yield None

        zinfo.CRC = CRC
        zinfo.file_size = file_size
        zinfo.compress_size = file_size

        # Write CRC and file sizes after the file data; ZIP64 entries use
        # 8-byte size fields.
        fmt = '<LQQ' if zip64 else '<LLL'
        self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
        # Python 3's zipfile takes the central directory offset from
        # start_dir; on Python 2 this is just an unused attribute.
        self.start_dir = self.fp.tell()

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo


class FileGenerator(object):
    """Expose the streamed archive of ``src_dir`` through ``read()``.

    ``name`` is the base name of ``src_dir`` with ``.zip`` appended.
    """

    def __init__(self, src_dir, passes_filters):
        self.name = os.path.basename(src_dir.rstrip('/')) + '.zip'
        self.generator = _stream_folder(src_dir, passes_filters)

    def read(self, n=-1):
        """Return the next non-empty chunk, or ``b''`` at end of stream.

        ``n`` is accepted for file-like compatibility but ignored: chunks
        are returned exactly as the archive writer produced them.
        """
        try:
            x = next(self.generator)
            # An empty chunk would look like end-of-stream to the caller.
            while not len(x):
                x = next(self.generator)
            return x
        except StopIteration:
            return b''


def stream_folder(src_dir, passes_filters):
    """Return a ``FileGenerator`` streaming ``src_dir`` as a ZIP archive.

    ``passes_filters`` is called with each file path found under
    ``src_dir``; only files for which it returns a true value are added.
    """
    return FileGenerator(src_dir, passes_filters)


def _stream_folder(src_dir, passes_filters):
    """Yield the bytes of a stored ZIP archive of ``src_dir`` in chunks."""
    sink = ZipBuffer()
    archive = XZipFile(sink, mode='w', compression=zipfile.ZIP_STORED,
                       allowZip64=True)
    top = _to_text(src_dir)
    for root, _dirs, files in os.walk(top):
        for f in files:
            path = os.path.join(root, f)
            if passes_filters(path):
                # ZIP member names always use forward slashes.
                rel_path = relpath(path, top).replace(os.path.sep, '/')
                for _ in archive.write_streaming(rel_path, path):
                    for chunk in sink.get_and_clear():
                        yield chunk
    archive.close()
    # close() generates some more data, so we yield that too
    for chunk in sink.get_and_clear():
        yield chunk