Created
January 8, 2021 03:27
-
-
Save peterspackman/928467e0c90f7771d20ea74d2f8f0eb4 to your computer and use it in GitHub Desktop.
Revisions
-
peterspackman created this gist
Jan 8, 2021. There are no files selected for viewing.
#!/usr/bin/env python
"""Create, list and extract SQLite archive ("sqlar") files.

An archive is an SQLite database with a single ``sqlar`` table; each row
holds a file name, its mode, mtime, original size and the zlib-compressed
file contents.

Command line usage::

    sqlar.py ARCHIVE FILE...     add files to ARCHIVE
    sqlar.py -l ARCHIVE          list the contents of ARCHIVE
    sqlar.py -x ARCHIVE [FILE..] extract everything, or only the named files

``-v`` enables debug output and ``-r`` prints a size/time report.
"""
from __future__ import print_function

import argparse
from collections import namedtuple
from datetime import datetime, timedelta
import errno
import logging
import os
import posixpath
import sqlite3
import stat
import time
import zlib

# One row of the ``sqlar`` table, without the data blob.
SqliteArchiveFile = namedtuple('SqliteArchiveFile', 'name mode mtime sz')

LOG = logging.getLogger("sqlar.py")

# One tuple per character of the mode string.  Within a tuple the first
# (bit, char) pair whose bits are all set wins, so combined flags such as
# S_IXUSR|S_ISUID must be listed before the individual bits.
_filemode_table = (
    ((stat.S_IFLNK, "l"),
     (stat.S_IFREG, "-"),
     (stat.S_IFBLK, "b"),
     (stat.S_IFDIR, "d"),
     (stat.S_IFCHR, "c"),
     (stat.S_IFIFO, "p")),

    ((stat.S_IRUSR, "r"),),
    ((stat.S_IWUSR, "w"),),
    ((stat.S_IXUSR | stat.S_ISUID, "s"),
     (stat.S_ISUID, "S"),
     (stat.S_IXUSR, "x")),

    ((stat.S_IRGRP, "r"),),
    ((stat.S_IWGRP, "w"),),
    ((stat.S_IXGRP | stat.S_ISGID, "s"),
     (stat.S_ISGID, "S"),
     (stat.S_IXGRP, "x")),

    ((stat.S_IROTH, "r"),),
    ((stat.S_IWOTH, "w"),),
    ((stat.S_IXOTH | stat.S_ISVTX, "t"),
     (stat.S_ISVTX, "T"),
     (stat.S_IXOTH, "x")),
)


def filemode(mode):
    """Convert a file's mode to a string of the form '-rwxrwxrwx'."""
    perm = []
    for table in _filemode_table:
        for bit, char in table:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            # No entry of this column matched.
            perm.append("-")
    return "".join(perm)


def readable_size(num, suffix='B'):
    """Format a byte count with binary prefixes, e.g. ``2048`` -> '2.0KiB'."""
    for unit in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(num) < 1024.0:
            return "{:3.1f}{}{}".format(num, unit, suffix)
        num /= 1024.0
    return "{:.1f}{}{}".format(num, "Yi", suffix)


def readable_time(td):
    """Format a ``timedelta`` as milliseconds (up to 100 ms) or seconds.

    The whole duration is taken into account: ``td.microseconds`` alone is
    only the sub-second remainder, which would report 2.05 s as "50.00 ms".
    """
    fmt = '{:0.2f} {}'
    seconds = td.total_seconds()
    millis = seconds * 1000.0
    if millis > 100:
        return fmt.format(seconds, "s")
    return fmt.format(millis, "ms")


def _archive_name(path):
    """Return the relative, '/'-separated name used to store *path*.

    Leading './', '../' and '/' components are dropped so entries always
    extract below the current directory.  Only whole path components are
    removed: a name such as '.gitignore' keeps its leading dot.
    """
    normalized = posixpath.normpath(path.replace(os.sep, '/'))
    parts = [p for p in normalized.split('/') if p not in ('', '.', '..')]
    return '/'.join(parts)


class SqliteArchive(object):
    """Read/write access to one sqlar database file.

    The database is opened (and the ``sqlar`` table created if missing) on
    construction.  Instances can be used as context managers; leaving the
    ``with`` block closes the connection.
    """

    _filename = None
    _conn = None
    _cursor = None

    _SCHEMA = """
    create table if not exists sqlar(
        name text primary key,
        mode int,
        mtime int,
        sz int,
        data blob);
    """

    def __init__(self, filename):
        self._filename = filename
        self._connect()
        self._create_table()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __len__(self):
        """Return the number of entries in the archive."""
        return self._cursor.execute(
            'select count(*) from sqlar').fetchone()[0]

    def _connect(self):
        self._conn = sqlite3.connect(self._filename)
        self._cursor = self._conn.cursor()

    def _create_table(self):
        self._cursor.execute(self._SCHEMA)

    def close(self):
        """Close the database connection; safe to call more than once."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None
            self._cursor = None

    @property
    def filename(self):
        """Path of the archive file as given to the constructor."""
        return self._filename

    def size(self):
        """Return the total uncompressed size in bytes (0 when empty)."""
        # sum() over zero rows yields NULL, hence the ``or 0``.
        return self._cursor.execute(
            'select sum(sz) from sqlar').fetchone()[0] or 0

    def compressed_size(self):
        """Return the total size of the stored blobs in bytes (0 when empty)."""
        return self._cursor.execute(
            'select sum(length(data)) from sqlar').fetchone()[0] or 0

    def disk_size(self):
        """Return the size of the archive file on disk in bytes."""
        return os.stat(self._filename).st_size

    def contains(self, filename):
        """Return the ``SqliteArchiveFile`` stored as *filename*, or None."""
        row = self._cursor.execute(
            'select name, mode, mtime, sz from sqlar where name = ?',
            (filename,)).fetchone()
        if row is None:
            return None
        return SqliteArchiveFile(*row)

    def add(self, filename):
        """Compress the file at *filename* and store it in the archive.

        The entry is skipped when *filename* is the archive itself or when
        the stored copy is at least as recent as the file on disk.
        """
        # Compare resolved paths so that './archive' is recognised as well.
        if os.path.abspath(filename) == os.path.abspath(self._filename):
            LOG.warning("s %s -- "
                        "not possible to add an archive to itself", filename)
            return

        name = _archive_name(filename)
        stats = os.stat(filename)
        # The mtime column is an integer; truncate before comparing/storing.
        mtime = int(stats.st_mtime)

        # Look the entry up under the same name it is stored with.
        f_info = self.contains(name)
        if f_info and mtime <= f_info.mtime:
            LOG.debug("s %s -- "
                      "no change since last write", filename)
            return

        LOG.debug('a %s', filename)
        with open(filename, 'rb') as f:
            contents = f.read()
        self._cursor.execute(
            "insert or replace into sqlar values(?,?,?,?,?)",
            (name, stats.st_mode, mtime, stats.st_size,
             sqlite3.Binary(zlib.compress(contents)))
        )
        self._conn.commit()

    def extract(self, filename):
        """Write the entry *filename* below the current working directory.

        Missing parent directories are created and the stored mtime is
        restored on the extracted file.

        Raises:
            KeyError: if the archive has no entry named *filename*.
        """
        row = self._cursor.execute(
            "select mtime, data from sqlar where name = ?",
            (filename,)).fetchone()
        if row is None:
            raise KeyError(filename)
        mtime, data = row

        # NOTE: the entry name is used verbatim as the output path, so an
        # archive from an untrusted source can write outside the cwd.
        parent = os.path.dirname(filename)
        if parent:
            try:
                os.makedirs(parent)
            except OSError as err:
                # Only an already existing directory is fine; anything else
                # (permissions, a file in the way, ...) must surface.
                if err.errno != errno.EEXIST or not os.path.isdir(parent):
                    raise

        LOG.debug("x %s", filename)
        with open(filename, 'wb') as f:
            f.write(zlib.decompress(data))

        # Keep the access time, restore the archived modification time.
        stats = os.stat(filename)
        os.utime(filename, (stats.st_atime, mtime))

    @property
    def files(self):
        """Yield a ``SqliteArchiveFile`` for every entry in the archive."""
        # fetchall() first: the cursor is shared, and callers may run other
        # queries (e.g. extract) while iterating.
        for row in self._cursor.execute(
                "select name, mode, mtime, sz from sqlar").fetchall():
            yield SqliteArchiveFile(*row)

    def find(self, pattern):
        """Yield the entries whose name matches the SQL LIKE *pattern*."""
        for row in self._cursor.execute(
                "select name, mode, mtime, sz from sqlar where name like ?",
                (pattern,)).fetchall():
            yield SqliteArchiveFile(*row)

    def contents(self, filename, decode=None):
        """Return the decompressed contents of *filename*, or None if absent.

        When *decode* names an encoding the bytes are decoded to text.
        """
        row = self._cursor.execute(
            "select data from sqlar where name = ?", (filename,)).fetchone()
        if row is None:
            return None
        contents = zlib.decompress(row[0])
        if decode:
            contents = contents.decode(decode)
        return contents

    def ls(self):
        """Return an ``ls -l`` style listing, one line per entry."""
        lines = []
        for f in self.files:
            stamp = datetime.fromtimestamp(f.mtime).strftime(
                "%b %d %H:%M").rjust(12)
            size = readable_size(f.sz).rjust(10)
            lines.append(' '.join((filemode(f.mode), size, stamp, f.name)))
        return '\n'.join(lines)


def _report_row(label, value):
    """Return one table row, as wide as the 33-character row marker."""
    return "| {:<17}{:>12} |".format(label, value)


def _log_report(archive, times):
    """Log the size and time tables printed by the ``-r`` option."""
    size_uncompressed = archive.size()
    size_compressed = archive.compressed_size()
    disk_size = archive.disk_size()
    entries = len(archive)
    row_marker = '+' + '-' * 31 + '+'

    # An empty archive has no raw bytes to relate the disk size to.
    if size_uncompressed:
        percent = "{:.2f}".format(100 * float(disk_size) / size_uncompressed)
    else:
        percent = "n/a"

    LOG.info('\nSize usage')
    LOG.info(row_marker)
    LOG.info("%s", _report_row("Raw blobs", readable_size(size_uncompressed)))
    LOG.info("%s", _report_row("Compressed blobs",
                               readable_size(size_compressed)))
    LOG.info("%s", _report_row("Disk size", readable_size(disk_size)))
    LOG.info("%s", _report_row("Disk size (%)", percent))
    LOG.info(row_marker)

    task_seconds = times['done'] - times['e_read']
    per_file_seconds = task_seconds / entries if entries else 0.0

    LOG.info('\nTime usage')
    LOG.info(row_marker)
    rows = (
        ("Parse args", times['s_read'] - times['start']),
        ("Read sqlite", times['e_read'] - times['s_read']),
        ("Task", task_seconds),
        ("Per file", per_file_seconds),
        ("Total", times['done'] - times['start']),
    )
    for label, seconds in rows:
        LOG.info("%s", _report_row(
            label, readable_time(timedelta(seconds=seconds))))
    LOG.info(row_marker)


def main():
    """Command line entry point: add to, list or extract an archive."""
    times = {
        'start': time.time(),
    }
    parser = argparse.ArgumentParser()
    parser.add_argument("ARCHIVE", type=str, help="Archive filename")
    parser.add_argument("FILES", nargs='*', type=str,
                        help="File names to add to archive")
    parser.add_argument("-l", action='store_true', default=False,
                        help="See the contents of the archive.")
    parser.add_argument("-x", action='store_true', default=False,
                        help="Extract the contents of an archive.")
    parser.add_argument("-v", action='store_true', default=False,
                        help="Enable verbose output.")
    parser.add_argument("-r", action='store_true', default=False,
                        help='Report time and size information')
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.v else logging.INFO,
                        format="%(message)s")

    times['s_read'] = time.time()
    archive = SqliteArchive(args.ARCHIVE)
    times['e_read'] = time.time()

    try:
        if args.x:
            if args.FILES:
                # Apply the same normalisation that add() uses for names.
                names = [_archive_name(f) for f in args.FILES]
            else:
                names = [f.name for f in archive.files]
            for name in names:
                try:
                    archive.extract(name)
                except KeyError:
                    LOG.warning("s %s -- not found in archive", name)
        elif args.l:
            LOG.info("%s\n%s", archive.filename, archive.ls())
        else:
            for f in args.FILES:
                archive.add(f)
        times['done'] = time.time()

        if args.r:
            _log_report(archive, times)
    finally:
        archive.close()


if __name__ == '__main__':
    main()