Skip to content

Instantly share code, notes, and snippets.

@peterspackman
Created January 8, 2021 03:27
Show Gist options
  • Select an option

  • Save peterspackman/928467e0c90f7771d20ea74d2f8f0eb4 to your computer and use it in GitHub Desktop.

Select an option

Save peterspackman/928467e0c90f7771d20ea74d2f8f0eb4 to your computer and use it in GitHub Desktop.

Revisions

  1. peterspackman created this gist Jan 8, 2021.
    277 changes: 277 additions & 0 deletions sqlar.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,277 @@
    #!/usr/bin/env python
    from __future__ import print_function

    import argparse
    from datetime import datetime, timedelta
    import logging
    import os
    import sqlite3
    import zlib
    import stat
    import time
    from collections import namedtuple

    # Metadata record for one archive row: every sqlar column except the blob.
    SqliteArchiveFile = namedtuple(
        'SqliteArchiveFile',
        ('name', 'mode', 'mtime', 'sz'),
    )

    # Logger shared by the whole script.
    LOG = logging.getLogger("sqlar.py")

    # Lookup table consumed by filemode(): one inner tuple per output character.
    # Within an inner tuple the (bitmask, character) pairs are tried in order and
    # the first mask fully contained in the mode supplies the character.
    _filemode_table = (
        # File type.
        (
            (stat.S_IFLNK, "l"),
            (stat.S_IFREG, "-"),
            (stat.S_IFBLK, "b"),
            (stat.S_IFDIR, "d"),
            (stat.S_IFCHR, "c"),
            (stat.S_IFIFO, "p"),
        ),

        # Owner: read, write, execute / set-uid.
        ((stat.S_IRUSR, "r"),),
        ((stat.S_IWUSR, "w"),),
        (
            (stat.S_IXUSR | stat.S_ISUID, "s"),
            (stat.S_ISUID, "S"),
            (stat.S_IXUSR, "x"),
        ),

        # Group: read, write, execute / set-gid.
        ((stat.S_IRGRP, "r"),),
        ((stat.S_IWGRP, "w"),),
        (
            (stat.S_IXGRP | stat.S_ISGID, "s"),
            (stat.S_ISGID, "S"),
            (stat.S_IXGRP, "x"),
        ),

        # Others: read, write, execute / sticky.
        ((stat.S_IROTH, "r"),),
        ((stat.S_IWOTH, "w"),),
        (
            (stat.S_IXOTH | stat.S_ISVTX, "t"),
            (stat.S_ISVTX, "T"),
            (stat.S_IXOTH, "x"),
        ),
    )


    def filemode(mode):
        """Render a numeric file mode as a string such as '-rwxrwxrwx'.

        Every entry of ``_filemode_table`` yields exactly one character: the
        first (mask, character) pair whose mask bits are all set in *mode*
        wins, and '-' is emitted when no pair of that entry matches.
        """
        return "".join(
            next(
                (letter for mask, letter in candidates if mode & mask == mask),
                "-",
            )
            for candidates in _filemode_table
        )


    def readable_size(num, suffix='B'):
        """Format a byte count with binary (1024-based) unit prefixes.

        *num* is divided by 1024 until its magnitude drops below 1024, then
        rendered with one decimal place followed by the matching prefix and
        *suffix*.  Anything still too large after 'Zi' is reported in 'Yi'.
        """
        prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
        index = 0
        while index < len(prefixes):
            if abs(num) < 1024.0:
                return "{:3.1f}{}{}".format(num, prefixes[index], suffix)
            num = num / 1024.0
            index += 1
        return "{:.1f}{}{}".format(num, "Yi", suffix)


    def readable_time(td):
        """Format a ``timedelta`` as a short human-readable duration.

        Durations longer than 100 ms are shown in seconds ("2.05 s"); shorter
        ones are shown in milliseconds ("5.00 ms").  Both use two decimals.

        The whole duration is taken into account via ``total_seconds()``.
        Looking only at ``td.microseconds`` would drop the whole-second (and
        day) part whenever the sub-second fraction is small, e.g. 2.05 s would
        be reported as "50.00 ms".
        """
        fmt = '{:0.2f} {}'
        total_seconds = td.total_seconds()
        if total_seconds > 0.1:
            return fmt.format(total_seconds, "s")
        return fmt.format(total_seconds * 1000, "ms")

    class SqliteArchive(object):
        """A file archive stored in the ``sqlar`` table of an SQLite database.

        Each row holds a file name, its stat mode, its modification time, its
        uncompressed size and the zlib-compressed content.  Opening an archive
        creates the table when it does not exist yet.

        The object can be used as a context manager; leaving the ``with`` block
        closes the database connection.
        """

        _filename = None
        _conn = None
        _cursor = None

        _SCHEMA = """
        create table if not exists sqlar(
        name text primary key,
        mode int,
        mtime int,
        sz int,
        data blob);
        """

        def __init__(self, filename):
            """Open (or create) the archive database at *filename*."""
            self._filename = filename
            self._connect()
            self._create_table()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            self.close()

        def _connect(self):
            """Open the SQLite connection and the single shared cursor."""
            self._conn = sqlite3.connect(self._filename)
            self._cursor = self._conn.cursor()

        def _create_table(self):
            """Create the ``sqlar`` table if the database does not have one."""
            self._cursor.execute(self._SCHEMA)

        def close(self):
            """Close the database connection.  Safe to call more than once."""
            if self._conn is not None:
                self._conn.close()
                self._conn = None
                self._cursor = None

        @property
        def filename(self):
            """Path of the archive database as given to the constructor."""
            return self._filename

        def size(self):
            """Return the total uncompressed size in bytes (0 when empty)."""
            total = self._cursor.execute(
                'select sum(sz) from sqlar').fetchone()[0]
            # sum() over zero rows is NULL, which arrives here as None.
            return total or 0

        def compressed_size(self):
            """Return the total size of the stored blobs in bytes (0 when empty)."""
            total = self._cursor.execute(
                'select sum(length(data)) from sqlar').fetchone()[0]
            # sum() over zero rows is NULL, which arrives here as None.
            return total or 0

        def disk_size(self):
            """Return the size of the archive database file on disk, in bytes."""
            return os.stat(self._filename).st_size

        def contains(self, filename):
            """Return the ``SqliteArchiveFile`` stored under *filename*, or None."""
            row = self._cursor.execute(
                'select name, mode, mtime, sz '
                'from sqlar where name = ?', (filename,)).fetchone()
            if row is None:
                return None
            return SqliteArchiveFile(*row)

        def add(self, filename):
            """Store the file at *filename* in the archive and commit.

            The archive itself is never added, and a file whose mtime is not
            newer than the stored entry is skipped.  An existing entry with the
            same name is replaced.
            """
            if filename == self._filename:
                LOG.warning("s %s -- "
                            "not possible to add an archive to itself", filename)
                return
            stats = os.stat(filename)

            # Look the entry up under the same name it is stored under;
            # otherwise "./foo" never matches the stored "foo" and the
            # unchanged-file shortcut below is never taken.
            # NOTE(review): lstrip('./') strips every leading '.' and '/'
            # character, not just a "./" prefix, so ".hidden" is stored as
            # "hidden".  Kept as is because main() strips names the same way
            # before extracting; changing it must be done in both places.
            name = filename.lstrip('./')
            f_info = self.contains(name)

            if f_info and stats.st_mtime <= f_info.mtime:
                LOG.debug("s %s -- "
                          "no change since last write", filename)
                return

            LOG.debug('a %s', filename)
            with open(filename, 'rb') as f:
                contents = f.read()

            self._cursor.execute(
                "insert or replace into sqlar values(?,?,?,?,?)",
                (name,
                 stats.st_mode,
                 stats.st_mtime,
                 stats.st_size,
                 sqlite3.Binary(zlib.compress(contents)))
            )
            self._conn.commit()

        def extract(self, filename):
            """Write the archived file *filename* below the current directory.

            Missing parent directories are created and the stored mtime is
            restored on the extracted file.

            Raises:
                KeyError: if the archive has no entry named *filename*.
            """
            row = self._cursor.execute(
                "select mtime, data from sqlar where name = ?",
                (filename,)).fetchone()
            if row is None:
                raise KeyError(
                    "%s is not in archive %s" % (filename, self._filename))
            mtime, data = row

            # place the file under the cwd
            # NOTE(review): the stored name is used as the output path as is;
            # confirm archives from untrusted sources cannot contain absolute
            # or '..' paths before extracting them.
            parent = os.path.dirname(filename)
            if parent and not os.path.isdir(parent):
                os.makedirs(parent)

            LOG.debug("x %s", filename)
            with open(filename, 'wb') as f:
                f.write(zlib.decompress(data))

            stats = os.stat(filename)
            # set correct mtime, keeping the access time of the fresh file
            os.utime(filename, (stats.st_atime, mtime))

        @property
        def files(self):
            """Yield a ``SqliteArchiveFile`` for every entry in the archive."""
            # Rows are fetched eagerly: the cursor is shared, and callers may
            # run other queries (e.g. extract()) while iterating.
            rows = self._cursor.execute(
                "select name, mode, mtime, sz from sqlar").fetchall()
            for row in rows:
                yield SqliteArchiveFile(*row)

        def find(self, pattern):
            """Yield entries whose name matches the SQL ``LIKE`` *pattern*."""
            # Fetched eagerly for the same reason as in ``files``.
            rows = self._cursor.execute(
                "select name, mode, mtime, sz from sqlar where name like ?",
                (pattern,)).fetchall()
            for row in rows:
                yield SqliteArchiveFile(*row)

        def contents(self, filename, decode=None):
            """Return the decompressed content stored under *filename*.

            Returns None when the archive has no such entry.  When *decode* is
            given it is used as the codec name to turn the bytes into text.
            """
            self._cursor.execute(
                "select data from sqlar where name = ?", (filename,))
            row = self._cursor.fetchone()
            if row is None:
                return None
            contents = zlib.decompress(row[0])
            if decode:
                contents = contents.decode(decode)
            return contents

        def ls(self):
            """Return a listing with one line per entry.

            Each line holds the mode string, the human-readable size, the
            modification time and the name, separated by single spaces.
            """
            lines = []
            for f in self.files:
                # Named ``stamp`` so the module-level ``time`` import is not
                # shadowed.
                stamp = datetime.fromtimestamp(
                    f.mtime).strftime("%b %d %H:%M").rjust(12)
                size = readable_size(f.sz).rjust(10)
                lines.append(' '.join((filemode(f.mode), size, stamp, f.name)))
            return '\n'.join(lines)

        def __len__(self):
            """Return the number of entries in the archive."""
            return self._cursor.execute(
                'select count(*) from sqlar').fetchone()[0]



    def main():
        """Command-line entry point.

        Without options the given FILES are added to ARCHIVE.  ``-x`` extracts
        the named files (or every file when none are named), ``-l`` lists the
        archive, ``-v`` enables debug logging and ``-r`` appends a report of
        size and time usage.  ``-x`` takes precedence over ``-l``.
        """
        times = {
            'start': time.time(),
        }
        parser = argparse.ArgumentParser()
        parser.add_argument("ARCHIVE", type=str,
                            help="Archive filename")
        parser.add_argument("FILES", nargs='*', type=str,
                            help="File names to add to archive")
        parser.add_argument("-l", action='store_true', default=False,
                            help="See the contents of the archive.")
        parser.add_argument("-x", action='store_true', default=False,
                            help="Extract the contents of an archive.")
        parser.add_argument("-v", action='store_true', default=False,
                            help="Enable verbose output.")
        parser.add_argument("-r", action='store_true', default=False,
                            help='Report time and size information')
        args = parser.parse_args()

        logging.basicConfig(level=logging.DEBUG if args.v else logging.INFO,
                            format="%(message)s")

        times['s_read'] = time.time()
        archive = SqliteArchive(args.ARCHIVE)
        times['e_read'] = time.time()
        if args.x:
            if args.FILES:
                for f in args.FILES:
                    # Same name normalisation as SqliteArchive.add applies
                    # when storing.
                    archive.extract(f.lstrip('./'))
            else:
                for f in archive.files:
                    archive.extract(f.name)
        elif args.l:
            LOG.info("%s\n%s", archive.filename, archive.ls())
        else:
            for f in args.FILES:
                archive.add(f)
        times['done'] = time.time()

        if args.r:
            # The size queries sum over the table and yield None for an empty
            # archive; treat that as zero bytes instead of crashing.
            size_uncompressed = archive.size() or 0
            size_compressed = archive.compressed_size() or 0
            disk_size = archive.disk_size()
            if size_uncompressed:
                disk_percent = "{:>12.2f}".format(
                    100 * float(disk_size) / size_uncompressed)
            else:
                # No payload to compare against: avoid dividing by zero.
                disk_percent = "n/a".rjust(12)

            row_marker = '+' + '-' * 31 + '+'
            LOG.info('\nSize usage')
            LOG.info(row_marker)
            LOG.info("| Raw blobs %s |", readable_size(size_uncompressed).rjust(12))
            LOG.info("| Compressed blobs %s |", readable_size(size_compressed).rjust(12))
            LOG.info("| Disk size %s |", readable_size(disk_size).rjust(12))
            LOG.info("| Disk size (%%) %s |", disk_percent)
            LOG.info(row_marker)

            LOG.info('\nTime usage')
            LOG.info(row_marker)
            task_seconds = times['done'] - times['e_read']
            total = readable_time(timedelta(seconds=times['done'] - times['start']))
            parse = readable_time(timedelta(seconds=times['s_read'] - times['start']))
            read = readable_time(timedelta(seconds=times['e_read'] - times['s_read']))
            task = readable_time(timedelta(seconds=task_seconds))
            file_count = len(archive)
            if file_count:
                per_file = readable_time(
                    timedelta(seconds=task_seconds / file_count))
            else:
                # An empty archive has no per-file average.
                per_file = "n/a"
            LOG.info("| Parse args %s |", parse.rjust(12))
            LOG.info("| Read sqlite %s |", read.rjust(12))
            LOG.info("| Task %s |", task.rjust(12))
            LOG.info("| Per file %s |", per_file.rjust(12))
            LOG.info("| Total %s |", total.rjust(12))
            LOG.info(row_marker)


    # Run the command-line interface only when executed as a script.
    if __name__ == "__main__":
        main()