Skip to content

Instantly share code, notes, and snippets.

@jaseg
Created January 21, 2016 19:14
Show Gist options
  • Select an option

  • Save jaseg/ea14381e7a301dbb12b0 to your computer and use it in GitHub Desktop.

Select an option

Save jaseg/ea14381e7a301dbb12b0 to your computer and use it in GitHub Desktop.

Revisions

  1. jaseg created this gist Jan 21, 2016.
    31 changes: 31 additions & 0 deletions tabular_deltastore.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,31 @@
    import struct

    def deltaify(list_of_tuples):
        """Split rows into the first row plus element-wise row-to-row deltas.

        Accepts any iterable of equally long tuples (not only sliceable
        sequences) and walks it exactly once, without copying it.

        Returns a pair ``(first_row, deltas)`` where ``deltas[i]`` is the
        element-wise difference between row ``i + 1`` and row ``i``.

        Raises IndexError if no rows are given.
        """
        rows = iter(list_of_tuples)
        try:
            first = next(rows)
        except StopIteration:
            # Keep the exception type callers got from ``list_of_tuples[0]``.
            raise IndexError('deltaify() needs at least one tuple') from None

        deltas = []
        previous = first
        for current in rows:
            deltas.append(tuple(b - a for a, b in zip(previous, current)))
            previous = current
        return first, deltas

    def smallest_format(num):
        """Return the narrowest signed struct format character that holds *num*.

        Candidates are 'b', 'h', 'i' and 'q', checked against signed ranges of
        8, 16 and 32 bits. Both bounds are tested, so negative values get a
        wide enough format too. Anything outside the 32-bit range maps to 'q';
        values that do not fit 'q' either are rejected later by struct.pack.
        """
        for code, bits in (('b', 7), ('h', 15), ('i', 31)):
            if -2**bits <= num < 2**bits:
                return code
        return 'q'

    def make_format(es):
        """Return a struct format string with one character per element of *es*."""
        return ''.join(smallest_format(e) for e in es)

    def pack(list_of_tuples):
        """Serialize rows of integers as a first row plus compact deltas.

        Layout of the returned bytes:
            [column count n] [n first-row format chars] [n delta format chars]
            [first row] [delta rows...]

        Each column of the first row and each delta column gets the narrowest
        signed struct format that holds all of its values.

        Raises IndexError when no rows are given (from deltaify) and
        struct.error when a value does not fit a 64-bit signed integer.
        """
        offx, deltas = deltaify(list_of_tuples)

        # -v - 1 maps a negative value onto the non-negative value that needs
        # the same signed width (-128 -> 127, -129 -> 128). Sizing on
        # max(v, -v - 1) therefore picks a format wide enough for both signs
        # even if the format chooser only inspects the upper bound.
        of = make_format([max(v, -v - 1) for v in offx])
        if deltas:
            # Size each delta column on its largest AND smallest value.
            df = make_format([max(max(ds), -min(ds) - 1) for ds in zip(*deltas)])
        else:
            # Single row: still emit n delta format chars so the header keeps
            # its fixed 2*n length and the reader does not take data bytes
            # for the delta format.
            df = 'b' * len(of)

        # NOTE: the first row is packed without a byte-order prefix (native
        # order and alignment) while delta rows use '!'. This is kept as-is
        # because the reader decodes the first row the same way.
        return bytes((len(of),)) + \
            (of + df).encode() + \
            struct.pack(of, *offx) + \
            b''.join(struct.pack('!' + df, *tup) for tup in deltas)

    def undeltaify(offx, deltas):
        """Rebuild the list of rows from a first row and successive deltas.

        *offx* is the first row; each entry of *deltas* is added element-wise
        to the previously rebuilt row to obtain the next one. The returned
        list starts with *offx* itself.
        """
        rows = [offx]
        for step in deltas:
            last = rows[-1]
            rows.append(tuple(value + change for value, change in zip(last, step)))
        return rows

    def unpack(bytestring):
        """Decode bytes produced by pack() back into a list of row tuples.

        Data layout:
            [column count n] [n first-row format chars] [n delta format chars]
            [first row] [delta rows...]
        """
        count = bytestring[0]
        formats_end = 1 + 2 * count
        first_fmt = bytestring[1:1 + count]
        delta_fmt = bytestring[1 + count:formats_end]
        payload = bytestring[formats_end:]

        # The first row is read without a byte-order prefix; delta rows use '!'.
        first_row = struct.unpack_from(first_fmt.decode(), payload)
        delta_bytes = payload[struct.calcsize(first_fmt):]
        steps = list(struct.iter_unpack('!' + delta_fmt.decode(), delta_bytes))
        return undeltaify(first_row, steps)