Skip to content

Instantly share code, notes, and snippets.

@andrewrabert
Last active April 9, 2024 17:56
Show Gist options
  • Save andrewrabert/70ccd9ce5170571ad16e715a79244429 to your computer and use it in GitHub Desktop.
Save andrewrabert/70ccd9ce5170571ad16e715a79244429 to your computer and use it in GitHub Desktop.

Revisions

  1. andrewrabert revised this gist Mar 11, 2020. 1 changed file with 24 additions and 20 deletions.
    44 changes: 24 additions & 20 deletions json_vs_msgpack.py
    Original file line number Diff line number Diff line change
    @@ -4,13 +4,14 @@
    import time

    import brotli
    import msgpack
    import tabulate
    import umsgpack
    import zstd

    DATA = {
    'a': 'aardvark',
    'map': {i: i for i in range(1000)},
    'map': {str(i): i for i in range(1000)},
    'nums': list(range(1000)),
    }

    @@ -32,27 +33,30 @@ def benchmark(to_func, from_func):
    def main():
    results = []

    results.append([
    'json', *benchmark(
    funcs = {
    'json': [
    lambda x: json.dumps(x).encode(),
    json.loads)
    ])
    results.append([
    'msgpack', *benchmark(
    json.loads,
    ],
    'msgpack': [
    msgpack.packb,
    msgpack.unpackb,
    ],
    'umsgpack': [
    umsgpack.packb,
    umsgpack.unpackb)
    ])
    for c in [brotli, gzip, zstd]:
    results.append([
    f'json-{c.__name__}', *benchmark(
    lambda d: c.compress(json.dumps(d).encode()),
    lambda d: json.loads(c.decompress(d)))
    ])
    results.append([
    f'msgpack-{c.__name__}', *benchmark(
    lambda d: c.compress(umsgpack.packb(d)),
    lambda d: umsgpack.unpackb(c.decompress(d)))
    ])
    umsgpack.unpackb,
    ]
    }

    for name, (to_func, from_func) in funcs.items():
    results.append([name, *benchmark(to_func, from_func)])

    for c in [brotli, gzip, zstd]:
    results.append([
    f'{name}-{c.__name__}', *benchmark(
    lambda d: c.compress(to_func(d)),
    lambda d: from_func(c.decompress(d)))
    ])

    results = [
    (r[0], f'{r[1]:.2f}', f'{r[2]:.2f}', r[3])
  2. andrewrabert created this gist Mar 11, 2020.
    69 changes: 69 additions & 0 deletions json_vs_msgpack.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,69 @@
    #!/usr/bin/env python3
    import json
    import gzip
    import time

    import brotli
    import tabulate
    import umsgpack
    import zstd

    # Payload serialized by every benchmark.
    #
    # The mapping uses string keys on purpose: JSON object keys are always
    # strings, so integer keys would be silently coerced by ``json.dumps`` and
    # the JSON round trip would no longer reproduce the original object, which
    # makes the comparison between formats unfair.
    DATA = {
        'a': 'aardvark',
        'map': {str(i): i for i in range(1000)},
        'nums': list(range(1000)),
    }

    # Number of serialize calls and deserialize calls timed per benchmark.
    ITERATIONS = 500


    def benchmark(to_func, from_func, data=None, iterations=None):
        """Time repeated serialization and deserialization of a payload.

        Args:
            to_func: Callable that takes the payload and returns its serialized
                form (anything ``len()`` accepts).
            from_func: Callable that takes the serialized form and decodes it.
            data: Payload to serialize. ``None`` selects the module-level
                ``DATA``.
            iterations: How many times each direction is called. ``None``
                selects the module-level ``ITERATIONS``.

        Returns:
            A ``(to_seconds, from_seconds, size)`` tuple: total seconds spent in
            ``to_func``, total seconds spent in ``from_func``, and ``len()`` of
            the last serialized result.

        Raises:
            ValueError: If ``iterations`` is less than 1, since nothing would be
                serialized and there would be no size to report.
        """
        if data is None:
            data = DATA
        if iterations is None:
            iterations = ITERATIONS
        if iterations < 1:
            raise ValueError(f'iterations must be >= 1, got {iterations!r}')

        # perf_counter() is the clock meant for measuring intervals; time.time()
        # is wall-clock time and may jump when the system clock is adjusted.
        start = time.perf_counter()
        for _ in range(iterations):
            serialized = to_func(data)
        to_seconds = time.perf_counter() - start

        start = time.perf_counter()
        for _ in range(iterations):
            from_func(serialized)
        from_seconds = time.perf_counter() - start

        return to_seconds, from_seconds, len(serialized)


    def main():
        """Benchmark JSON and MessagePack, plain and compressed, and print a table.

        Both formats are timed on their own first, then once per compression
        module (brotli, gzip, zstd) with the codec wrapped around the
        serializer. The rows are sorted by label and the two timing columns are
        rendered with two decimal places.
        """
        rows = [
            ['json', *benchmark(
                lambda obj: json.dumps(obj).encode(),
                json.loads)],
            ['msgpack', *benchmark(
                umsgpack.packb,
                umsgpack.unpackb)],
        ]

        for codec in (brotli, gzip, zstd):
            json_row = [
                f'json-{codec.__name__}',
                *benchmark(
                    lambda obj: codec.compress(json.dumps(obj).encode()),
                    lambda blob: json.loads(codec.decompress(blob))),
            ]
            rows.append(json_row)

            msgpack_row = [
                f'msgpack-{codec.__name__}',
                *benchmark(
                    lambda obj: codec.compress(umsgpack.packb(obj)),
                    lambda blob: umsgpack.unpackb(codec.decompress(blob))),
            ]
            rows.append(msgpack_row)

        # Format the timings for display, then order the table by format label.
        table = [
            (label, f'{to_seconds:.2f}', f'{from_seconds:.2f}', size)
            for label, to_seconds, from_seconds, size in rows
        ]
        table.sort(key=lambda row: row[0])

        column_names = ['format', 'to_seconds', 'from_seconds', 'byte size']
        print(tabulate.tabulate(table, headers=column_names))


    # Run the benchmarks only when executed as a script, not when imported.
    if __name__ == '__main__':
        main()