Last active
April 9, 2024 17:56
-
-
Save andrewrabert/70ccd9ce5170571ad16e715a79244429 to your computer and use it in GitHub Desktop.
Revisions
-
andrewrabert revised this gist
Mar 11, 2020. 1 changed file with 24 additions and 20 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
Original file line number | Diff line number | Diff line change

@@ -4,13 +4,14 @@
import time

import brotli
import msgpack
import tabulate
import umsgpack
import zstd


DATA = {
    'a': 'aardvark',
    'map': {str(i): i for i in range(1000)},
    'nums': list(range(1000)),
}

@@ -32,27 +33,30 @@ def benchmark(to_func, from_func):


def main():
    results = []
    funcs = {
        'json': [
            lambda x: json.dumps(x).encode(),
            json.loads,
        ],
        'msgpack': [
            msgpack.packb,
            msgpack.unpackb,
        ],
        'umsgpack': [
            umsgpack.packb,
            umsgpack.unpackb,
        ]
    }
    for name, (to_func, from_func) in funcs.items():
        results.append([name, *benchmark(to_func, from_func)])
        for c in [brotli, gzip, zstd]:
            results.append([
                f'{name}-{c.__name__}',
                *benchmark(
                    lambda d: c.compress(to_func(d)),
                    lambda d: from_func(c.decompress(d)))
            ])

    results = [
        (r[0], f'{r[1]:.2f}', f'{r[2]:.2f}', r[3])
-
andrewrabert created this gist
Mar 11, 2020. There are no files selected for viewing.
#!/usr/bin/env python3
"""Benchmark JSON and MessagePack serialization, with and without compression.

Each format is timed for ITERATIONS encode calls and ITERATIONS decode calls
on the module-level DATA payload, both raw and wrapped in brotli, gzip and
zstd compression. Results are printed as a table sorted by format name.
"""
import gzip
import json
import time

import brotli
import tabulate
import umsgpack
import zstd


DATA = {
    'a': 'aardvark',
    # String keys: json.dumps() coerces non-string keys to strings, whereas
    # msgpack keeps ints, so integer keys would make the two formats encode
    # different payloads and make the JSON round trip lossy.
    'map': {str(i): i for i in range(1000)},
    'nums': list(range(1000)),
}

# Number of encode calls and decode calls timed per benchmark.
ITERATIONS = 500


def benchmark(to_func, from_func):
    """Time ITERATIONS encodes of DATA and ITERATIONS decodes of the result.

    Args:
        to_func: callable taking DATA and returning a sized (len()-able)
            serialized value.
        from_func: callable taking the value produced by ``to_func``.

    Returns:
        A ``(to_seconds, from_seconds, size)`` tuple: elapsed seconds for the
        encode loop, elapsed seconds for the decode loop, and ``len()`` of the
        serialized value.
    """
    # Warm-up call outside the timed region; it also guarantees that
    # `serialized` is bound even if ITERATIONS is set to 0.
    serialized = to_func(DATA)

    # perf_counter() is the clock intended for measuring short intervals;
    # time.time() is a wall clock that may be adjusted while we measure.
    start = time.perf_counter()
    for _ in range(ITERATIONS):
        serialized = to_func(DATA)
    to_seconds = time.perf_counter() - start

    start = time.perf_counter()
    for _ in range(ITERATIONS):
        from_func(serialized)
    from_seconds = time.perf_counter() - start

    return to_seconds, from_seconds, len(serialized)


def main():
    """Run every format/compressor combination and print a results table."""
    # Row label -> (encode, decode). The 'msgpack' rows use umsgpack.
    codecs = {
        'json': (lambda obj: json.dumps(obj).encode(), json.loads),
        'msgpack': (umsgpack.packb, umsgpack.unpackb),
    }

    results = []
    for name, (to_func, from_func) in codecs.items():
        results.append([name, *benchmark(to_func, from_func)])
        for c in [brotli, gzip, zstd]:
            # The lambdas close over loop variables, which is safe here
            # because benchmark() calls them before the loop advances.
            results.append([
                f'{name}-{c.__name__}',
                *benchmark(
                    lambda d: c.compress(to_func(d)),
                    lambda d: from_func(c.decompress(d))),
            ])

    # Format the timings to two decimals, then order rows by format name.
    rows = sorted(
        ((r[0], f'{r[1]:.2f}', f'{r[2]:.2f}', r[3]) for r in results),
        key=lambda row: row[0])
    print(
        tabulate.tabulate(
            rows,
            headers=['format', 'to_seconds', 'from_seconds', 'byte size']))


if __name__ == '__main__':
    main()