-
-
Save tyong920/8b5112dc578eeb398de31ed6d834f401 to your computer and use it in GitHub Desktop.
Benchmark of msgspec, orjson, pydantic, ... taken from the Python Discord
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This is a modified version of `orig_benchmark.py`, using different data to | |
| # highlight performance differences. | |
| import json | |
| import random | |
| import string | |
| import timeit | |
| from statistics import mean, stdev | |
| import orjson | |
| import simdjson | |
| import msgspec | |
| import pydantic | |
def describe_json(buf: bytes) -> None:
    """Print how often each JSON value type occurs in an encoded message.

    The buffer is decoded with ``msgspec.json.decode`` and every value in the
    resulting tree is tallied by its exact Python type; object keys are
    tallied as well. Types that never occur are omitted from the report, and
    the remaining rows are printed from most to least frequent together with
    their share of all tallied values.
    """
    labels = {
        dict: "objects",
        list: "arrays",
        str: "strs",
        int: "ints",
        float: "floats",
        bool: "bools",
        type(None): "nulls",
    }
    counts = {typ: 0 for typ in labels}

    # Walk the decoded tree with an explicit stack; the visiting order has no
    # effect on the tallies.
    pending = [msgspec.json.decode(buf)]
    while pending:
        node = pending.pop()
        node_type = type(node)
        counts[node_type] += 1
        if node_type is list:
            pending.extend(node)
        elif node_type is dict:
            for key, value in node.items():
                pending.append(key)
                pending.append(value)

    total = sum(counts.values())
    print("JSON Types:")
    rows = [(label, counts[typ]) for typ, label in labels.items() if counts[typ]]
    # Stable sort on the negated count: descending by count, ties keep the
    # declaration order of the labels.
    rows.sort(key=lambda row: -row[1])
    for kind, count in rows:
        print(f"- {kind}: {count} ({count/total:.2f})")
# Fixed seed so that every run generates the same random payload.
random.seed(42)


def randstr():
    """Return a random 10-character string drawn from ``string.printable``."""
    chars = random.choices(string.printable, k=10)
    return "".join(chars)
class ItemStruct(msgspec.Struct):
    """msgspec schema for a single item: a name paired with an integer value."""

    # Field order is significant: instances are built positionally.
    name: str
    value: int
class UserStruct(msgspec.Struct):
    """msgspec schema for one user record and its list of items."""

    # Field order is significant: instances are built positionally.
    username: str
    exp: float
    level: float
    items: list[ItemStruct]
class ItemPydantic(pydantic.BaseModel):
    """pydantic model with the same fields as ``ItemStruct``."""

    name: str
    value: int
class UserPydantic(pydantic.BaseModel):
    """pydantic model with the same fields as ``UserStruct``."""

    username: str
    exp: float
    level: float
    items: list[ItemPydantic]
# Number of user records in the benchmark payload.
N = 10000


def _random_user():
    """Build one randomly populated ``UserStruct``.

    The random draws happen in the same order as the fields are declared
    (username, exp, level, item count, then each item's name and value), so
    the seeded random stream is consumed deterministically.
    """
    username = randstr()
    exp = random.random()
    level = random.uniform(0, 100)
    item_count = random.randrange(10, 20)
    items = [
        ItemStruct(name=randstr(), value=random.randint(0, 100))
        for _ in range(item_count)
    ]
    return UserStruct(username=username, exp=exp, level=level, items=items)


# JSON-encoded list of N random users; this is the input every decoder parses.
msg = msgspec.json.encode([_random_user() for _ in range(N)])
# Typed decoders are constructed once, up front, so only decoding is timed.
_struct_decoder = msgspec.json.Decoder(list[UserStruct])
_pydantic_adapter = pydantic.TypeAdapter(list[UserPydantic])

# (display name, callable that takes the JSON bytes) for each decoder under test.
BENCHMARKS = [
    ("stdlib json", json.loads),
    ("orjson", orjson.loads),
    ("simdjson", simdjson.loads),
    ("msgspec-dict", msgspec.json.decode),
    ("msgspec-struct", _struct_decoder.decode),
    ("pydantic-v2", _pydantic_adapter.validate_json),
]
# Summarise the payload first, then time every decoder on it.
describe_json(msg)
print()
print("Benchmarks:")

# Maps each benchmark name to its list of 20 timings; each timing is the
# total for 10 consecutive decodes of the payload.
results = {}
for name, fun in BENCHMARKS:
    bench_globals = {"loads": fun, "msg": msg}
    times = timeit.repeat(
        stmt="loads(msg)",
        globals=bench_globals,
        number=10,
        repeat=20,
    )
    results[name] = times
    print(f"- {name}: {mean(times):.2f} ± {stdev(times):.2f}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This is a cleaned-up version of the original benchmark. Semantically it's the | |
| # same, just a bit easier to read. The performance numbers also match those | |
| # seen on Discord. | |
| import json | |
| import random | |
| import string | |
| import timeit | |
| from statistics import mean, stdev | |
| import orjson | |
| import simdjson | |
| import msgspec | |
| import pydantic | |
def describe_json(buf: bytes) -> None:
    """Print how often each JSON value type occurs in an encoded message.

    The buffer is decoded with ``msgspec.json.decode`` and every value in the
    resulting tree is tallied by its exact Python type; object keys are
    tallied as well. Types that never occur are omitted from the report, and
    the remaining rows are printed from most to least frequent together with
    their share of all tallied values.
    """
    labels = {
        dict: "objects",
        list: "arrays",
        str: "strs",
        int: "ints",
        float: "floats",
        bool: "bools",
        type(None): "nulls",
    }
    counts = {typ: 0 for typ in labels}

    # Walk the decoded tree with an explicit stack; the visiting order has no
    # effect on the tallies.
    pending = [msgspec.json.decode(buf)]
    while pending:
        node = pending.pop()
        node_type = type(node)
        counts[node_type] += 1
        if node_type is list:
            pending.extend(node)
        elif node_type is dict:
            for key, value in node.items():
                pending.append(key)
                pending.append(value)

    total = sum(counts.values())
    print("JSON Types:")
    rows = [(label, counts[typ]) for typ, label in labels.items() if counts[typ]]
    # Stable sort on the negated count: descending by count, ties keep the
    # declaration order of the labels.
    rows.sort(key=lambda row: -row[1])
    for kind, count in rows:
        print(f"- {kind}: {count} ({count/total:.2f})")
# Fixed seed so that every run generates the same random payload.
random.seed(42)


def randstr():
    """Return a random 10-character string drawn from ``string.printable``."""
    chars = random.choices(string.printable, k=10)
    return "".join(chars)
class UserStruct(msgspec.Struct):
    """msgspec schema for one user record and its list of float values."""

    # Field order is significant: instances are built positionally.
    username: str
    exp: float
    level: float
    last_values: list[float]
class UserPydantic(pydantic.BaseModel):
    """pydantic model with the same fields as ``UserStruct``."""

    username: str
    exp: float
    level: float
    last_values: list[float]
# Number of user records in the benchmark payload.
N = 10000


def _random_user():
    """Build one randomly populated ``UserStruct``.

    The random draws happen in the same order as the fields are declared
    (username, exp, level, value count, then each value), so the seeded
    random stream is consumed deterministically.
    """
    username = randstr()
    exp = random.random()
    level = random.uniform(0, 100)
    value_count = random.randrange(10, 20)
    last_values = [random.uniform(0, 100) for _ in range(value_count)]
    return UserStruct(
        username=username,
        exp=exp,
        level=level,
        last_values=last_values,
    )


# JSON-encoded list of N random users; this is the input every decoder parses.
msg = msgspec.json.encode([_random_user() for _ in range(N)])
# Typed decoders are constructed once, up front, so only decoding is timed.
_struct_decoder = msgspec.json.Decoder(list[UserStruct])
_pydantic_adapter = pydantic.TypeAdapter(list[UserPydantic])

# (display name, callable that takes the JSON bytes) for each decoder under test.
BENCHMARKS = [
    ("stdlib json", json.loads),
    ("orjson", orjson.loads),
    ("simdjson", simdjson.loads),
    ("msgspec-dict", msgspec.json.decode),
    ("msgspec-struct", _struct_decoder.decode),
    ("pydantic-v2", _pydantic_adapter.validate_json),
]
# Summarise the payload first, then time every decoder on it.
describe_json(msg)
print()
print("Benchmarks:")

# Maps each benchmark name to its list of 20 timings; each timing is the
# total for 10 consecutive decodes of the payload.
results = {}
for name, fun in BENCHMARKS:
    bench_globals = {"loads": fun, "msg": msg}
    times = timeit.repeat(
        stmt="loads(msg)",
        globals=bench_globals,
        number=10,
        repeat=20,
    )
    results[name] = times
    print(f"- {name}: {mean(times):.2f} ± {stdev(times):.2f}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment