Skip to content

Instantly share code, notes, and snippets.

@tyong920
Forked from jcrist/benchmark.py
Created March 4, 2025 03:25
Show Gist options
  • Save tyong920/8b5112dc578eeb398de31ed6d834f401 to your computer and use it in GitHub Desktop.
Save tyong920/8b5112dc578eeb398de31ed6d834f401 to your computer and use it in GitHub Desktop.
Benchmark of msgspec, orjson, pydantic, ... taken from Python discord
# This is a modified version of `orig_benchmark.py`, using different data to
# highlight performance differences.
import json
import random
import string
import timeit
from statistics import mean, stdev
import orjson
import simdjson
import msgspec
import pydantic
def describe_json(buf: bytes) -> None:
    """Print a breakdown of the JSON value types contained in ``buf``.

    The message is decoded with ``msgspec.json.decode`` and walked
    recursively. Every value is tallied by its exact Python type; object
    keys are tallied as values in their own right. One line is printed per
    type that occurs at least once, most frequent first, showing the count
    and its fraction of all tallied values.
    """
    # (display label, exact Python type) pairs, in reporting order.
    labelled_types = [
        ("objects", dict),
        ("arrays", list),
        ("strs", str),
        ("ints", int),
        ("floats", float),
        ("bools", bool),
        ("nulls", type(None)),
    ]
    counts = {py_type: 0 for _, py_type in labelled_types}

    def visit(node):
        # Exact type match (not isinstance) so bools are not counted as ints.
        node_type = type(node)
        counts[node_type] += 1
        if node_type is dict:
            # Count each key, then its value, before moving to the next item.
            for pair in node.items():
                for part in pair:
                    visit(part)
        elif node_type is list:
            for element in node:
                visit(element)

    visit(msgspec.json.decode(buf))
    total = sum(counts.values())

    print("JSON Types:")
    present = [
        (label, counts[py_type])
        for label, py_type in labelled_types
        if counts[py_type]
    ]
    # Stable sort: ties keep the order given in ``labelled_types``.
    for kind, count in sorted(present, key=lambda entry: entry[1], reverse=True):
        print(f"- {kind}: {count} ({count/total:.2f})")
# Seed the global RNG with a fixed value so the randomly generated benchmark
# data is the same on every run.
random.seed(42)
def randstr(length: int = 10) -> str:
    """Return a random string of ``length`` characters from ``string.printable``.

    Characters are sampled with replacement using the module-level ``random``
    generator, so the result depends on that generator's current state.

    Args:
        length: Number of characters to generate. Defaults to 10, the length
            this helper previously hard-coded.

    Returns:
        A string of exactly ``length`` characters.
    """
    return "".join(random.choices(string.printable, k=length))
class ItemStruct(msgspec.Struct):
    """msgspec record for one item: a string name paired with an integer value."""

    # Field order is significant: instances are built positionally as
    # ``ItemStruct(name, value)``.
    name: str
    value: int
class UserStruct(msgspec.Struct):
    """msgspec record for one user, including a nested list of ``ItemStruct``."""

    # Field order is significant: instances are built positionally as
    # ``UserStruct(username, exp, level, items)``.
    username: str
    exp: float
    level: float
    items: list[ItemStruct]
class ItemPydantic(pydantic.BaseModel):
    """pydantic model with the same fields as ``ItemStruct``."""

    name: str
    value: int
class UserPydantic(pydantic.BaseModel):
    """pydantic model with the same fields as ``UserStruct``."""

    username: str
    exp: float
    level: float
    items: list[ItemPydantic]
# Number of user records in the generated payload.
N = 10000

# One JSON array of N randomly generated users, each carrying 10-19 items.
# Keyword arguments are written in field order so the random calls happen in
# the same sequence as positional construction would produce.
msg = msgspec.json.encode(
    [
        UserStruct(
            username=randstr(),
            exp=random.random(),
            level=random.uniform(0, 100),
            items=[
                ItemStruct(name=randstr(), value=random.randint(0, 100))
                for _ in range(random.randrange(10, 20))
            ],
        )
        for _ in range(N)
    ]
)

# (label, decoder) pairs; every decoder is called with the encoded bytes.
BENCHMARKS = [
    ("stdlib json", json.loads),
    ("orjson", orjson.loads),
    ("simdjson", simdjson.loads),
    ("msgspec-dict", msgspec.json.decode),
    ("msgspec-struct", msgspec.json.Decoder(list[UserStruct]).decode),
    ("pydantic-v2", pydantic.TypeAdapter(list[UserPydantic]).validate_json),
]

describe_json(msg)
print("")
print("Benchmarks:")

# Raw timing samples per benchmark label.
results = {}
for name, fun in BENCHMARKS:
    # 20 samples; each sample is the total time for 10 decode calls.
    times = timeit.repeat(
        stmt="loads(msg)",
        globals={"loads": fun, "msg": msg},
        repeat=20,
        number=10,
    )
    results[name] = times
    print(f"- {name}: {mean(times):.2f} ± {stdev(times):.2f}")
# This is a cleaned up version of the original benchmark. Semantically it's the
# same, just a bit easier to read. The performance numbers also match those
# seen on discord.
import json
import random
import string
import timeit
from statistics import mean, stdev
import orjson
import simdjson
import msgspec
import pydantic
def describe_json(buf: bytes) -> None:
    """Print a breakdown of the JSON value types contained in ``buf``.

    The message is decoded with ``msgspec.json.decode`` and walked
    recursively. Every value is tallied by its exact Python type; object
    keys are tallied as values in their own right. One line is printed per
    type that occurs at least once, most frequent first, showing the count
    and its fraction of all tallied values.
    """
    # (display label, exact Python type) pairs, in reporting order.
    labelled_types = [
        ("objects", dict),
        ("arrays", list),
        ("strs", str),
        ("ints", int),
        ("floats", float),
        ("bools", bool),
        ("nulls", type(None)),
    ]
    counts = {py_type: 0 for _, py_type in labelled_types}

    def visit(node):
        # Exact type match (not isinstance) so bools are not counted as ints.
        node_type = type(node)
        counts[node_type] += 1
        if node_type is dict:
            # Count each key, then its value, before moving to the next item.
            for pair in node.items():
                for part in pair:
                    visit(part)
        elif node_type is list:
            for element in node:
                visit(element)

    visit(msgspec.json.decode(buf))
    total = sum(counts.values())

    print("JSON Types:")
    present = [
        (label, counts[py_type])
        for label, py_type in labelled_types
        if counts[py_type]
    ]
    # Stable sort: ties keep the order given in ``labelled_types``.
    for kind, count in sorted(present, key=lambda entry: entry[1], reverse=True):
        print(f"- {kind}: {count} ({count/total:.2f})")
# Seed the global RNG with a fixed value so the randomly generated benchmark
# data is the same on every run.
random.seed(42)
def randstr(length: int = 10) -> str:
    """Return a random string of ``length`` characters from ``string.printable``.

    Characters are sampled with replacement using the module-level ``random``
    generator, so the result depends on that generator's current state.

    Args:
        length: Number of characters to generate. Defaults to 10, the length
            this helper previously hard-coded.

    Returns:
        A string of exactly ``length`` characters.
    """
    return "".join(random.choices(string.printable, k=length))
class UserStruct(msgspec.Struct):
    """msgspec record for one user, including a list of float values."""

    # Field order is significant: instances are built positionally as
    # ``UserStruct(username, exp, level, last_values)``.
    username: str
    exp: float
    level: float
    last_values: list[float]
class UserPydantic(pydantic.BaseModel):
    """pydantic model with the same fields as ``UserStruct``."""

    username: str
    exp: float
    level: float
    last_values: list[float]
# Number of user records in the generated payload.
N = 10000

# One JSON array of N randomly generated users, each carrying 10-19 floats.
# Keyword arguments are written in field order so the random calls happen in
# the same sequence as positional construction would produce.
msg = msgspec.json.encode(
    [
        UserStruct(
            username=randstr(),
            exp=random.random(),
            level=random.uniform(0, 100),
            last_values=[
                random.uniform(0, 100) for _ in range(random.randrange(10, 20))
            ],
        )
        for _ in range(N)
    ]
)

# (label, decoder) pairs; every decoder is called with the encoded bytes.
BENCHMARKS = [
    ("stdlib json", json.loads),
    ("orjson", orjson.loads),
    ("simdjson", simdjson.loads),
    ("msgspec-dict", msgspec.json.decode),
    ("msgspec-struct", msgspec.json.Decoder(list[UserStruct]).decode),
    ("pydantic-v2", pydantic.TypeAdapter(list[UserPydantic]).validate_json),
]

describe_json(msg)
print("")
print("Benchmarks:")

# Raw timing samples per benchmark label.
results = {}
for name, fun in BENCHMARKS:
    # 20 samples; each sample is the total time for 10 decode calls.
    times = timeit.repeat(
        stmt="loads(msg)",
        globals={"loads": fun, "msg": msg},
        repeat=20,
        number=10,
    )
    results[name] = times
    print(f"- {name}: {mean(times):.2f} ± {stdev(times):.2f}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment