```python
# What are the size overheads for serializing tensors?
#
# Usage: python3 sizes.py NX NY NZ
#
# Builds two test arrays -- "x" with shape (NX, NY) as float32 and "y" with
# shape (NX, NZ) as int16 -- and prints how many bytes each serialization
# format needs to store the pair.
import io
import sys
import numpy as np

# https://huggingface.co/docs/safetensors/index
# (torch variant, unused here: from safetensors.torch import save_file)
# https://huggingface.co/docs/safetensors/api/numpy
from safetensors.numpy import save

if len(sys.argv) < 4:
    sys.exit(f"usage: {sys.argv[0]} NX NY NZ")
nx, ny, nz = map(int, sys.argv[1:4])
print(nx, ny, nz)

# Squared coordinates along each axis; they feed the Gaussian-shaped test data.
x2 = (np.arange(nx) * 4 / nx) ** 2
y2 = (np.arange(ny) * 5 / ny) ** 2
z2 = (np.arange(nz) * 5 / nz) ** 2

A = 10000  # peak amplitude
x = (A * np.exp(-0.5 * (x2[:, None] + y2[None, :]))).astype('float32')
# The int16 cast truncates values below 1 to zero, so the tails of y can
# consist of zeros.
y = (A * np.exp(-0.5 * (x2[:, None] + z2[None, :]))).astype('int16')

print(x.nbytes + y.nbytes)  # raw payload size
print(x.nbytes + (y != 0).sum() * 16 // 8)  # bytes in nonzeros

data = {"x": x, "y": y}

with io.BytesIO() as f:
    # Pass the arrays as keywords so each one is stored as its own named
    # .npy member.  Passing the dict positionally makes NumPy wrap it in a
    # 0-d object array and pickle it, which measures pickle instead of the
    # .npz layout and needs allow_pickle=True to load.
    # NOTE: the sample runs below were recorded with the positional form, so
    # their np.savez figures may differ slightly from what this prints.
    np.savez(f, **data)
    sz = f.getbuffer().nbytes
print(f"np.savez: {sz}")

sz = len(save(data))
print(f"safetensors: {sz}")

import torch
tdata = {"x": torch.tensor(x), "y": torch.tensor(y)}
print(tdata["x"].nbytes + tdata["y"].nbytes)
with io.BytesIO() as f:
    torch.save(tdata, f)
    sz = f.getbuffer().nbytes
print(f"torch.save: {sz}")

import h5py
# https://docs.h5py.org/en/stable/high/dataset.html#shuffle-filter
# Alternative filter: compression = "gzip"
compression = "lzf"
with io.BytesIO() as f:
    with h5py.File(f, "w") as h5:
        for k, v in data.items():
            h5.create_dataset(k, data=v, compression=compression, shuffle=True)
    # Read the size only after the HDF5 file has been closed, but while the
    # BytesIO buffer is still open.
    sz = f.getbuffer().nbytes
print(f"hdf5: {sz}")

import zfpy  # github.com/llnl/zfp
# Doesn't name tensors or accept int16, but that's OK.
# We add a header size to be fair.
sz = (
    len(zfpy.compress_numpy(x, write_header=True))
    + len(zfpy.compress_numpy(y.astype('int32'), write_header=True))
    + len(b'{"x":____,"y": ____}')
)
print(f"zfpy: {sz}")
```

```shell
% python3 sizes.py 100 1000 1
100 1000 1
400200
400200
np.savez: 400802
safetensors: 400344
400200
torch.save: 401560
hdf5: 280551
zfpy: 195340
% python3 sizes.py 50 50 30
50 50 30
13000
12192
np.savez: 13604
safetensors: 13136
13000
torch.save: 14360
hdf5: 16784
zfpy: 8356
% python3 sizes.py 500 500 30
500 500 30
1030000
1021746
np.savez: 1030607
safetensors: 1030144
1030000
torch.save: 1031384
hdf5: 704563
zfpy: 418020
```