Last active
June 28, 2024 19:50
-
-
Save frobnitzem/6b38e562cf751938c5c1e2c67402f87c to your computer and use it in GitHub Desktop.
Revisions
-
frobnitzem revised this gist
Jun 28, 2024. 1 changed file with 13 additions and 3 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,7 +6,6 @@ import sys import numpy as np # https://huggingface.co/docs/safetensors/index #from safetensors.torch import save_file # https://huggingface.co/docs/safetensors/api/numpy @@ -43,7 +42,6 @@ with io.BytesIO() as f: print(f"torch.save: {sz}") import h5py # https://docs.h5py.org/en/stable/high/dataset.html#shuffle-filter #compression = "gzip" compression = "lzf" @@ -55,6 +53,14 @@ with io.BytesIO() as f: shuffle=True) sz = f.getbuffer().nbytes print(f"hdf5: {sz}") import zfpy # github.com/llnl/zfp # Doesn't name tensors or accept int16, but that's OK. # We add a header size to be fair. sz = len( zfpy.compress_numpy(x, write_header=True) ) \ + len( zfpy.compress_numpy(y.astype('int32'), write_header=True) ) \ + len(b'{"x":____,"y": ____}') print(f"zfpy: {sz}") ``` ```shell @@ -67,6 +73,7 @@ safetensors: 400344 400200 torch.save: 401560 hdf5: 280551 zfpy: 195340 % python3 sizes.py 50 50 30 50 50 30 @@ -77,6 +84,7 @@ safetensors: 13136 13000 torch.save: 14360 hdf5: 16784 zfpy: 8356 % python3 sizes.py 500 500 30 500 500 30 @@ -87,4 +95,6 @@ safetensors: 1030144 1030000 torch.save: 1031384 hdf5: 704563 zfpy: 418020 ``` -
frobnitzem created this gist
Jun 25, 2024. There are no files selected for viewing
"""What are the size overheads for serializing tensors?

Usage: python3 sizes.py NX NY NZ

Builds a float32 array ``x`` of shape (NX, NY) and an int16 array ``y`` of
shape (NX, NZ), then prints the number of bytes each serializer produces for
the pair: np.savez, safetensors, torch.save and HDF5 (lzf + shuffle filter).
"""
import io
import sys

import numpy as np

# Peak value of both test arrays; small enough to fit in int16.
AMPLITUDE = 10000


def make_tensors(nx, ny, nz):
    """Return the test arrays ``(x, y)``.

    ``x`` is float32 with shape (nx, ny) and ``y`` is int16 with shape
    (nx, nz).  Both are ``AMPLITUDE * exp(-r/2)`` sampled on a grid, so ``y``
    truncates to zero wherever the value drops below 1.
    """
    x2 = (np.arange(nx) * 4 / nx) ** 2
    y2 = (np.arange(ny) * 5 / ny) ** 2
    z2 = (np.arange(nz) * 5 / nz) ** 2
    x = (AMPLITUDE * np.exp(-0.5 * (x2[:, None] + y2[None, :]))).astype("float32")
    y = (AMPLITUDE * np.exp(-0.5 * (x2[:, None] + z2[None, :]))).astype("int16")
    return x, y


def nonzero_bytes(x, y):
    """Return the bytes in all of ``x`` plus only the nonzero entries of ``y``."""
    return x.nbytes + np.count_nonzero(y) * y.itemsize


def _written_size(write):
    """Call ``write(f)`` on a fresh in-memory file; return the bytes written."""
    with io.BytesIO() as f:
        write(f)
        # Release the exported view explicitly: BytesIO cannot be closed
        # while a buffer export is still alive.
        with f.getbuffer() as view:
            return view.nbytes


def npz_bytes(data):
    """Serialize ``data`` (name -> array) as an uncompressed .npz archive.

    The arrays are passed as keywords so each one is stored under its own
    name.  Passing the dict positionally would instead store a single
    pickled object array called ``arr_0``.
    """
    with io.BytesIO() as f:
        np.savez(f, **data)
        return f.getvalue()


def safetensors_size(data):
    """Return the size of ``data`` serialized with safetensors."""
    # https://huggingface.co/docs/safetensors/api/numpy
    from safetensors.numpy import save

    return len(save(data))


def torch_sizes(data):
    """Return ``(raw tensor bytes, torch.save size)`` for ``data``."""
    import torch

    tensors = {name: torch.tensor(arr) for name, arr in data.items()}
    raw = sum(t.nbytes for t in tensors.values())
    return raw, _written_size(lambda f: torch.save(tensors, f))


def hdf5_size(data, compression="lzf"):
    """Return the size of ``data`` written as HDF5 datasets.

    Each array becomes one dataset using ``compression`` ("gzip" is the
    other candidate) together with the byte-shuffle filter.
    """
    # https://docs.h5py.org/en/stable/high/dataset.html#shuffle-filter
    import h5py

    def write(f):
        with h5py.File(f, "w") as h5:
            for name, arr in data.items():
                h5.create_dataset(name, data=arr, compression=compression,
                                  shuffle=True)

    return _written_size(write)


def main(argv=None):
    """Run every measurement for the NX NY NZ given on the command line."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        raise SystemExit(f"usage: {sys.argv[0]} NX NY NZ")
    nx, ny, nz = map(int, args[:3])  # extra arguments are ignored
    print(nx, ny, nz)

    x, y = make_tensors(nx, ny, nz)
    print(x.nbytes + y.nbytes)
    print(nonzero_bytes(x, y))  # bytes in nonzeros

    data = {"x": x, "y": y}
    print(f"np.savez: {len(npz_bytes(data))}")
    print(f"safetensors: {safetensors_size(data)}")

    raw, saved = torch_sizes(data)
    print(raw)
    print(f"torch.save: {saved}")

    print(f"hdf5: {hdf5_size(data)}")


if __name__ == "__main__":
    main()

# Recorded output.  NOTE: the np.savez figures were measured while the dict
# was still passed positionally (one pickled object array); re-run to refresh.
#
# % python3 sizes.py 100 1000 1
# 100 1000 1
# 400200
# 400200
# np.savez: 400802
# safetensors: 400344
# 400200
# torch.save: 401560
# hdf5: 280551
#
# % python3 sizes.py 50 50 30
# 50 50 30
# 13000
# 12192
# np.savez: 13604
# safetensors: 13136
# 13000
# torch.save: 14360
# hdf5: 16784
#
# % python3 sizes.py 500 500 30
# 500 500 30
# 1030000
# 1021746
# np.savez: 1030607
# safetensors: 1030144
# 1030000
# torch.save: 1031384
# hdf5: 704563