Skip to content

Instantly share code, notes, and snippets.

@frobnitzem
Last active June 28, 2024 19:50
Show Gist options
  • Select an option

  • Save frobnitzem/6b38e562cf751938c5c1e2c67402f87c to your computer and use it in GitHub Desktop.

Select an option

Save frobnitzem/6b38e562cf751938c5c1e2c67402f87c to your computer and use it in GitHub Desktop.

Revisions

  1. frobnitzem revised this gist Jun 28, 2024. 1 changed file with 13 additions and 3 deletions.
    16 changes: 13 additions & 3 deletions serialized_tensor_sizes.md
    Original file line number Diff line number Diff line change
    @@ -6,7 +6,6 @@ import sys

    import numpy as np


    # https://huggingface.co/docs/safetensors/index
    #from safetensors.torch import save_file
    # https://huggingface.co/docs/safetensors/api/numpy
    @@ -43,7 +42,6 @@ with io.BytesIO() as f:
    print(f"torch.save: {sz}")

    import h5py

    # https://docs.h5py.org/en/stable/high/dataset.html#shuffle-filter
    #compression = "gzip"
    compression = "lzf"
    @@ -55,6 +53,14 @@ with io.BytesIO() as f:
    shuffle=True)
    sz = f.getbuffer().nbytes
    print(f"hdf5: {sz}")

    import zfpy # github.com/llnl/zfp
    # zfp output carries no tensor names and does not accept int16, so y is
    # widened to int32 before compression -- that's OK for this comparison.
    x_stream = zfpy.compress_numpy(x, write_header=True)
    y_stream = zfpy.compress_numpy(y.astype('int32'), write_header=True)
    # Stand-in for a small name/offset header, added to be fair to the
    # formats that do store tensor names.
    name_header = b'{"x":____,"y": ____}'
    sz = len(x_stream) + len(y_stream) + len(name_header)
    print(f"zfpy: {sz}")
    ```

    ```shell
    @@ -67,6 +73,7 @@ safetensors: 400344
    400200
    torch.save: 401560
    hdf5: 280551
    zfpy: 195340

    % python3 sizes.py 50 50 30
    50 50 30
    @@ -77,6 +84,7 @@ safetensors: 13136
    13000
    torch.save: 14360
    hdf5: 16784
    zfpy: 8356

    % python3 sizes.py 500 500 30
    500 500 30
    @@ -87,4 +95,6 @@ safetensors: 1030144
    1030000
    torch.save: 1031384
    hdf5: 704563
    zfpy: 418020
    ```

  2. frobnitzem created this gist Jun 25, 2024.
    90 changes: 90 additions & 0 deletions serialized_tensor_sizes.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,90 @@
    ```python
    # What are the size overheads for serializing tensors?
    #
    import io
    import sys

    import numpy as np


    # https://huggingface.co/docs/safetensors/index
    #from safetensors.torch import save_file
    # https://huggingface.co/docs/safetensors/api/numpy
    from safetensors.numpy import save

    # Grid extents are the first three command-line arguments.
    nx, ny, nz = (int(arg) for arg in sys.argv[1:4])
    print(nx, ny, nz)

    # Squared coordinates along each axis: x covers [0, 4), y and z cover [0, 5).
    x2 = (np.arange(nx) * 4 / nx) ** 2
    y2 = (np.arange(ny) * 5 / ny) ** 2
    z2 = (np.arange(nz) * 5 / nz) ** 2
    A = 10000


    def _gaussian(u2, v2):
        """Return A*exp(-(u2 + v2)/2) on the 2-D outer grid of two squared-coordinate vectors."""
        return A * np.exp(-0.5 * (u2[:, None] + v2[None, :]))


    # x is an (nx, ny) float32 tensor; y is an (nx, nz) int16 tensor.
    x = _gaussian(x2, y2).astype('float32')
    y = _gaussian(x2, z2).astype('int16')

    # Raw payload size, then the size if only the nonzero int16 entries of y
    # (16 bits each) were stored.
    print(x.nbytes + y.nbytes)
    nonzero_count = (y != 0).sum()
    print(x.nbytes + nonzero_count * 16 // 8) # bytes in nonzeros

    data = {"x": x, "y": y}

    # Measure the size of an uncompressed .npz archive holding both tensors.
    with io.BytesIO() as f:
        # Unpack the dict so each array is written as its own named member
        # ("x", "y"). Passed positionally, the whole dict would be wrapped in
        # a single pickled object array called "arr_0", which is not the
        # format being benchmarked and needs allow_pickle=True to load.
        np.savez(f, **data)
        # The buffer must be inspected before the BytesIO is closed.
        sz = f.getbuffer().nbytes
    print(f"np.savez: {sz}")

    # safetensors.numpy.save returns the serialized bytes directly,
    # so no in-memory file is needed here.
    serialized = save(data)
    sz = len(serialized)
    print(f"safetensors: {sz}")

    import torch
    # Copy each NumPy array into a torch tensor under the same key.
    tdata = {name: torch.tensor(array) for name, array in data.items()}
    # Raw tensor payload, for comparison with the serialized size below.
    print(sum(tensor.nbytes for tensor in tdata.values()))
    with io.BytesIO() as f:
        torch.save(tdata, f)
        # Read the size while the in-memory file is still open.
        sz = f.getbuffer().nbytes
    print(f"torch.save: {sz}")

    import h5py

    # https://docs.h5py.org/en/stable/high/dataset.html#shuffle-filter
    # "lzf" is the filter measured here; "gzip" is the alternative to try.
    compression = "lzf"
    filter_options = {"compression": compression, "shuffle": True}
    with io.BytesIO() as f:
        with h5py.File(f, "w") as h5:
            # One compressed, byte-shuffled dataset per named tensor.
            for name, array in data.items():
                h5.create_dataset(name, data=array, **filter_options)
        # Measured after the HDF5 file is closed but before the buffer is.
        sz = f.getbuffer().nbytes
    print(f"hdf5: {sz}")
    ```

    ```shell
    % python3 sizes.py 100 1000 1
    100 1000 1
    400200
    400200
    np.savez: 400802
    safetensors: 400344
    400200
    torch.save: 401560
    hdf5: 280551

    % python3 sizes.py 50 50 30
    50 50 30
    13000
    12192
    np.savez: 13604
    safetensors: 13136
    13000
    torch.save: 14360
    hdf5: 16784

    % python3 sizes.py 500 500 30
    500 500 30
    1030000
    1021746
    np.savez: 1030607
    safetensors: 1030144
    1030000
    torch.save: 1031384
    hdf5: 704563
    ```