Skip to content

Instantly share code, notes, and snippets.

@jpivarski
Last active October 28, 2024 20:12
Show Gist options
  • Save jpivarski/98d704d1ce992e2eb154e719fa688c60 to your computer and use it in GitHub Desktop.
Save jpivarski/98d704d1ce992e2eb154e719fa688c60 to your computer and use it in GitHub Desktop.

Revisions

  1. jpivarski revised this gist Oct 28, 2024. 2 changed files with 19 additions and 37 deletions.
    44 changes: 17 additions & 27 deletions draft-lazy.py
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,4 @@
    from typing import Callable
    from collections.abc import MutableMapping

    import awkward as ak

    @@ -11,56 +10,50 @@ def __init__(
    length: ak._nplikes.shape.ShapeItem,
    form: ak.forms.Form,
    backend: ak._backends.backend.Backend,
    cache: MutableMapping,
    ):
    self._materialize = materialize
    self._length = length
    self._form = form
    self._backend = backend
    self._cache = cache
    self._init(form._parameters, backend)

    def _get_materialized(self):
    materialized = self._cache.get(self._form.form_key, None)
    self._materialized = None

    if materialized is None:
    materialized = self._materialize()
    @property
    def materialized(self):
    if self._materialized is None:
    self._materialized = self._materialize()

    if not isinstance(self, type(materialized)):
    if not isinstance(self, type(self._materialized)):
    # note: there's probably a better way to find the direct Content subclass than __mro__[2]
    raise TypeError(
    f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had class type {type(materialized).__name__}, rather than {type(self).__mro__[2].__name__}"
    f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had class type {type(self._materialized).__name__}, rather than {type(self).__mro__[2].__name__}"
    )

    if materialized.length != self._length:
    if self._materialized.length != self._length:
    raise ValueError(
    f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had length {materialized.length}, rather than the expected {self._length}"
    f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had length {self._materialized.length}, rather than the expected {self._length}"
    )
    if not materialized.form.is_equal_to(
    if not self._materialized.form.is_equal_to(
    self._form, all_parameters=True, form_key=False
    ):
    eoln = "\n"
    raise ValueError(
    f"""when the lazy array with form_key {self._form.form_key!r} was materialized, it had form
    {str(materialized.form).replace(eoln, eoln + ' ')}
    {str(self._materialized.form).replace(eoln, eoln + ' ')}
    rather than the expected
    {str(self._form).replace(eoln, eoln + ' ')}"""
    )

    if materialized.backend != self._backend:
    if self._materialized.backend != self._backend:
    raise ValueError(
    f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had backend {materialized.backend!r}, rather than the expected {self._backend!r}"
    f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had backend {self._materialized.backend!r}, rather than the expected {self._backend!r}"
    )

    self._cache[self._form.form_key] = materialized

    return materialized

    def __del__(self):
    self._cache.pop(self._form.form_key, None)
    return self._materialized

    # every Content has length and form
    @property
    @@ -76,8 +69,7 @@ class LazyNumpyArray(LazyMixin, ak.contents.NumpyArray):
    # accessing the (private) self._data invokes materialization
    @property
    def _data(self):
    materialized = self._get_materialized()
    return materialized._data
    return self.materialized._data

    # accessing shape, inner_shape, and dtype don't invoke materialization
    @property
    @@ -97,10 +89,8 @@ class LazyListOffsetArray(LazyMixin, ak.contents.ListOffsetArray):
    # accessing the (private) self._offsets or self._content invokes materialization
    @property
    def _offsets(self):
    materialized = self._get_materialized()
    return materialized._offsets
    return self.materialized._offsets

    @property
    def _content(self):
    materialized = self._get_materialized()
    return materialized._content
    return self.materialized._content
    12 changes: 2 additions & 10 deletions test.md
    Original file line number Diff line number Diff line change
    @@ -3,9 +3,8 @@
    ... print("run")
    ... return ak.from_iter([[1.1, 2.2, 3.3], [], [4.4, 5.5]], highlevel=False)
    ...
    >>> cache = {}
    >>> form = ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64", form_key="node1"), form_key="node0")
    >>> layout = LazyListOffsetArray(run, 3, form, ak._backends.numpy.NumpyBackend.instance(), cache)
    >>> form = ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64"))
    >>> layout = LazyListOffsetArray(run, 3, form, ak._backends.numpy.NumpyBackend.instance())
    >>> layout.length
    3
    >>> layout.parameters
    @@ -19,11 +18,4 @@ run
    <NumpyArray dtype='float64' len='0'>[]</NumpyArray>
    >>> layout[2]
    <NumpyArray dtype='float64' len='2'>[4.4 5.5]</NumpyArray>
    >>> cache
    {'node0': <ListOffsetArray len='3'>
    <offsets><Index dtype='int64' len='4'>
    [0 3 3 5]
    </Index></offsets>
    <content><NumpyArray dtype='float64' len='5'>[1.1 2.2 3.3 4.4 5.5]</NumpyArray></content>
    </ListOffsetArray>}
    ```
  2. jpivarski created this gist Oct 28, 2024.
    106 changes: 106 additions & 0 deletions draft-lazy.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,106 @@
    from typing import Callable
    from collections.abc import MutableMapping

    import awkward as ak


    class LazyMixin:
        """Mixin that postpones building a layout node until its buffers are read.

        It is meant to be listed before a concrete ``ak.contents`` class in the
        bases of a subclass (see the ``Lazy*`` classes in this file).  The
        subclass exposes the private buffers as properties that call
        ``_get_materialized()``.

        Materialized nodes are stored in ``cache`` under ``form.form_key`` only,
        so lazy arrays that share one cache must use distinct form keys.
        """

        def __init__(
            self,
            materialize: Callable[[], ak.contents.Content],
            length: ak._nplikes.shape.ShapeItem,
            form: ak.forms.Form,
            backend: ak._backends.backend.Backend,
            cache: MutableMapping,
        ):
            """Record what is known up front, without calling ``materialize``.

            Args:
                materialize: zero-argument callable returning the real layout node.
                length: length the materialized node is required to have.
                form: form the materialized node is required to match
                    (its ``form_key`` is ignored in that comparison).
                backend: backend the materialized node is required to have.
                cache: mapping in which the materialized node is kept, keyed by
                    ``form.form_key``.
            """
            self._materialize = materialize
            self._length = length
            self._form = form
            self._backend = backend
            self._cache = cache
            # NOTE(review): ``_init`` is not defined here; presumably it is
            # supplied by the Content class this mixin is combined with.
            self._init(form._parameters, backend)

        def _get_materialized(self):
            """Return the materialized node, creating it on a cache miss.

            The node is validated on every call, including cache hits, and is
            then (re)stored in the cache.

            Raises:
                TypeError: the node's class is not a base class of this object.
                ValueError: the node's length, form, or backend differs from
                    what was promised to ``__init__``.
            """
            key = self._form.form_key
            materialized = self._cache.get(key, None)

            if materialized is None:
                materialized = self._materialize()

            self._check_materialized(materialized)
            self._cache[key] = materialized
            return materialized

        def _check_materialized(self, materialized):
            """Raise unless ``materialized`` matches the promised class, length, form, and backend."""
            key = self._form.form_key
            prefix = f"when the lazy array with form_key {key!r} was materialized, it had"

            if not isinstance(self, type(materialized)):
                raise TypeError(
                    f"{prefix} class type {type(materialized).__name__}, rather than {self._eager_class().__name__}"
                )

            if materialized.length != self._length:
                raise ValueError(
                    f"{prefix} length {materialized.length}, rather than the expected {self._length}"
                )

            if not materialized.form.is_equal_to(
                self._form, all_parameters=True, form_key=False
            ):
                raise ValueError(
                    f"{prefix} form\n"
                    f"{self._indented(materialized.form)}\n"
                    "rather than the expected\n"
                    f"{self._indented(self._form)}"
                )

            if materialized.backend != self._backend:
                raise ValueError(
                    f"{prefix} backend {materialized.backend!r}, rather than the expected {self._backend!r}"
                )

        def _eager_class(self):
            """Return the first class in the MRO that is not a lazy wrapper."""
            # Searching the MRO (instead of indexing __mro__[2]) keeps the error
            # message right when a lazy class is subclassed further.  ``object``
            # is never a LazyMixin subclass, so the search always finds a class.
            return next(
                cls for cls in type(self).__mro__ if not issubclass(cls, LazyMixin)
            )

        @staticmethod
        def _indented(form):
            """Return ``str(form)`` with every line indented by four spaces."""
            pad = "    "
            return pad + str(form).replace("\n", "\n" + pad)

        def __del__(self):
            """Drop this array's cache entry when the lazy wrapper is collected."""
            # __del__ also runs for objects whose __init__ raised before the
            # attributes were assigned, so look them up defensively.
            cache = getattr(self, "_cache", None)
            form = getattr(self, "_form", None)
            if cache is not None and form is not None:
                cache.pop(form.form_key, None)

        # every Content has length and form
        @property
        def length(self):
            """Length given to ``__init__``; reading it does not materialize."""
            return self._length

        @property
        def form(self):
            """Form given to ``__init__``; reading it does not materialize."""
            return self._form


    class LazyNumpyArray(LazyMixin, ak.contents.NumpyArray):
        """NumpyArray whose private ``_data`` is read from the materialized node.

        ``dtype``, ``inner_shape`` and ``shape`` are answered from the form and
        the stored length, so reading them does not call ``_get_materialized()``.
        """

        @property
        def _data(self):
            # Reading the (private) data buffer goes through materialization.
            return self._get_materialized()._data

        @property
        def shape(self):
            # Stored length first, then whatever inner dimensions the form has.
            leading = (self.length,)
            return leading + self.inner_shape

        @property
        def inner_shape(self):
            form = self._form
            return form.inner_shape

        @property
        def dtype(self):
            primitive = self._form.primitive
            return ak.types.primitive_to_dtype(primitive)


    class LazyListOffsetArray(LazyMixin, ak.contents.ListOffsetArray):
        """ListOffsetArray whose private buffers are read from the materialized node.

        Both ``_offsets`` and ``_content`` go through ``_get_materialized()``.
        """

        @property
        def _offsets(self):
            # Reading the (private) offsets goes through materialization.
            return self._get_materialized()._offsets

        @property
        def _content(self):
            # Reading the (private) content goes through materialization.
            return self._get_materialized()._content
    29 changes: 29 additions & 0 deletions test.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,29 @@
    ```python
    >>> def run():
    ... print("run")
    ... return ak.from_iter([[1.1, 2.2, 3.3], [], [4.4, 5.5]], highlevel=False)
    ...
    >>> cache = {}
    >>> form = ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64", form_key="node1"), form_key="node0")
    >>> layout = LazyListOffsetArray(run, 3, form, ak._backends.numpy.NumpyBackend.instance(), cache)
    >>> layout.length
    3
    >>> layout.parameters
    {}
    >>> print(layout.form.type)
    var * float64
    >>> layout[0]
    run
    <NumpyArray dtype='float64' len='3'>[1.1 2.2 3.3]</NumpyArray>
    >>> layout[1]
    <NumpyArray dtype='float64' len='0'>[]</NumpyArray>
    >>> layout[2]
    <NumpyArray dtype='float64' len='2'>[4.4 5.5]</NumpyArray>
    >>> cache
    {'node0': <ListOffsetArray len='3'>
    <offsets><Index dtype='int64' len='4'>
    [0 3 3 5]
    </Index></offsets>
    <content><NumpyArray dtype='float64' len='5'>[1.1 2.2 3.3 4.4 5.5]</NumpyArray></content>
    </ListOffsetArray>}
    ```