Last active
October 28, 2024 20:12
-
-
Save jpivarski/98d704d1ce992e2eb154e719fa688c60 to your computer and use it in GitHub Desktop.
Revisions
-
jpivarski revised this gist
Oct 28, 2024 . 2 changed files with 19 additions and 37 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,4 @@ from typing import Callable import awkward as ak @@ -11,56 +10,50 @@ def __init__( length: ak._nplikes.shape.ShapeItem, form: ak.forms.Form, backend: ak._backends.backend.Backend, ): self._materialize = materialize self._length = length self._form = form self._backend = backend self._init(form._parameters, backend) self._materialized = None @property def materialized(self): if self._materialized is None: self._materialized = self._materialize() if not isinstance(self, type(self._materialized)): # note: there's probably a better way to find the direct Content subclass than __mro__[2] raise TypeError( f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had class type {type(self._materialized).__name__}, rather than {type(self).__mro__[2].__name__}" ) if self._materialized.length != self._length: raise ValueError( f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had length {self._materialized.length}, rather than the expected {self._length}" ) if not self._materialized.form.is_equal_to( self._form, all_parameters=True, form_key=False ): eoln = "\n" raise ValueError( f"""when the lazy array with form_key {self._form.form_key!r} was materialized, it had form {str(self._materialized.form).replace(eoln, eoln + ' ')} rather than the expected {str(self._form).replace(eoln, eoln + ' ')}""" ) if self._materialized.backend != self._backend: raise ValueError( f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had backend {self._materialized.backend!r}, rather than the expected {self._backend!r}" ) return self._materialized # every Content has length and form 
@property @@ -76,8 +69,7 @@ class LazyNumpyArray(LazyMixin, ak.contents.NumpyArray): # accessing the (private) self._data invokes materialization @property def _data(self): return self.materialized._data # accessing shape, inner_shape, and dtype don't invoke materialization @property @@ -97,10 +89,8 @@ class LazyListOffsetArray(LazyMixin, ak.contents.ListOffsetArray): # accessing the (private) self._offsets or self._content invokes materialization @property def _offsets(self): return self.materialized._offsets @property def _content(self): return self.materialized._content This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,9 +3,8 @@ ... print("run") ... return ak.from_iter([[1.1, 2.2, 3.3], [], [4.4, 5.5]], highlevel=False) ... >>> form = ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64")) >>> layout = LazyListOffsetArray(run, 3, form, ak._backends.numpy.NumpyBackend.instance()) >>> layout.length 3 >>> layout.parameters @@ -19,11 +18,4 @@ run <NumpyArray dtype='float64' len='0'>[]</NumpyArray> >>> layout[2] <NumpyArray dtype='float64' len='2'>[4.4 5.5]</NumpyArray> ``` -
jpivarski created this gist
Oct 28, 2024 . There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,106 @@ from typing import Callable from collections.abc import MutableMapping import awkward as ak class LazyMixin: def __init__( self, materialize: Callable[[], ak.contents.Content], length: ak._nplikes.shape.ShapeItem, form: ak.forms.Form, backend: ak._backends.backend.Backend, cache: MutableMapping, ): self._materialize = materialize self._length = length self._form = form self._backend = backend self._cache = cache self._init(form._parameters, backend) def _get_materialized(self): materialized = self._cache.get(self._form.form_key, None) if materialized is None: materialized = self._materialize() if not isinstance(self, type(materialized)): # note: there's probably a better way to find the direct Content subclass than __mro__[2] raise TypeError( f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had class type {type(materialized).__name__}, rather than {type(self).__mro__[2].__name__}" ) if materialized.length != self._length: raise ValueError( f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had length {materialized.length}, rather than the expected {self._length}" ) if not materialized.form.is_equal_to( self._form, all_parameters=True, form_key=False ): eoln = "\n" raise ValueError( f"""when the lazy array with form_key {self._form.form_key!r} was materialized, it had form {str(materialized.form).replace(eoln, eoln + ' ')} rather than the expected {str(self._form).replace(eoln, eoln + ' ')}""" ) if materialized.backend != self._backend: raise ValueError( f"when the lazy array with form_key {self._form.form_key!r} was materialized, it had backend {materialized.backend!r}, rather 
than the expected {self._backend!r}" ) self._cache[self._form.form_key] = materialized return materialized def __del__(self): self._cache.pop(self._form.form_key, None) # every Content has length and form @property def length(self): return self._length @property def form(self): return self._form class LazyNumpyArray(LazyMixin, ak.contents.NumpyArray): # accessing the (private) self._data invokes materialization @property def _data(self): materialized = self._get_materialized() return materialized._data # accessing shape, inner_shape, and dtype don't invoke materialization @property def shape(self): return (self.length,) + self.inner_shape @property def inner_shape(self): return self._form.inner_shape @property def dtype(self): return ak.types.primitive_to_dtype(self._form.primitive) class LazyListOffsetArray(LazyMixin, ak.contents.ListOffsetArray): # accessing the (private) self._offsets or self._content invokes materialization @property def _offsets(self): materialized = self._get_materialized() return materialized._offsets @property def _content(self): materialized = self._get_materialized() return materialized._content This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,29 @@ ```python >>> def run(): ... print("run") ... return ak.from_iter([[1.1, 2.2, 3.3], [], [4.4, 5.5]], highlevel=False) ... 
>>> cache = {} >>> form = ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64", form_key="node1"), form_key="node0") >>> layout = LazyListOffsetArray(run, 3, form, ak._backends.numpy.NumpyBackend.instance(), cache) >>> layout.length 3 >>> layout.parameters {} >>> print(layout.form.type) var * float64 >>> layout[0] run <NumpyArray dtype='float64' len='3'>[1.1 2.2 3.3]</NumpyArray> >>> layout[1] <NumpyArray dtype='float64' len='0'>[]</NumpyArray> >>> layout[2] <NumpyArray dtype='float64' len='2'>[4.4 5.5]</NumpyArray> >>> cache {'node0': <ListOffsetArray len='3'> <offsets><Index dtype='int64' len='4'> [0 3 3 5] </Index></offsets> <content><NumpyArray dtype='float64' len='5'>[1.1 2.2 3.3 4.4 5.5]</NumpyArray></content> </ListOffsetArray>} ```