sergpolly · February 14, 2024 14:50 · sergpolly · Feb 14, 2024
diff --git a/snip_helpers.py b/snip_helpers.py
 def triu(arr, value=np.nan, k=0, copy=False):
    """
    Upper triangle of an array.

    Modifies input array, such that its elements below the k-th diagonal
    are filled with value (returns modified copy, when requested instead).
    Assumes that the input array is 2D.

    Parameters
    ----------
    arr : array_like
        The 2D array to be modified.
    value : float
        The value to fill with, by default nan.
    k : int, optional
        Diagonal, below which to fill, by default 0.
    copy : bool, optional
        If True, a copy of arr is returned, False - modified in place.

    Returns
    -------
    ndarray
        The filled array.

    Notes
    -----
    Similar to numpy.triu, but allows for a custom value to be filled.
    This solution was borrowed from
    https://stackoverflow.com/questions/33029129/make-a-numpy-upper-triangular-matrix-padded-with-nan-instead-of-zero
    It is quicker than the naive one based on numpy.tril_indices.
    """
    if copy:
        arr = arr.copy()
    m, n = arr.shape
    _tril_mask = np.arange(m)[:,None] > np.arange(n) - k
    arr[_tril_mask] = value
    return arr

 from scipy.sparse import issparse
 def _snip(matrix, offset, bad_bins_masks, min_diag, bbox):
    """
    Extract snippet from the matrix

    Parameters
    ----------
    matrix : matrix-like
        2D matrix from which bbox will be extracted.
        Supports dense, sparse and lazy subscriptable interfaces
    offset : (int, int)
        2D offset of the matrix: (offset1, offset2)
    bad_bins : ( ndarray[bool], ndarray[bool])
        Bad-bins masks for the matrix. Corresponding values are
        filled with nans.
    min_diag : int
        Diagonal of the matrix below which values are masked with nans.
        It takes offset into account: offset2 - offset1 is its main diagonal
    bbox : (int, int, int, int)
        Bounding box of the snippet to be extracted: (lo1, hi1, lo2, hi2)
        Indices are relative to the matrix.

    Returns
    -------
    np.array
        Requested snippet.
    """

    lo1, hi1, lo2, hi2 = bbox
    offset1, offset2 = offset
    bad_bins1, bad_bins2 = bad_bins_masks

    m, n = matrix.shape
    dm, dn = hi1 - lo1, hi2 - lo2

    # when snippet is out of bound, return empty snippet
    if lo1 < 0 or lo2 < 0 or hi1 > m or hi2 > n:
        return np.full((dm, dn), np.nan)
    # TODO implement proper padding instead

    if issparse(matrix):
        snippet = matrix[lo1:hi1, lo2:hi2].toarray()
    else:
        snippet = matrix[lo1:hi1, lo2:hi2].copy()

    # fill bad_bins with nans
    snippet[bad_bins1[lo1:hi1], :] = np.nan
    snippet[:, bad_bins2[lo2:hi2]] = np.nan

    # mask tril diagonals of the snippet when requested:
    if min_diag is not None:
        # low-left diagonal of the snip
        lowleft_diag_snip = (lo2 + offset2) - (hi1 + offset1)
        # number of lowleft diags to mask, if there are any:
        if (num_tril_diags := min_diag - lowleft_diag_snip) > 0:
            diag_below_which_nan = num_tril_diags - dm
            snippet = triu( snippet, k=diag_below_which_nan )

    return snippet


 def array_to_csr_data(arr, sparse_arr):
    """
    Extract elements of an array corresponding to non-zero (nnz) elements
    of a reference sparse array sparse_arr.

    arr has to have the same shape as sparse_arr. It can be dense
    or any other 2D subscriptable object, e.g. LazyToeplitz.

    Parameters
    ----------
    arr : array-like
        2D array/matrix oe subscriptable object
    sparse_arr : scipy.sparse.csr_matrix
        CSR matrix to derive nnz indices from
    
    Returns
    -------
    arr_data : ndarray
        array of the same length as sparse_arr.data

    Notes
    -----
    Such a row-by-row extraction from arr seems like a good compromise
    for performance/memory usage. Alternatives include fully dense version,
    and using COO
    """
    # assuming arr.shape == sparse_arr.shape
    num_rows = sparse_arr.shape[0]

    arr_data = np.empty_like(sparse_arr.data)
    # extract arr values matching sparse_arr nnz values, row by row
    for i in range(num_rows):
        lo, hi = sparse_arr.indptr[i], sparse_arr.indptr[i+1]
        # determine col indices, and append if there are any
        js = sparse_arr.indices[lo:hi]
        # arr @ i-th row, js cols
        arr_data[lo:hi] = arr[i].squeeze()[js]
    
    return arr_data
	def triu(arr, value=np.nan, k=0, copy=False):
	"""
	Upper triangle of an array.

	Modifies input array, such that its elements below the k-th diagonal
	are filled with value (returns modified copy, when requested instead).
	Assumes that the input array is 2D.

	Parameters
	----------
	arr : array_like
	The 2D array to be modified.
	value : float
	The value to fill with, by default nan.
	k : int, optional
	Diagonal, below which to fill, by default 0.
	copy : bool, optional
	If True, a copy of arr is returned, False - modified in place.

	Returns
	-------
	ndarray
	The filled array.

	Notes
	-----
	Similar to numpy.triu, but allows for a custom value to be filled.
	This solution was borrowed from
	https://stackoverflow.com/questions/33029129/make-a-numpy-upper-triangular-matrix-padded-with-nan-instead-of-zero
	It is quicker than the naive one based on numpy.tril_indices.
	"""
	if copy:
	arr = arr.copy()
	m, n = arr.shape
	_tril_mask = np.arange(m)[:,None] > np.arange(n) - k
	arr[_tril_mask] = value
	return arr

	from scipy.sparse import issparse
	def _snip(matrix, offset, bad_bins_masks, min_diag, bbox):
	"""
	Extract snippet from the matrix

	Parameters
	----------
	matrix : matrix-like
	2D matrix from which bbox will be extracted.
	Supports dense, sparse and lazy subscriptable interfaces
	offset : (int, int)
	2D offset of the matrix: (offset1, offset2)
	bad_bins : ( ndarray[bool], ndarray[bool])
	Bad-bins masks for the matrix. Corresponding values are
	filled with nans.
	min_diag : int
	Diagonal of the matrix below which values are masked with nans.
	It takes offset into account: offset2 - offset1 is its main diagonal
	bbox : (int, int, int, int)
	Bounding box of the snippet to be extracted: (lo1, hi1, lo2, hi2)
	Indices are relative to the matrix.

	Returns
	-------
	np.array
	Requested snippet.
	"""

	lo1, hi1, lo2, hi2 = bbox
	offset1, offset2 = offset
	bad_bins1, bad_bins2 = bad_bins_masks

	m, n = matrix.shape
	dm, dn = hi1 - lo1, hi2 - lo2

	# when snippet is out of bound, return empty snippet
	if lo1 < 0 or lo2 < 0 or hi1 > m or hi2 > n:
	return np.full((dm, dn), np.nan)
	# TODO implement proper padding instead

	if issparse(matrix):
	snippet = matrix[lo1:hi1, lo2:hi2].toarray()
	else:
	snippet = matrix[lo1:hi1, lo2:hi2].copy()

	# fill bad_bins with nans
	snippet[bad_bins1[lo1:hi1], :] = np.nan
	snippet[:, bad_bins2[lo2:hi2]] = np.nan

	# mask tril diagonals of the snippet when requested:
	if min_diag is not None:
	# low-left diagonal of the snip
	lowleft_diag_snip = (lo2 + offset2) - (hi1 + offset1)
	# number of lowleft diags to mask, if there are any:
	if (num_tril_diags := min_diag - lowleft_diag_snip) > 0:
	diag_below_which_nan = num_tril_diags - dm
	snippet = triu( snippet, k=diag_below_which_nan )

	return snippet


	def array_to_csr_data(arr, sparse_arr):
	"""
	Extract elements of an array corresponding to non-zero (nnz) elements
	of a reference sparse array sparse_arr.

	arr has to have the same shape as sparse_arr. It can be dense
	or any other 2D subscriptable object, e.g. LazyToeplitz.

	Parameters
	----------
	arr : array-like
	2D array/matrix oe subscriptable object
	sparse_arr : scipy.sparse.csr_matrix
	CSR matrix to derive nnz indices from

	Returns
	-------
	arr_data : ndarray
	array of the same length as sparse_arr.data

	Notes
	-----
	Such a row-by-row extraction from arr seems like a good compromise
	for performance/memory usage. Alternatives include fully dense version,
	and using COO
	"""
	# assuming arr.shape == sparse_arr.shape
	num_rows = sparse_arr.shape[0]

	arr_data = np.empty_like(sparse_arr.data)
	# extract arr values matching sparse_arr nnz values, row by row
	for i in range(num_rows):
	lo, hi = sparse_arr.indptr[i], sparse_arr.indptr[i+1]
	# determine col indices, and append if there are any
	js = sparse_arr.indices[lo:hi]
	# arr @ i-th row, js cols
	arr_data[lo:hi] = arr[i].squeeze()[js]

	return arr_data