def triu(arr, value=np.nan, k=0, copy=False): """ Upper triangle of an array. Modifies input array, such that its elements below the k-th diagonal are filled with value (returns modified copy, when requested instead). Assumes that the input array is 2D. Parameters ---------- arr : array_like The 2D array to be modified. value : float The value to fill with, by default nan. k : int, optional Diagonal, below which to fill, by default 0. copy : bool, optional If True, a copy of arr is returned, False - modified in place. Returns ------- ndarray The filled array. Notes ----- Similar to numpy.triu, but allows for a custom value to be filled. This solution was borrowed from https://stackoverflow.com/questions/33029129/make-a-numpy-upper-triangular-matrix-padded-with-nan-instead-of-zero It is quicker than the naive one based on numpy.tril_indices. """ if copy: arr = arr.copy() m, n = arr.shape _tril_mask = np.arange(m)[:,None] > np.arange(n) - k arr[_tril_mask] = value return arr from scipy.sparse import issparse def _snip(matrix, offset, bad_bins_masks, min_diag, bbox): """ Extract snippet from the matrix Parameters ---------- matrix : matrix-like 2D matrix from which bbox will be extracted. Supports dense, sparse and lazy subscriptable interfaces offset : (int, int) 2D offset of the matrix: (offset1, offset2) bad_bins : ( ndarray[bool], ndarray[bool]) Bad-bins masks for the matrix. Corresponding values are filled with nans. min_diag : int Diagonal of the matrix below which values are masked with nans. It takes offset into account: offset2 - offset1 is its main diagonal bbox : (int, int, int, int) Bounding box of the snippet to be extracted: (lo1, hi1, lo2, hi2) Indices are relative to the matrix. Returns ------- np.array Requested snippet. """ lo1, hi1, lo2, hi2 = bbox offset1, offset2 = offset bad_bins1, bad_bins2 = bad_bins_masks m, n = matrix.shape dm, dn = hi1 - lo1, hi2 - lo2 # when snippet is out of bound, return empty snippet if lo1 < 0 or lo2 < 0 or hi1 > m or hi2 > n: return np.full((dm, dn), np.nan) # TODO implement proper padding instead if issparse(matrix): snippet = matrix[lo1:hi1, lo2:hi2].toarray() else: snippet = matrix[lo1:hi1, lo2:hi2].copy() # fill bad_bins with nans snippet[bad_bins1[lo1:hi1], :] = np.nan snippet[:, bad_bins2[lo2:hi2]] = np.nan # mask tril diagonals of the snippet when requested: if min_diag is not None: # low-left diagonal of the snip lowleft_diag_snip = (lo2 + offset2) - (hi1 + offset1) # number of lowleft diags to mask, if there are any: if (num_tril_diags := min_diag - lowleft_diag_snip) > 0: diag_below_which_nan = num_tril_diags - dm snippet = triu( snippet, k=diag_below_which_nan ) return snippet def array_to_csr_data(arr, sparse_arr): """ Extract elements of an array corresponding to non-zero (nnz) elements of a reference sparse array sparse_arr. arr has to have the same shape as sparse_arr. It can be dense or any other 2D subscriptable object, e.g. LazyToeplitz. Parameters ---------- arr : array-like 2D array/matrix oe subscriptable object sparse_arr : scipy.sparse.csr_matrix CSR matrix to derive nnz indices from Returns ------- arr_data : ndarray array of the same length as sparse_arr.data Notes ----- Such a row-by-row extraction from arr seems like a good compromise for performance/memory usage. Alternatives include fully dense version, and using COO """ # assuming arr.shape == sparse_arr.shape num_rows = sparse_arr.shape[0] arr_data = np.empty_like(sparse_arr.data) # extract arr values matching sparse_arr nnz values, row by row for i in range(num_rows): lo, hi = sparse_arr.indptr[i], sparse_arr.indptr[i+1] # determine col indices, and append if there are any js = sparse_arr.indices[lo:hi] # arr @ i-th row, js cols arr_data[lo:hi] = arr[i].squeeze()[js] return arr_data