Created
February 14, 2024 14:50
-
-
Save sergpolly/76566fd8d559ffad5c5f1c19c5660a1a to your computer and use it in GitHub Desktop.
helper functions for snipping module in cooltools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def triu(arr, value=np.nan, k=0, copy=False): | |
| """ | |
| Upper triangle of an array. | |
| Modifies input array, such that its elements below the k-th diagonal | |
| are filled with value (returns modified copy, when requested instead). | |
| Assumes that the input array is 2D. | |
| Parameters | |
| ---------- | |
| arr : array_like | |
| The 2D array to be modified. | |
| value : float | |
| The value to fill with, by default nan. | |
| k : int, optional | |
| Diagonal, below which to fill, by default 0. | |
| copy : bool, optional | |
| If True, a copy of arr is returned, False - modified in place. | |
| Returns | |
| ------- | |
| ndarray | |
| The filled array. | |
| Notes | |
| ----- | |
| Similar to numpy.triu, but allows for a custom value to be filled. | |
| This solution was borrowed from | |
| https://stackoverflow.com/questions/33029129/make-a-numpy-upper-triangular-matrix-padded-with-nan-instead-of-zero | |
| It is quicker than the naive one based on numpy.tril_indices. | |
| """ | |
| if copy: | |
| arr = arr.copy() | |
| m, n = arr.shape | |
| _tril_mask = np.arange(m)[:,None] > np.arange(n) - k | |
| arr[_tril_mask] = value | |
| return arr | |
| from scipy.sparse import issparse | |
| def _snip(matrix, offset, bad_bins_masks, min_diag, bbox): | |
| """ | |
| Extract snippet from the matrix | |
| Parameters | |
| ---------- | |
| matrix : matrix-like | |
| 2D matrix from which bbox will be extracted. | |
| Supports dense, sparse and lazy subscriptable interfaces | |
| offset : (int, int) | |
| 2D offset of the matrix: (offset1, offset2) | |
| bad_bins : ( ndarray[bool], ndarray[bool]) | |
| Bad-bins masks for the matrix. Corresponding values are | |
| filled with nans. | |
| min_diag : int | |
| Diagonal of the matrix below which values are masked with nans. | |
| It takes offset into account: offset2 - offset1 is its main diagonal | |
| bbox : (int, int, int, int) | |
| Bounding box of the snippet to be extracted: (lo1, hi1, lo2, hi2) | |
| Indices are relative to the matrix. | |
| Returns | |
| ------- | |
| np.array | |
| Requested snippet. | |
| """ | |
| lo1, hi1, lo2, hi2 = bbox | |
| offset1, offset2 = offset | |
| bad_bins1, bad_bins2 = bad_bins_masks | |
| m, n = matrix.shape | |
| dm, dn = hi1 - lo1, hi2 - lo2 | |
| # when snippet is out of bound, return empty snippet | |
| if lo1 < 0 or lo2 < 0 or hi1 > m or hi2 > n: | |
| return np.full((dm, dn), np.nan) | |
| # TODO implement proper padding instead | |
| if issparse(matrix): | |
| snippet = matrix[lo1:hi1, lo2:hi2].toarray() | |
| else: | |
| snippet = matrix[lo1:hi1, lo2:hi2].copy() | |
| # fill bad_bins with nans | |
| snippet[bad_bins1[lo1:hi1], :] = np.nan | |
| snippet[:, bad_bins2[lo2:hi2]] = np.nan | |
| # mask tril diagonals of the snippet when requested: | |
| if min_diag is not None: | |
| # low-left diagonal of the snip | |
| lowleft_diag_snip = (lo2 + offset2) - (hi1 + offset1) | |
| # number of lowleft diags to mask, if there are any: | |
| if (num_tril_diags := min_diag - lowleft_diag_snip) > 0: | |
| diag_below_which_nan = num_tril_diags - dm | |
| snippet = triu( snippet, k=diag_below_which_nan ) | |
| return snippet | |
| def array_to_csr_data(arr, sparse_arr): | |
| """ | |
| Extract elements of an array corresponding to non-zero (nnz) elements | |
| of a reference sparse array sparse_arr. | |
| arr has to have the same shape as sparse_arr. It can be dense | |
| or any other 2D subscriptable object, e.g. LazyToeplitz. | |
| Parameters | |
| ---------- | |
| arr : array-like | |
| 2D array/matrix oe subscriptable object | |
| sparse_arr : scipy.sparse.csr_matrix | |
| CSR matrix to derive nnz indices from | |
| Returns | |
| ------- | |
| arr_data : ndarray | |
| array of the same length as sparse_arr.data | |
| Notes | |
| ----- | |
| Such a row-by-row extraction from arr seems like a good compromise | |
| for performance/memory usage. Alternatives include fully dense version, | |
| and using COO | |
| """ | |
| # assuming arr.shape == sparse_arr.shape | |
| num_rows = sparse_arr.shape[0] | |
| arr_data = np.empty_like(sparse_arr.data) | |
| # extract arr values matching sparse_arr nnz values, row by row | |
| for i in range(num_rows): | |
| lo, hi = sparse_arr.indptr[i], sparse_arr.indptr[i+1] | |
| # determine col indices, and append if there are any | |
| js = sparse_arr.indices[lo:hi] | |
| # arr @ i-th row, js cols | |
| arr_data[lo:hi] = arr[i].squeeze()[js] | |
| return arr_data |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
here is how they can be tested:
triu - should behave like numpy.triu, so it is tested against it:
_snipshould be tested extensively as it is a core of the snipping extraction: