#!/usr/bin/env python3
"""
Functions used for change detection.
cmdoret, 20200403
"""
from typing import Iterable
import scipy.sparse as sp
import numpy as np
import chromosight.utils.detection as cud
import chromosight.utils.preprocessing as cup
[docs]def reps_bg_diff(mats: Iterable[sp.spmatrix]) -> np.ndarray:
"""
Given multiple sample matrices, return a 1D array of all differences
between each sample matrix's pixels and their corresponding median
background value. All input matrices must have the same shape, nonzero
coordinates and format.
Parameters
----------
mats : Iterable of scipy.sparse.spmatrix
The list of data matrices from different samples.
Returns
-------
numpy.ndarray of floats :
The distribution of pixel differences to the median background. If
there are S matrices of P nonzero pixels, this 1D array will contain
P*S elements.
"""
med_bg = median_bg(mats)
diffs = [m.data - med_bg.data for m in mats]
diffs = np.hstack(diffs)
return diffs
[docs]def get_sse_mat(mats: Iterable[sp.spmatrix]) -> sp.spmatrix:
"""
Given multiple matrices, return the matrix of position-wise sum of
squared errors to median. All input matrices must have the same shape,
nonzero coordinates and format. Output matrix will have the same format
as inputs.
Parameters
----------
mats : Iterable of scipy.sparse.spmatrix
The list of data matrices from different samples.
Returns
-------
scipy.sparse.spmatrix :
A sparse matrix where each nonzero pixel is the sum of squared
difference of all samples to their median background, at the
corresponding nonzero pixel.
"""
bg = median_bg(mats)
sse = bg.copy()
se = np.array([(m.data - bg.data) ** 2 for m in mats])
sse.data = np.sum(se, axis=0)
return sse
[docs]def get_win_density(
mat: sp.csr_matrix, win_size: int = 3, sym_upper: bool = False
) -> sp.csr_matrix:
"""Compute local pixel density in sparse matrices using convolution.
The convolution is performed in 'full' mode: Computations are performed
all the way to the edges by trimming the kernel.
Parameters
----------
mat : scipy.sparse.csr_matrix
The input sparse matrix to convolve.
win_size : int
The size of the area in which to compute the proportion of nonzero
pixels. This will be the kernel size used in convolution.
sym_upper : bool
Whether the input matrix is symmetric upper, in which case only the
upper triangle is returned.
Returns
-------
scipy.sparse.csr_matrix:
The result of the convolution of the uniform kernel
(win_size x win_size) on the binarized input matrix.
Each value represents the proportion of nonzero pixels
in the neighbourhood.
"""
ms, ns = mat.shape
# Generate a binary matrix (pixels are either empty or full)
bin_mat = mat.copy()
if sym_upper:
bin_mat = sp.triu(bin_mat)
bin_mat.data = bin_mat.data.astype(bool)
# Adding a frame of zeros around the signal
tmp = sp.csr_matrix((win_size - 1, ns), dtype=bool)
bin_mat = sp.vstack([tmp, bin_mat, tmp], format=mat.format)
tmp = sp.csr_matrix((ms + 2 * (win_size - 1), win_size - 1), dtype=bool)
bin_mat = sp.hstack([tmp, bin_mat, tmp], format=mat.format)
# Convolve the uniform kernel with this matrix to get the proportion of nonzero
# pixels in each neighbourhood
kernel = np.ones((win_size, win_size))
win_area = win_size ** 2
density = cud.xcorr2(bin_mat, kernel / win_area)
# Compute convolution of uniform kernel with a frame of ones to get number
# of missing pixels in each window.
frame = cup.frame_missing_mask(
sp.csr_matrix(mat.shape, dtype=bool), kernel.shape, sym_upper=sym_upper
)
frame = cud.xcorr2(frame, kernel).tocoo()
# From now on, frame.data contains the number of 'present' samples.
# (where there is at least one missing pixel)
frame.data = win_area - frame.data
# Adjust the proportion for values close to the border (lower denominator)
density[frame.row, frame.col] = (
density[frame.row, frame.col].A1 * win_area / frame.data
)
# Trim the frame out from the signal
density = density[
win_size - 1 : -win_size + 1, win_size - 1 : -win_size + 1
]
if sym_upper:
density = sp.triu(density)
return density