Source code for hexrd.convolution.convolve

# Based on code from astropy
# Licensed under a 3-clause BSD style license

import warnings

import os
import ctypes
from functools import partial

import numpy as np
from numpy.ctypeslib import ndpointer, load_library

from .utils import KernelSizeError, has_even_axis, raise_even_kernel_exception

LIBRARY_PATH = os.path.dirname(__file__)

try:
    _convolve = load_library("_convolve", LIBRARY_PATH)
except Exception:
    raise ImportError("Convolution C extension is missing. Try re-building astropy.")

# The GIL is automatically released by default when calling functions imported
# from libraries loaded by ctypes.cdll.LoadLibrary(<path>)

# Declare prototypes
# Boundary None
_convolveNd_c = _convolve.convolveNd_c
_convolveNd_c.restype = None
_convolveNd_c.argtypes = [ndpointer(ctypes.c_double, flags={"C_CONTIGUOUS", "WRITEABLE"}),  # return array
                          ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),  # input array
                          ctypes.c_uint,  # N dim
                          # size array for input and result unless
                          # embed_result_within_padded_region is False,
                          # in which case the result array is assumed to be
                          # input.shape - 2*(kernel.shape//2). Note: integer division.
                          ndpointer(ctypes.c_size_t, flags="C_CONTIGUOUS"),
                          ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),  # kernel array
                          ndpointer(ctypes.c_size_t, flags="C_CONTIGUOUS"),  # size array for kernel
                          ctypes.c_bool,  # nan_interpolate
                          ctypes.c_bool,  # embed_result_within_padded_region
                          ctypes.c_uint]  # n_threads

# Disabling all doctests in this module until a better way of handling warnings
# in doctests can be determined
__doctest_skip__ = ['*']

BOUNDARY_OPTIONS = [None, 'fill', 'wrap', 'extend']

MAX_NORMALIZATION = 100

def _copy_input_if_needed(input, dtype=float, order='C', nan_treatment=None,
                          mask=None, fill_value=None):
    # strip quantity attributes
    if hasattr(input, 'unit'):
        input = input.value
    output = input
    # Copy input
    try:
        # Anything that's masked must be turned into NaNs for the interpolation.
        # This requires copying. A copy is also needed for nan_treatment == 'fill'
        # A copy prevents possible function side-effects of the input array.
        if nan_treatment == 'fill' or np.ma.is_masked(input) or mask is not None:
            if np.ma.is_masked(input):
                # ``np.ma.maskedarray.filled()`` returns a copy, however there
                # is no way to specify the return type or order etc. In addition
                # ``np.nan`` is a ``float`` and there is no conversion to an
                # ``int`` type. Therefore, a pre-fill copy is needed for non
                # ``float`` masked arrays. ``subok=True`` is needed to retain
                # ``np.ma.maskedarray.filled()``. ``copy=False`` allows the fill
                # to act as the copy if type and order are already correct.
                output = np.array(input, dtype=dtype, copy=False, order=order, subok=True)
                output = output.filled(fill_value)
            else:
                # Since we're making a copy, we might as well use `subok=False` to save,
                # what is probably, a negligible amount of memory.
                output = np.array(input, dtype=dtype, copy=True, order=order, subok=False)

            if mask is not None:
                # mask != 0 yields a bool mask for all ints/floats/bool
                output[mask != 0] = fill_value
        else:
            # The call below is synonymous with np.asanyarray(array, ftype=float, order='C')
            # The advantage of `subok=True` is that it won't copy when array is an ndarray subclass. If it
            # is and `subok=False` (default), then it will copy even if `copy=False`. This uses less memory
            # when ndarray subclasses are passed in.
            output = np.array(input, dtype=dtype, copy=False, order=order, subok=True)
    except (TypeError, ValueError) as e:
        raise TypeError('input should be a Numpy array or something '
                        'convertible into a float array', e)
    return output


[docs]def convolve(array, kernel, boundary='fill', fill_value=0.,
             nan_treatment='interpolate', normalize_kernel=True, mask=None,
             preserve_nan=False, normalization_zero_tol=1e-8):
    """
    Convolve an array with a kernel.

    This routine differs from `scipy.ndimage.convolve` because
    it includes a special treatment for ``NaN`` values. Rather than
    including ``NaN`` values in the array in the convolution calculation, which
    causes large ``NaN`` holes in the convolved array, ``NaN`` values are
    replaced with interpolated values using the kernel as an interpolation
    function.

    Parameters
    ----------
    array : `numpy.ndarray`
        The array to convolve. This should be a 1, 2, or 3-dimensional array
        or a list or a set of nested lists representing a 1, 2, or
        3-dimensional array.
    kernel : `numpy.ndarray`
        The convolution kernel. The number of dimensions should match those for
        the array, and the dimensions should be odd in all directions.  If a
        masked array, the masked values will be replaced by ``fill_value``.
    boundary : str, optional
        A flag indicating how to handle boundaries:
            * `None`
                Set the ``result`` values to zero where the kernel
                extends beyond the edge of the array.
            * 'fill'
                Set values outside the array boundary to ``fill_value`` (default).
            * 'wrap'
                Periodic boundary that wrap to the other side of ``array``.
            * 'extend'
                Set values outside the array to the nearest ``array``
                value.
    fill_value : float, optional
        The value to use outside the array when using ``boundary='fill'``
    normalize_kernel : bool, optional
        Whether to normalize the kernel to have a sum of one.
    nan_treatment : {'interpolate', 'fill'}
        interpolate will result in renormalization of the kernel at each
        position ignoring (pixels that are NaN in the image) in both the image
        and the kernel.
        'fill' will replace the NaN pixels with a fixed numerical value (default
        zero, see ``fill_value``) prior to convolution
        Note that if the kernel has a sum equal to zero, NaN interpolation
        is not possible and will raise an exception.
    preserve_nan : bool
        After performing convolution, should pixels that were originally NaN
        again become NaN?
    mask : `None` or `numpy.ndarray`
        A "mask" array.  Shape must match ``array``, and anything that is masked
        (i.e., not 0/`False`) will be set to NaN for the convolution.  If
        `None`, no masking will be performed unless ``array`` is a masked array.
        If ``mask`` is not `None` *and* ``array`` is a masked array, a pixel is
        masked of it is masked in either ``mask`` *or* ``array.mask``.
    normalization_zero_tol: float, optional
        The absolute tolerance on whether the kernel is different than zero.
        If the kernel sums to zero to within this precision, it cannot be
        normalized. Default is "1e-8".

    Returns
    -------
    result : `numpy.ndarray`
        An array with the same dimensions and as the input array,
        convolved with kernel.  The data type depends on the input
        array type.  If array is a floating point type, then the
        return array keeps the same data type, otherwise the type
        is ``numpy.float``.

    Notes
    -----
    For masked arrays, masked values are treated as NaNs.  The convolution
    is always done at ``numpy.float`` precision.
    """

    if boundary not in BOUNDARY_OPTIONS:
        raise ValueError("Invalid boundary option: must be one of {}"
                         .format(BOUNDARY_OPTIONS))

    if nan_treatment not in ('interpolate', 'fill'):
        raise ValueError("nan_treatment must be one of 'interpolate','fill'")

    # OpenMP support is disabled at the C src code level, changing this will have
    # no effect.
    n_threads = 1

    # Keep refs to originals
    passed_kernel = kernel
    passed_array = array

    # The C routines all need float type inputs (so, a particular
    # bit size, endianness, etc.).  So we have to convert, which also
    # has the effect of making copies so we don't modify the inputs.
    # After this, the variables we work with will be array_internal, and
    # kernel_internal.  However -- we do want to keep track of what type
    # the input array was so we can cast the result to that at the end
    # if it's a floating point type.  Don't bother with this for lists --
    # just always push those as float.
    # It is always necessary to make a copy of kernel (since it is modified),
    # but, if we just so happen to be lucky enough to have the input array
    # have exactly the desired type, we just alias to array_internal
    # Convert kernel to ndarray if not already

    # Copy or alias array to array_internal
    array_internal = _copy_input_if_needed(passed_array, dtype=float, order='C',
                                           nan_treatment=nan_treatment, mask=mask,
                                           fill_value=np.nan)
    array_dtype = getattr(passed_array, 'dtype', array_internal.dtype)
    # Copy or alias kernel to kernel_internal
    kernel_internal = _copy_input_if_needed(passed_kernel, dtype=float, order='C',
                                            nan_treatment=None, mask=None,
                                            fill_value=fill_value)

    # Make sure kernel has all odd axes
    if has_even_axis(kernel_internal):
        raise_even_kernel_exception()

    # -----------------------------------------------------------------------
    # From this point onwards refer only to ``array_internal`` and
    # ``kernel_internal``.
    # Assume both are base np.ndarrays and NOT subclasses e.g. NOT
    # ``Kernel`` nor ``np.ma.maskedarray`` classes.
    # -----------------------------------------------------------------------

    # Check dimensionality
    if array_internal.ndim == 0:
        raise Exception("cannot convolve 0-dimensional arrays")
    elif array_internal.ndim > 3:
        raise NotImplementedError('convolve only supports 1, 2, and 3-dimensional '
                                  'arrays at this time')
    elif array_internal.ndim != kernel_internal.ndim:
        raise Exception('array and kernel have differing number of '
                        'dimensions.')

    array_shape = np.array(array_internal.shape)
    kernel_shape = np.array(kernel_internal.shape)
    pad_width = kernel_shape//2

    # For boundary=None only the center space is convolved. All array indices within a
    # distance kernel.shape//2 from the edge are completely ignored (zeroed).
    # E.g. (1D list) only the indices len(kernel)//2 : len(array)-len(kernel)//2
    # are convolved. It is therefore not possible to use this method to convolve an
    # array by a kernel that is larger (see note below) than the array - as ALL pixels would be ignored
    # leaving an array of only zeros.
    # Note: For even kernels the correctness condition is array_shape > kernel_shape.
    # For odd kernels it is:
    # array_shape >= kernel_shape OR array_shape > kernel_shape-1 OR array_shape > 2*(kernel_shape//2).
    # Since the latter is equal to the former two for even lengths, the latter condition is complete.
    if boundary is None and not np.all(array_shape > 2*pad_width):
        raise KernelSizeError("for boundary=None all kernel axes must be smaller than array's - "
                              "use boundary in ['fill', 'extend', 'wrap'] instead.")

    # NaN interpolation significantly slows down the C convolution
    # computation. Since nan_treatment = 'interpolate', is the default
    # check whether it is even needed, if not, don't interpolate.
    # NB: np.isnan(array_internal.sum()) is faster than np.isnan(array_internal).any()
    nan_interpolate = (nan_treatment == 'interpolate') and np.isnan(array_internal.sum())

    # Check if kernel is normalizable
    if normalize_kernel or nan_interpolate:
        kernel_sum = kernel_internal.sum()
        kernel_sums_to_zero = np.isclose(kernel_sum, 0, atol=normalization_zero_tol)

        if kernel_sum < 1. / MAX_NORMALIZATION or kernel_sums_to_zero:
            raise ValueError("The kernel can't be normalized, because its sum is "
                             "close to zero. The sum of the given kernel is < {}"
                             .format(1. / MAX_NORMALIZATION))

    # Mark the NaN values so we can replace them later if interpolate_nan is
    # not set
    if preserve_nan or nan_treatment == 'fill':
        initially_nan = np.isnan(array_internal)
        if nan_treatment == 'fill':
            array_internal[initially_nan] = fill_value

    # Avoid any memory allocation within the C code. Allocate output array
    # here and pass through instead.
    result = np.zeros(array_internal.shape, dtype=float, order='C')

    embed_result_within_padded_region = True
    array_to_convolve = array_internal
    if boundary in ('fill', 'extend', 'wrap'):
        embed_result_within_padded_region = False
        if boundary == 'fill':
            # This method is faster than using numpy.pad(..., mode='constant')
            array_to_convolve = np.full(array_shape + 2*pad_width, fill_value=fill_value, dtype=float, order='C')
            # Use bounds [pad_width[0]:array_shape[0]+pad_width[0]] instead of [pad_width[0]:-pad_width[0]]
            # to account for when the kernel has size of 1 making pad_width = 0.
            if array_internal.ndim == 1:
                array_to_convolve[pad_width[0]:array_shape[0]+pad_width[0]] = array_internal
            elif array_internal.ndim == 2:
                array_to_convolve[pad_width[0]:array_shape[0]+pad_width[0],
                                  pad_width[1]:array_shape[1]+pad_width[1]] = array_internal
            else:
                array_to_convolve[pad_width[0]:array_shape[0]+pad_width[0],
                                  pad_width[1]:array_shape[1]+pad_width[1],
                                  pad_width[2]:array_shape[2]+pad_width[2]] = array_internal
        else:
            np_pad_mode_dict = {'fill': 'constant', 'extend': 'edge', 'wrap': 'wrap'}
            np_pad_mode = np_pad_mode_dict[boundary]
            pad_width = kernel_shape // 2

            if array_internal.ndim == 1:
                np_pad_width = (pad_width[0],)
            elif array_internal.ndim == 2:
                np_pad_width = ((pad_width[0],), (pad_width[1],))
            else:
                np_pad_width = ((pad_width[0],), (pad_width[1],), (pad_width[2],))

            array_to_convolve = np.pad(array_internal, pad_width=np_pad_width,
                                       mode=np_pad_mode)

    _convolveNd_c(result, array_to_convolve,
                  array_to_convolve.ndim,
                  np.array(array_to_convolve.shape, dtype=ctypes.c_size_t, order='C'),
                  kernel_internal,
                  np.array(kernel_shape, dtype=ctypes.c_size_t, order='C'),
                  nan_interpolate, embed_result_within_padded_region,
                  n_threads)

    # So far, normalization has only occured for nan_treatment == 'interpolate'
    # because this had to happen within the C extension so as to ignore
    # any NaNs
    if normalize_kernel:
        if not nan_interpolate:
            result /= kernel_sum
    elif nan_interpolate:
        result *= kernel_sum

    if nan_interpolate and not preserve_nan and np.isnan(result.sum()):
        warnings.warn("nan_treatment='interpolate', however, NaN values detected "
                      "post convolution. A contiguous region of NaN values, larger "
                      "than the kernel size, are present in the input array. "
                      "Increase the kernel size to avoid this.")

    if preserve_nan:
        result[initially_nan] = np.nan

    # Convert result to original data type
    if array_dtype.kind == 'f':
        # Try to preserve the input type if it's a floating point type
        # Avoid making another copy if possible
        try:
            return result.astype(array_dtype, copy=False)
        except TypeError:
            return result.astype(array_dtype)
    else:
        return result