Source code for pytcl.mathematical_functions.statistics.estimators

"""
Statistical estimators and descriptive statistics.

This module provides functions for computing sample statistics,
robust estimators, and related quantities used in tracking applications.
"""

from typing import Optional

import numpy as np
from numpy.typing import ArrayLike, NDArray



[docs]
def weighted_mean(
    x: ArrayLike,
    weights: ArrayLike,
    axis: Optional[int] = None,
) -> NDArray[np.floating]:
    """
    Compute weighted mean.

    Parameters
    ----------
    x : array_like
        Input data.
    weights : array_like
        Weights for each data point.
    axis : int, optional
        Axis along which to compute. Default is None (all elements).

    Returns
    -------
    mean : ndarray
        Weighted mean.

    Examples
    --------
    >>> weighted_mean([1, 2, 3], [1, 1, 2])
    2.25
    """
    x = np.asarray(x, dtype=np.float64)
    weights = np.asarray(weights, dtype=np.float64)

    return np.average(x, weights=weights, axis=axis)




[docs]
def weighted_var(
    x: ArrayLike,
    weights: ArrayLike,
    ddof: int = 0,
    axis: Optional[int] = None,
) -> NDArray[np.floating]:
    """
    Compute weighted variance.

    Parameters
    ----------
    x : array_like
        Input data.
    weights : array_like
        Weights for each data point.
    ddof : int, optional
        Delta degrees of freedom. Default is 0 (population variance).
    axis : int, optional
        Axis along which to compute.

    Returns
    -------
    var : ndarray
        Weighted variance.

    Examples
    --------
    >>> x = [1, 2, 3]
    >>> weights = [1, 1, 2]
    >>> weighted_var(x, weights)
    0.5625
    """
    x = np.asarray(x, dtype=np.float64)
    weights = np.asarray(weights, dtype=np.float64)

    mean = np.average(x, weights=weights, axis=axis, keepdims=True)
    variance = np.average((x - mean) ** 2, weights=weights, axis=axis)

    if ddof != 0:
        # Reliability weights correction
        sum_weights = np.sum(weights, axis=axis)
        sum_weights_sq = np.sum(weights**2, axis=axis)
        correction = sum_weights / (sum_weights - ddof * sum_weights_sq / sum_weights)
        variance = variance * correction

    return variance




[docs]
def weighted_cov(
    x: ArrayLike,
    weights: ArrayLike,
    ddof: int = 0,
) -> NDArray[np.floating]:
    """
    Compute weighted covariance matrix.

    Parameters
    ----------
    x : array_like
        Data matrix of shape (n_samples, n_features).
    weights : array_like
        Weights of shape (n_samples,).
    ddof : int, optional
        Delta degrees of freedom. Default is 0.

    Returns
    -------
    cov : ndarray
        Weighted covariance matrix of shape (n_features, n_features).

    Examples
    --------
    >>> x = [[1, 2], [2, 3], [3, 4]]
    >>> weights = [1, 1, 1]
    >>> cov = weighted_cov(x, weights)
    >>> cov.shape
    (2, 2)
    """
    x = np.asarray(x, dtype=np.float64)
    weights = np.asarray(weights, dtype=np.float64)

    if x.ndim == 1:
        x = x.reshape(-1, 1)

    # Normalize weights
    weights = weights / np.sum(weights)

    # Compute weighted mean
    mean = np.sum(weights[:, np.newaxis] * x, axis=0)

    # Compute weighted covariance
    centered = x - mean
    cov = (weights[:, np.newaxis] * centered).T @ centered

    if ddof != 0:
        sum_weights_sq = np.sum(weights**2)
        cov = cov / (1 - ddof * sum_weights_sq)

    return cov




[docs]
def sample_mean(
    x: ArrayLike,
    axis: Optional[int] = None,
) -> NDArray[np.floating]:
    """
    Compute sample mean.

    Parameters
    ----------
    x : array_like
        Input data.
    axis : int, optional
        Axis along which to compute.

    Returns
    -------
    mean : ndarray
        Sample mean.
    """
    return np.mean(x, axis=axis, dtype=np.float64)




[docs]
def sample_var(
    x: ArrayLike,
    ddof: int = 1,
    axis: Optional[int] = None,
) -> NDArray[np.floating]:
    """
    Compute sample variance.

    Parameters
    ----------
    x : array_like
        Input data.
    ddof : int, optional
        Delta degrees of freedom. Default is 1 (unbiased estimator).
    axis : int, optional
        Axis along which to compute.

    Returns
    -------
    var : ndarray
        Sample variance.

    Examples
    --------
    >>> x = [1, 2, 3, 4, 5]
    >>> sample_var(x)
    2.5
    """
    return np.var(x, ddof=ddof, axis=axis, dtype=np.float64)




[docs]
def sample_cov(
    x: ArrayLike,
    y: Optional[ArrayLike] = None,
    ddof: int = 1,
) -> NDArray[np.floating]:
    """
    Compute sample covariance matrix.

    Parameters
    ----------
    x : array_like
        Data matrix of shape (n_samples, n_features) or 1D array.
    y : array_like, optional
        Second variable for cross-covariance.
    ddof : int, optional
        Delta degrees of freedom. Default is 1.

    Returns
    -------
    cov : ndarray
        Covariance matrix.

    Examples
    --------
    >>> x = [[1, 2], [2, 3], [3, 4]]
    >>> cov = sample_cov(x)
    >>> cov.shape
    (2, 2)
    """
    x = np.asarray(x, dtype=np.float64)

    if y is not None:
        y = np.asarray(y, dtype=np.float64)
        return np.cov(x, y, ddof=ddof)

    if x.ndim == 1:
        return np.var(x, ddof=ddof)

    return np.cov(x.T, ddof=ddof)




[docs]
def sample_corr(x: ArrayLike) -> NDArray[np.floating]:
    """
    Compute sample correlation matrix.

    Parameters
    ----------
    x : array_like
        Data matrix of shape (n_samples, n_features).

    Returns
    -------
    corr : ndarray
        Correlation matrix of shape (n_features, n_features).

    Examples
    --------
    >>> x = [[1, 2], [2, 3], [3, 4]]
    >>> corr = sample_corr(x)
    >>> corr.shape
    (2, 2)
    >>> corr[0, 0]  # Correlation of feature 1 with itself
    1.0
    """
    return np.corrcoef(np.asarray(x, dtype=np.float64).T)




[docs]
def median(
    x: ArrayLike,
    axis: Optional[int] = None,
) -> NDArray[np.floating]:
    """
    Compute median.

    Parameters
    ----------
    x : array_like
        Input data.
    axis : int, optional
        Axis along which to compute.

    Returns
    -------
    med : ndarray
        Median value(s).

    Examples
    --------
    >>> median([1, 2, 3, 4, 5])
    3.0
    >>> median([1, 2, 3, 4])
    2.5
    """
    return np.median(x, axis=axis)




[docs]
def mad(
    x: ArrayLike,
    axis: Optional[int] = None,
    scale: float = 1.4826,
) -> NDArray[np.floating]:
    """
    Median Absolute Deviation (MAD).

    A robust measure of statistical dispersion.

    Parameters
    ----------
    x : array_like
        Input data.
    axis : int, optional
        Axis along which to compute.
    scale : float, optional
        Scale factor for consistency with standard deviation for normal
        distributions. Default is 1.4826.

    Returns
    -------
    mad : ndarray
        MAD value(s).

    Examples
    --------
    >>> mad([1, 2, 3, 4, 5])
    1.4826

    Notes
    -----
    For normally distributed data, scale * MAD approximates the
    standard deviation.
    """
    x = np.asarray(x, dtype=np.float64)
    med = np.median(x, axis=axis, keepdims=True)
    return scale * np.median(np.abs(x - med), axis=axis)




[docs]
def iqr(
    x: ArrayLike,
    axis: Optional[int] = None,
) -> NDArray[np.floating]:
    """
    Interquartile range (IQR).

    Parameters
    ----------
    x : array_like
        Input data.
    axis : int, optional
        Axis along which to compute.

    Returns
    -------
    iqr : ndarray
        Interquartile range (Q3 - Q1).

    Examples
    --------
    >>> iqr([1, 2, 3, 4, 5, 6, 7, 8, 9])
    4.5
    """
    x = np.asarray(x, dtype=np.float64)
    q75, q25 = np.percentile(x, [75, 25], axis=axis)
    return q75 - q25




[docs]
def skewness(
    x: ArrayLike,
    axis: Optional[int] = None,
    bias: bool = True,
) -> NDArray[np.floating]:
    """
    Compute sample skewness.

    Parameters
    ----------
    x : array_like
        Input data.
    axis : int, optional
        Axis along which to compute.
    bias : bool, optional
        If False, apply bias correction. Default is True.

    Returns
    -------
    skew : ndarray
        Skewness value(s).

    Examples
    --------
    >>> skewness([1, 2, 3, 4, 5])
    0.0
    """
    from scipy.stats import skew as scipy_skew

    return np.asarray(scipy_skew(x, axis=axis, bias=bias), dtype=np.float64)




[docs]
def kurtosis(
    x: ArrayLike,
    axis: Optional[int] = None,
    fisher: bool = True,
    bias: bool = True,
) -> NDArray[np.floating]:
    """
    Compute sample kurtosis.

    Parameters
    ----------
    x : array_like
        Input data.
    axis : int, optional
        Axis along which to compute.
    fisher : bool, optional
        If True, return excess kurtosis (Fisher definition).
        If False, return Pearson kurtosis. Default is True.
    bias : bool, optional
        If False, apply bias correction. Default is True.

    Returns
    -------
    kurt : ndarray
        Kurtosis value(s).

    Examples
    --------
    >>> kurtosis([1, 2, 3, 4, 5])
    -1.2
    """
    from scipy.stats import kurtosis as scipy_kurtosis

    return np.asarray(
        scipy_kurtosis(x, axis=axis, fisher=fisher, bias=bias), dtype=np.float64
    )




[docs]
def moment(
    x: ArrayLike,
    order: int,
    axis: Optional[int] = None,
    central: bool = True,
) -> NDArray[np.floating]:
    """
    Compute sample moment.

    Parameters
    ----------
    x : array_like
        Input data.
    order : int
        Order of the moment.
    axis : int, optional
        Axis along which to compute.
    central : bool, optional
        If True, compute central moment. Default is True.

    Returns
    -------
    m : ndarray
        Moment value(s).

    Examples
    --------
    >>> moment([1, 2, 3, 4, 5], order=2)
    2.0
    >>> moment([1, 2, 3, 4, 5], order=2, central=False)
    11.0
    """
    from scipy.stats import moment as scipy_moment

    if central:
        return np.asarray(scipy_moment(x, moment=order, axis=axis), dtype=np.float64)
    else:
        x = np.asarray(x, dtype=np.float64)
        return np.mean(x**order, axis=axis)




[docs]
def nees(
    error: ArrayLike,
    covariance: ArrayLike,
) -> NDArray[np.floating]:
    """
    Normalized Estimation Error Squared (NEES).

    A consistency metric for estimators. For a consistent estimator,
    NEES should be chi-squared distributed with n degrees of freedom.

    Parameters
    ----------
    error : array_like
        Estimation error vector(s) of shape (n,) or (m, n).
    covariance : array_like
        Covariance matrix of shape (n, n).

    Returns
    -------
    nees : ndarray
        NEES value(s). Scalar if error is 1D, array if error is 2D.

    Examples
    --------
    >>> error = np.array([1.0, 0.5])
    >>> cov = np.array([[1, 0], [0, 1]])
    >>> nees(error, cov)
    1.25
    """
    error = np.asarray(error, dtype=np.float64)
    covariance = np.asarray(covariance, dtype=np.float64)

    cov_inv = np.linalg.inv(covariance)

    if error.ndim == 1:
        return error @ cov_inv @ error
    else:
        return np.sum(error @ cov_inv * error, axis=1)




[docs]
def nis(
    innovation: ArrayLike,
    innovation_covariance: ArrayLike,
) -> NDArray[np.floating]:
    """
    Normalized Innovation Squared (NIS).

    A filter consistency metric based on measurement innovations.
    For a consistent filter, NIS should be chi-squared distributed.

    Parameters
    ----------
    innovation : array_like
        Innovation (measurement residual) vector(s).
    innovation_covariance : array_like
        Innovation covariance matrix.

    Returns
    -------
    nis : ndarray
        NIS value(s).

    Notes
    -----
    This is equivalent to NEES applied to innovations.
    """
    return nees(innovation, innovation_covariance)



__all__ = [
    "weighted_mean",
    "weighted_var",
    "weighted_cov",
    "sample_mean",
    "sample_var",
    "sample_cov",
    "sample_corr",
    "median",
    "mad",
    "iqr",
    "skewness",
    "kurtosis",
    "moment",
    "nees",
    "nis",
]