"""
Statistical estimators and descriptive statistics.
This module provides functions for computing sample statistics,
robust estimators, and related quantities used in tracking applications.
"""
from typing import Optional
import numpy as np
from numpy.typing import ArrayLike, NDArray
[docs]
def weighted_mean(
x: ArrayLike,
weights: ArrayLike,
axis: Optional[int] = None,
) -> NDArray[np.floating]:
"""
Compute weighted mean.
Parameters
----------
x : array_like
Input data.
weights : array_like
Weights for each data point.
axis : int, optional
Axis along which to compute. Default is None (all elements).
Returns
-------
mean : ndarray
Weighted mean.
Examples
--------
>>> weighted_mean([1, 2, 3], [1, 1, 2])
2.25
"""
x = np.asarray(x, dtype=np.float64)
weights = np.asarray(weights, dtype=np.float64)
return np.average(x, weights=weights, axis=axis)
[docs]
def weighted_var(
x: ArrayLike,
weights: ArrayLike,
ddof: int = 0,
axis: Optional[int] = None,
) -> NDArray[np.floating]:
"""
Compute weighted variance.
Parameters
----------
x : array_like
Input data.
weights : array_like
Weights for each data point.
ddof : int, optional
Delta degrees of freedom. Default is 0 (population variance).
axis : int, optional
Axis along which to compute.
Returns
-------
var : ndarray
Weighted variance.
Examples
--------
>>> x = [1, 2, 3]
>>> weights = [1, 1, 2]
>>> weighted_var(x, weights)
0.5625
"""
x = np.asarray(x, dtype=np.float64)
weights = np.asarray(weights, dtype=np.float64)
mean = np.average(x, weights=weights, axis=axis, keepdims=True)
variance = np.average((x - mean) ** 2, weights=weights, axis=axis)
if ddof != 0:
# Reliability weights correction
sum_weights = np.sum(weights, axis=axis)
sum_weights_sq = np.sum(weights**2, axis=axis)
correction = sum_weights / (sum_weights - ddof * sum_weights_sq / sum_weights)
variance = variance * correction
return variance
[docs]
def weighted_cov(
x: ArrayLike,
weights: ArrayLike,
ddof: int = 0,
) -> NDArray[np.floating]:
"""
Compute weighted covariance matrix.
Parameters
----------
x : array_like
Data matrix of shape (n_samples, n_features).
weights : array_like
Weights of shape (n_samples,).
ddof : int, optional
Delta degrees of freedom. Default is 0.
Returns
-------
cov : ndarray
Weighted covariance matrix of shape (n_features, n_features).
Examples
--------
>>> x = [[1, 2], [2, 3], [3, 4]]
>>> weights = [1, 1, 1]
>>> cov = weighted_cov(x, weights)
>>> cov.shape
(2, 2)
"""
x = np.asarray(x, dtype=np.float64)
weights = np.asarray(weights, dtype=np.float64)
if x.ndim == 1:
x = x.reshape(-1, 1)
# Normalize weights
weights = weights / np.sum(weights)
# Compute weighted mean
mean = np.sum(weights[:, np.newaxis] * x, axis=0)
# Compute weighted covariance
centered = x - mean
cov = (weights[:, np.newaxis] * centered).T @ centered
if ddof != 0:
sum_weights_sq = np.sum(weights**2)
cov = cov / (1 - ddof * sum_weights_sq)
return cov
[docs]
def sample_mean(
x: ArrayLike,
axis: Optional[int] = None,
) -> NDArray[np.floating]:
"""
Compute sample mean.
Parameters
----------
x : array_like
Input data.
axis : int, optional
Axis along which to compute.
Returns
-------
mean : ndarray
Sample mean.
"""
return np.mean(x, axis=axis, dtype=np.float64)
[docs]
def sample_var(
x: ArrayLike,
ddof: int = 1,
axis: Optional[int] = None,
) -> NDArray[np.floating]:
"""
Compute sample variance.
Parameters
----------
x : array_like
Input data.
ddof : int, optional
Delta degrees of freedom. Default is 1 (unbiased estimator).
axis : int, optional
Axis along which to compute.
Returns
-------
var : ndarray
Sample variance.
Examples
--------
>>> x = [1, 2, 3, 4, 5]
>>> sample_var(x)
2.5
"""
return np.var(x, ddof=ddof, axis=axis, dtype=np.float64)
[docs]
def sample_cov(
x: ArrayLike,
y: Optional[ArrayLike] = None,
ddof: int = 1,
) -> NDArray[np.floating]:
"""
Compute sample covariance matrix.
Parameters
----------
x : array_like
Data matrix of shape (n_samples, n_features) or 1D array.
y : array_like, optional
Second variable for cross-covariance.
ddof : int, optional
Delta degrees of freedom. Default is 1.
Returns
-------
cov : ndarray
Covariance matrix.
Examples
--------
>>> x = [[1, 2], [2, 3], [3, 4]]
>>> cov = sample_cov(x)
>>> cov.shape
(2, 2)
"""
x = np.asarray(x, dtype=np.float64)
if y is not None:
y = np.asarray(y, dtype=np.float64)
return np.cov(x, y, ddof=ddof)
if x.ndim == 1:
return np.var(x, ddof=ddof)
return np.cov(x.T, ddof=ddof)
[docs]
def sample_corr(x: ArrayLike) -> NDArray[np.floating]:
"""
Compute sample correlation matrix.
Parameters
----------
x : array_like
Data matrix of shape (n_samples, n_features).
Returns
-------
corr : ndarray
Correlation matrix of shape (n_features, n_features).
Examples
--------
>>> x = [[1, 2], [2, 3], [3, 4]]
>>> corr = sample_corr(x)
>>> corr.shape
(2, 2)
>>> corr[0, 0] # Correlation of feature 1 with itself
1.0
"""
return np.corrcoef(np.asarray(x, dtype=np.float64).T)
[docs]
def mad(
x: ArrayLike,
axis: Optional[int] = None,
scale: float = 1.4826,
) -> NDArray[np.floating]:
"""
Median Absolute Deviation (MAD).
A robust measure of statistical dispersion.
Parameters
----------
x : array_like
Input data.
axis : int, optional
Axis along which to compute.
scale : float, optional
Scale factor for consistency with standard deviation for normal
distributions. Default is 1.4826.
Returns
-------
mad : ndarray
MAD value(s).
Examples
--------
>>> mad([1, 2, 3, 4, 5])
1.4826
Notes
-----
For normally distributed data, scale * MAD approximates the
standard deviation.
"""
x = np.asarray(x, dtype=np.float64)
med = np.median(x, axis=axis, keepdims=True)
return scale * np.median(np.abs(x - med), axis=axis)
[docs]
def iqr(
x: ArrayLike,
axis: Optional[int] = None,
) -> NDArray[np.floating]:
"""
Interquartile range (IQR).
Parameters
----------
x : array_like
Input data.
axis : int, optional
Axis along which to compute.
Returns
-------
iqr : ndarray
Interquartile range (Q3 - Q1).
Examples
--------
>>> iqr([1, 2, 3, 4, 5, 6, 7, 8, 9])
4.5
"""
x = np.asarray(x, dtype=np.float64)
q75, q25 = np.percentile(x, [75, 25], axis=axis)
return q75 - q25
[docs]
def skewness(
x: ArrayLike,
axis: Optional[int] = None,
bias: bool = True,
) -> NDArray[np.floating]:
"""
Compute sample skewness.
Parameters
----------
x : array_like
Input data.
axis : int, optional
Axis along which to compute.
bias : bool, optional
If False, apply bias correction. Default is True.
Returns
-------
skew : ndarray
Skewness value(s).
Examples
--------
>>> skewness([1, 2, 3, 4, 5])
0.0
"""
from scipy.stats import skew as scipy_skew
return np.asarray(scipy_skew(x, axis=axis, bias=bias), dtype=np.float64)
[docs]
def kurtosis(
x: ArrayLike,
axis: Optional[int] = None,
fisher: bool = True,
bias: bool = True,
) -> NDArray[np.floating]:
"""
Compute sample kurtosis.
Parameters
----------
x : array_like
Input data.
axis : int, optional
Axis along which to compute.
fisher : bool, optional
If True, return excess kurtosis (Fisher definition).
If False, return Pearson kurtosis. Default is True.
bias : bool, optional
If False, apply bias correction. Default is True.
Returns
-------
kurt : ndarray
Kurtosis value(s).
Examples
--------
>>> kurtosis([1, 2, 3, 4, 5])
-1.2
"""
from scipy.stats import kurtosis as scipy_kurtosis
return np.asarray(
scipy_kurtosis(x, axis=axis, fisher=fisher, bias=bias), dtype=np.float64
)
[docs]
def moment(
x: ArrayLike,
order: int,
axis: Optional[int] = None,
central: bool = True,
) -> NDArray[np.floating]:
"""
Compute sample moment.
Parameters
----------
x : array_like
Input data.
order : int
Order of the moment.
axis : int, optional
Axis along which to compute.
central : bool, optional
If True, compute central moment. Default is True.
Returns
-------
m : ndarray
Moment value(s).
Examples
--------
>>> moment([1, 2, 3, 4, 5], order=2)
2.0
>>> moment([1, 2, 3, 4, 5], order=2, central=False)
11.0
"""
from scipy.stats import moment as scipy_moment
if central:
return np.asarray(scipy_moment(x, moment=order, axis=axis), dtype=np.float64)
else:
x = np.asarray(x, dtype=np.float64)
return np.mean(x**order, axis=axis)
[docs]
def nees(
error: ArrayLike,
covariance: ArrayLike,
) -> NDArray[np.floating]:
"""
Normalized Estimation Error Squared (NEES).
A consistency metric for estimators. For a consistent estimator,
NEES should be chi-squared distributed with n degrees of freedom.
Parameters
----------
error : array_like
Estimation error vector(s) of shape (n,) or (m, n).
covariance : array_like
Covariance matrix of shape (n, n).
Returns
-------
nees : ndarray
NEES value(s). Scalar if error is 1D, array if error is 2D.
Examples
--------
>>> error = np.array([1.0, 0.5])
>>> cov = np.array([[1, 0], [0, 1]])
>>> nees(error, cov)
1.25
"""
error = np.asarray(error, dtype=np.float64)
covariance = np.asarray(covariance, dtype=np.float64)
cov_inv = np.linalg.inv(covariance)
if error.ndim == 1:
return error @ cov_inv @ error
else:
return np.sum(error @ cov_inv * error, axis=1)
[docs]
def nis(
innovation: ArrayLike,
innovation_covariance: ArrayLike,
) -> NDArray[np.floating]:
"""
Normalized Innovation Squared (NIS).
A filter consistency metric based on measurement innovations.
For a consistent filter, NIS should be chi-squared distributed.
Parameters
----------
innovation : array_like
Innovation (measurement residual) vector(s).
innovation_covariance : array_like
Innovation covariance matrix.
Returns
-------
nis : ndarray
NIS value(s).
Notes
-----
This is equivalent to NEES applied to innovations.
"""
return nees(innovation, innovation_covariance)
__all__ = [
"weighted_mean",
"weighted_var",
"weighted_cov",
"sample_mean",
"sample_var",
"sample_cov",
"sample_corr",
"median",
"mad",
"iqr",
"skewness",
"kurtosis",
"moment",
"nees",
"nis",
]