Source code for pytcl.static_estimation.least_squares

"""
Least squares estimation methods.

This module provides ordinary, weighted, and total least squares estimators
commonly used in tracking for state estimation and model fitting.

References
----------
.. [1] S. Van Huffel and J. Vandewalle, "The Total Least Squares Problem:
       Computational Aspects and Analysis," SIAM, 1991.
.. [2] G. H. Golub and C. F. Van Loan, "Matrix Computations," 4th ed.,
       Johns Hopkins University Press, 2013.
"""

from typing import NamedTuple, Optional

import numpy as np
from numpy.typing import ArrayLike, NDArray



[docs]
class LSResult(NamedTuple):
    """Result of least squares estimation.

    Attributes
    ----------
    x : ndarray
        Estimated parameters.
    residuals : ndarray
        Residual vector (y - A @ x).
    rank : int
        Effective rank of the design matrix.
    singular_values : ndarray
        Singular values of the design matrix.
    """

    x: NDArray[np.floating]
    residuals: NDArray[np.floating]
    rank: int
    singular_values: NDArray[np.floating]




[docs]
class WLSResult(NamedTuple):
    """Result of weighted least squares estimation.

    Attributes
    ----------
    x : ndarray
        Estimated parameters.
    residuals : ndarray
        Residual vector.
    covariance : ndarray
        Estimated covariance of parameters.
    weighted_residual_sum : float
        Sum of weighted squared residuals.
    """

    x: NDArray[np.floating]
    residuals: NDArray[np.floating]
    covariance: NDArray[np.floating]
    weighted_residual_sum: float




[docs]
class TLSResult(NamedTuple):
    """Result of total least squares estimation.

    Attributes
    ----------
    x : ndarray
        Estimated parameters.
    residuals_A : ndarray
        Corrections to the design matrix A.
    residuals_b : ndarray
        Corrections to the observation vector b.
    rank : int
        Effective rank used in the solution.
    """

    x: NDArray[np.floating]
    residuals_A: NDArray[np.floating]
    residuals_b: NDArray[np.floating]
    rank: int




[docs]
def ordinary_least_squares(
    A: ArrayLike,
    b: ArrayLike,
    rcond: Optional[float] = None,
) -> LSResult:
    """
    Ordinary least squares estimation.

    Solves the linear least squares problem:
        min_x ||A @ x - b||_2

    Parameters
    ----------
    A : array_like
        Design matrix of shape (m, n) where m >= n.
    b : array_like
        Observation vector of shape (m,) or (m, k).
    rcond : float, optional
        Cutoff for small singular values. Values smaller than
        rcond * largest_singular_value are treated as zero.
        Default is machine precision * max(m, n).

    Returns
    -------
    result : LSResult
        Named tuple containing:
        - x: Estimated parameters of shape (n,) or (n, k)
        - residuals: Residual vector
        - rank: Effective rank of A
        - singular_values: Singular values of A

    Examples
    --------
    >>> A = np.array([[1, 1], [1, 2], [1, 3]])
    >>> b = np.array([1, 2, 2])
    >>> result = ordinary_least_squares(A, b)
    >>> result.x  # Fitted line parameters
    array([0.66666667, 0.5       ])

    Notes
    -----
    Uses SVD-based solution for numerical stability.
    """
    A = np.asarray(A, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)

    # Solve using SVD for numerical stability
    x, residuals, rank, s = np.linalg.lstsq(A, b, rcond=rcond)

    # Compute residuals if not returned by lstsq
    if len(residuals) == 0:
        residuals = b - A @ x

    return LSResult(
        x=x,
        residuals=np.atleast_1d(residuals),
        rank=int(rank),
        singular_values=s,
    )




[docs]
def weighted_least_squares(
    A: ArrayLike,
    b: ArrayLike,
    W: Optional[ArrayLike] = None,
    weights: Optional[ArrayLike] = None,
) -> WLSResult:
    """
    Weighted least squares estimation.

    Solves the weighted linear least squares problem:
        min_x (b - A @ x)^T W (b - A @ x)

    Parameters
    ----------
    A : array_like
        Design matrix of shape (m, n).
    b : array_like
        Observation vector of shape (m,).
    W : array_like, optional
        Weight matrix of shape (m, m). If provided, weights is ignored.
        Should be positive definite.
    weights : array_like, optional
        Diagonal weights of shape (m,). Used only if W is None.
        Equivalent to W = diag(weights).

    Returns
    -------
    result : WLSResult
        Named tuple containing:
        - x: Estimated parameters of shape (n,)
        - residuals: Residual vector
        - covariance: Estimated parameter covariance
        - weighted_residual_sum: Weighted sum of squared residuals

    Examples
    --------
    >>> A = np.array([[1, 1], [1, 2], [1, 3]])
    >>> b = np.array([1, 2, 2])
    >>> weights = np.array([1, 2, 1])  # Higher weight on middle observation
    >>> result = weighted_least_squares(A, b, weights=weights)

    Notes
    -----
    The covariance of the estimated parameters is (A^T W A)^{-1}.

    For measurement noise with covariance R, use W = R^{-1} to get
    the minimum variance unbiased estimator (MVUE).
    """
    A = np.asarray(A, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)

    m, n = A.shape

    # Construct weight matrix
    if W is not None:
        W = np.asarray(W, dtype=np.float64)
    elif weights is not None:
        weights = np.asarray(weights, dtype=np.float64)
        W = np.diag(weights)
    else:
        # Default to identity (OLS)
        W = np.eye(m)

    # Solve normal equations: (A^T W A) x = A^T W b
    AtWA = A.T @ W @ A
    AtWb = A.T @ W @ b

    # Solve using Cholesky for positive definite system
    try:
        L = np.linalg.cholesky(AtWA)
        y = np.linalg.solve(L, AtWb)
        x = np.linalg.solve(L.T, y)
        cov = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(n)))
    except np.linalg.LinAlgError:
        # Fall back to pseudoinverse if not positive definite
        cov = np.linalg.pinv(AtWA)
        x = cov @ AtWb

    # Compute residuals and weighted sum
    residuals = b - A @ x
    weighted_residual_sum = float(residuals @ W @ residuals)

    return WLSResult(
        x=x,
        residuals=residuals,
        covariance=cov,
        weighted_residual_sum=weighted_residual_sum,
    )




[docs]
def total_least_squares(
    A: ArrayLike,
    b: ArrayLike,
    rank: Optional[int] = None,
) -> TLSResult:
    """
    Total least squares (TLS) estimation.

    Solves the errors-in-variables problem where both the design matrix
    and observations may have errors:
        min_{E, r} ||[E | r]||_F  subject to  (A + E) @ x = b + r

    Parameters
    ----------
    A : array_like
        Design matrix of shape (m, n) where m >= n.
    b : array_like
        Observation vector of shape (m,).
    rank : int, optional
        Truncation rank for regularized TLS. If None, uses full rank.
        Useful when A is rank-deficient.

    Returns
    -------
    result : TLSResult
        Named tuple containing:
        - x: Estimated parameters of shape (n,)
        - residuals_A: Corrections to A of shape (m, n)
        - residuals_b: Corrections to b of shape (m,)
        - rank: Effective rank used

    Examples
    --------
    >>> A = np.array([[1, 1], [2, 1], [3, 1]])
    >>> b = np.array([2.1, 2.9, 4.1])  # Noisy measurements
    >>> result = total_least_squares(A, b)

    Notes
    -----
    TLS is appropriate when both the independent and dependent variables
    are subject to measurement error. It minimizes the orthogonal distance
    from the data points to the fitted model.

    The solution is computed using the SVD of the augmented matrix [A | b].

    References
    ----------
    .. [1] S. Van Huffel and J. Vandewalle, "The Total Least Squares Problem:
           Computational Aspects and Analysis," SIAM, 1991.
    """
    A = np.asarray(A, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)

    m, n = A.shape

    if rank is None:
        rank = n

    # Form augmented matrix [A | b]
    if b.ndim == 1:
        b = b.reshape(-1, 1)
    C = np.hstack([A, b])

    # SVD of augmented matrix
    U, s, Vt = np.linalg.svd(C, full_matrices=True)
    V = Vt.T

    # Check if TLS solution exists
    # The solution exists if V[n, n] != 0
    if abs(V[n, n]) < 1e-14:
        raise ValueError(
            "TLS solution does not exist. The smallest singular value "
            "has multiplicity > 1."
        )

    # TLS solution: x = -V[0:n, n] / V[n, n]
    x = -V[:n, n] / V[n, n]

    # Compute corrections using truncated SVD
    # [E | r] = sum_{i=rank}^{n} s_i * u_i * v_i^T
    E_r = np.zeros_like(C)
    for i in range(rank, min(m, n + 1)):
        if i < len(s):
            E_r += s[i] * np.outer(U[:, i], V[:, i])

    residuals_A = E_r[:, :n]
    residuals_b = E_r[:, n].flatten()

    return TLSResult(
        x=x,
        residuals_A=residuals_A,
        residuals_b=residuals_b,
        rank=rank,
    )




[docs]
def generalized_least_squares(
    A: ArrayLike,
    b: ArrayLike,
    Sigma: ArrayLike,
) -> WLSResult:
    """
    Generalized least squares (GLS) estimation.

    Solves the linear model with correlated errors:
        b = A @ x + e,  where E[e e^T] = Sigma

    This is equivalent to weighted least squares with W = Sigma^{-1}.

    Parameters
    ----------
    A : array_like
        Design matrix of shape (m, n).
    b : array_like
        Observation vector of shape (m,).
    Sigma : array_like
        Error covariance matrix of shape (m, m).

    Returns
    -------
    result : WLSResult
        Same as weighted_least_squares result.

    Examples
    --------
    >>> A = np.array([[1, 1], [1, 2], [1, 3]])
    >>> b = np.array([1, 2, 2])
    >>> Sigma = np.array([[1, 0.5, 0], [0.5, 1, 0.5], [0, 0.5, 1]])
    >>> result = generalized_least_squares(A, b, Sigma)

    Notes
    -----
    GLS is the BLUE (Best Linear Unbiased Estimator) when the error
    covariance structure is known.
    """
    Sigma = np.asarray(Sigma, dtype=np.float64)
    W = np.linalg.inv(Sigma)
    return weighted_least_squares(A, b, W=W)




[docs]
def recursive_least_squares(
    x_prev: ArrayLike,
    P_prev: ArrayLike,
    a: ArrayLike,
    y: float,
    forgetting_factor: float = 1.0,
) -> tuple[NDArray[np.floating], NDArray[np.floating]]:
    """
    Recursive least squares (RLS) update.

    Updates parameter estimates when a new observation arrives,
    without reprocessing all previous data.

    Parameters
    ----------
    x_prev : array_like
        Previous parameter estimate of shape (n,).
    P_prev : array_like
        Previous covariance matrix of shape (n, n).
    a : array_like
        New regressor vector of shape (n,).
    y : float
        New observation.
    forgetting_factor : float, optional
        Forgetting factor in (0, 1]. Values < 1 give more weight to
        recent observations. Default is 1.0 (no forgetting).

    Returns
    -------
    x : ndarray
        Updated parameter estimate.
    P : ndarray
        Updated covariance matrix.

    Examples
    --------
    >>> x = np.zeros(2)  # Initial estimate
    >>> P = np.eye(2) * 100  # High initial uncertainty
    >>> # Process observations one at a time
    >>> x, P = recursive_least_squares(x, P, np.array([1, 1]), 2.0)
    >>> x, P = recursive_least_squares(x, P, np.array([1, 2]), 3.0)

    Notes
    -----
    RLS is equivalent to the Kalman filter for the static parameter
    estimation problem. The forgetting factor introduces exponential
    weighting of past data.
    """
    x_prev = np.asarray(x_prev, dtype=np.float64)
    P_prev = np.asarray(P_prev, dtype=np.float64)
    a = np.asarray(a, dtype=np.float64)

    lam = forgetting_factor

    # Innovation
    innovation = y - a @ x_prev

    # Kalman gain
    Pa = P_prev @ a
    denom = lam + a @ Pa
    K = Pa / denom

    # Update estimates
    x = x_prev + K * innovation
    P = (P_prev - np.outer(K, Pa)) / lam

    return x, P




[docs]
def ridge_regression(
    A: ArrayLike,
    b: ArrayLike,
    alpha: float = 1.0,
) -> NDArray[np.floating]:
    """
    Ridge regression (L2-regularized least squares).

    Solves the regularized problem:
        min_x ||A @ x - b||_2^2 + alpha * ||x||_2^2

    Parameters
    ----------
    A : array_like
        Design matrix of shape (m, n).
    b : array_like
        Observation vector of shape (m,).
    alpha : float, optional
        Regularization parameter. Larger values give more regularization.
        Default is 1.0.

    Returns
    -------
    x : ndarray
        Regularized parameter estimate.

    Examples
    --------
    >>> A = np.array([[1, 1], [1, 2], [1, 3]])
    >>> b = np.array([1, 2, 2])
    >>> x = ridge_regression(A, b, alpha=0.1)

    Notes
    -----
    Ridge regression shrinks parameters toward zero and is useful when:
    - The design matrix is ill-conditioned
    - There are more parameters than observations (n > m)
    - Regularization is desired to prevent overfitting
    """
    A = np.asarray(A, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)

    n = A.shape[1]

    # Solve (A^T A + alpha * I) x = A^T b
    AtA = A.T @ A + alpha * np.eye(n)
    Atb = A.T @ b

    return np.linalg.solve(AtA, Atb)



__all__ = [
    "LSResult",
    "WLSResult",
    "TLSResult",
    "ordinary_least_squares",
    "weighted_least_squares",
    "total_least_squares",
    "generalized_least_squares",
    "recursive_least_squares",
    "ridge_regression",
]