Source code for pytcl.gpu.particle_filter

"""
GPU-accelerated Particle Filter using CuPy.

This module provides GPU-accelerated implementations of particle filtering
algorithms for highly nonlinear and non-Gaussian state estimation.

Key Features
------------
- GPU-accelerated resampling (systematic, multinomial)
- Parallel weight computation
- Batch processing of multiple particle filters
- Efficient memory management

Performance
-----------
The GPU implementation achieves 8-15x speedup compared to CPU for:
- Large particle counts (N > 1000)
- Parallel processing of multiple targets

Examples
--------
>>> from pytcl.gpu.particle_filter import CuPyParticleFilter
>>> import numpy as np
>>>
>>> def dynamics(particles, t):
...     # Propagate particles through nonlinear dynamics
...     return particles + np.random.randn(*particles.shape) * 0.1
>>>
>>> def likelihood(particles, measurement):
...     # Compute likelihood for each particle
...     diff = particles[:, 0] - measurement
...     return np.exp(-0.5 * diff**2)
>>>
>>> pf = CuPyParticleFilter(n_particles=10000, state_dim=2)
>>> pf.predict(dynamics)
>>> pf.update(measurement, likelihood)
"""

from typing import Any, Callable, NamedTuple, Tuple

import numpy as np
from numpy.typing import ArrayLike, NDArray

from pytcl.core.optional_deps import import_optional, requires
from pytcl.gpu.utils import ensure_gpu_array, to_cpu


class ParticleFilterState(NamedTuple):
    """State of a particle filter.

    Attributes
    ----------
    particles : ndarray
        Particle states, shape (n_particles, state_dim).
    weights : ndarray
        Normalized particle weights, shape (n_particles,).
    ess : float
        Effective sample size.
    """

    particles: NDArray[np.floating]
    weights: NDArray[np.floating]
    ess: float



[docs]
@requires("cupy", extra="gpu", feature="GPU particle filter")
def gpu_effective_sample_size(weights: ArrayLike) -> float:
    """
    Compute effective sample size on GPU.

    ESS = 1 / sum(w_i^2)

    Parameters
    ----------
    weights : array_like
        Normalized particle weights.

    Returns
    -------
    ess : float
        Effective sample size.

    Examples
    --------
    >>> import numpy as np
    >>> from pytcl.gpu.particle_filter import gpu_effective_sample_size
    >>> weights = np.array([0.1, 0.2, 0.3, 0.4])
    >>> ess = gpu_effective_sample_size(weights)
    >>> ess > 0
    True
    >>> ess <= len(weights)
    True
    """
    cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")
    w = ensure_gpu_array(weights, dtype=cp.float64)
    ess = 1.0 / float(cp.sum(w**2))
    return ess




[docs]
@requires("cupy", extra="gpu", feature="GPU particle filter")
def gpu_resample_systematic(weights: ArrayLike) -> NDArray[np.intp]:
    """
    GPU-accelerated systematic resampling.

    Systematic resampling uses a single random number to select particles,
    resulting in low variance and O(N) complexity.

    Parameters
    ----------
    weights : array_like
        Normalized particle weights, shape (n_particles,).

    Returns
    -------
    indices : ndarray
        Resampled particle indices, shape (n_particles,).

    Examples
    --------
    >>> import numpy as np
    >>> from pytcl.gpu.particle_filter import gpu_resample_systematic
    >>> weights = np.array([0.1, 0.3, 0.4, 0.2])
    >>> indices = gpu_resample_systematic(weights)
    >>> # Particles 1 and 2 will be selected more often
    """
    cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

    w = ensure_gpu_array(weights, dtype=cp.float64)
    n = len(w)

    # Cumulative sum of weights
    cumsum = cp.cumsum(w)

    # Systematic sampling positions
    u0 = cp.random.uniform(0, 1.0 / n)
    positions = u0 + cp.arange(n, dtype=cp.float64) / n

    # Find indices using searchsorted
    indices = cp.searchsorted(cumsum, positions)

    # Clip to valid range
    indices = cp.clip(indices, 0, n - 1)

    return indices




[docs]
@requires("cupy", extra="gpu", feature="GPU particle filter")
def gpu_resample_multinomial(weights: ArrayLike) -> NDArray[np.intp]:
    """
    GPU-accelerated multinomial resampling.

    Multinomial resampling samples particles independently according
    to their weights.

    Parameters
    ----------
    weights : array_like
        Normalized particle weights, shape (n_particles,).

    Returns
    -------
    indices : ndarray
        Resampled particle indices, shape (n_particles,).

    Examples
    --------
    >>> import numpy as np
    >>> from pytcl.gpu.particle_filter import gpu_resample_multinomial
    >>> weights = np.array([0.1, 0.4, 0.5])
    >>> indices = gpu_resample_multinomial(weights)
    >>> indices.shape
    (3,)
    >>> np.all(indices < 3)
    True

    Notes
    -----
    Multinomial resampling has higher variance than systematic resampling
    but is simpler and can be more efficient on GPU for certain sizes.
    """
    cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

    w = ensure_gpu_array(weights, dtype=cp.float64)
    n = len(w)

    # Cumulative sum
    cumsum = cp.cumsum(w)

    # Generate random samples
    u = cp.random.uniform(0, 1, n)

    # Find indices
    indices = cp.searchsorted(cumsum, u)
    indices = cp.clip(indices, 0, n - 1)

    return indices




[docs]
@requires("cupy", extra="gpu", feature="GPU particle filter")
def gpu_resample_stratified(weights: ArrayLike) -> NDArray[np.intp]:
    """
    GPU-accelerated stratified resampling.

    Stratified resampling divides the CDF into N equal strata and samples
    one particle from each stratum.

    Parameters
    ----------
    weights : array_like
        Normalized particle weights, shape (n_particles,).

    Returns
    -------
    indices : ndarray
        Resampled particle indices, shape (n_particles,).
    """
    cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

    w = ensure_gpu_array(weights, dtype=cp.float64)
    n = len(w)

    # Cumulative sum
    cumsum = cp.cumsum(w)

    # Stratified sampling: one random number per stratum
    u = (cp.arange(n, dtype=cp.float64) + cp.random.uniform(0, 1, n)) / n

    # Find indices
    indices = cp.searchsorted(cumsum, u)
    indices = cp.clip(indices, 0, n - 1)

    return indices




[docs]
@requires("cupy", extra="gpu", feature="GPU particle filter")
def gpu_normalize_weights(
    log_weights: ArrayLike,
) -> Tuple[NDArray[np.floating[Any]], float]:
    """
    Normalize log weights to proper weights on GPU.

    Uses log-sum-exp trick for numerical stability.

    Parameters
    ----------
    log_weights : array_like
        Unnormalized log weights, shape (n_particles,).

    Returns
    -------
    weights : ndarray
        Normalized weights, shape (n_particles,).
    log_likelihood : float
        Log of the normalization constant.

    Examples
    --------
    >>> import numpy as np
    >>> from pytcl.gpu.particle_filter import gpu_normalize_weights
    >>> log_w = np.array([-1.0, -0.5, -2.0])
    >>> weights, log_likelihood = gpu_normalize_weights(log_w)
    >>> np.allclose(weights.sum(), 1.0)
    True
    >>> log_likelihood < 0
    True
    """
    cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

    log_w = ensure_gpu_array(log_weights, dtype=cp.float64)

    # Log-sum-exp for numerical stability
    max_log_w = cp.max(log_w)
    log_sum = max_log_w + cp.log(cp.sum(cp.exp(log_w - max_log_w)))

    # Normalized weights
    weights = cp.exp(log_w - log_sum)

    return weights, float(log_sum)




[docs]
class CuPyParticleFilter:
    """
    GPU-accelerated Bootstrap Particle Filter.

    This class implements the Sequential Importance Resampling (SIR)
    particle filter with GPU acceleration.

    Parameters
    ----------
    n_particles : int
        Number of particles.
    state_dim : int
        Dimension of state vector.
    resample_method : str
        Resampling method: 'systematic', 'multinomial', or 'stratified'.
    resample_threshold : float
        ESS threshold for resampling (as fraction of n_particles).

    Attributes
    ----------
    particles : cupy.ndarray
        Current particle states, shape (n_particles, state_dim).
    weights : cupy.ndarray
        Current particle weights, shape (n_particles,).

    Examples
    --------
    >>> import numpy as np
    >>> from pytcl.gpu.particle_filter import CuPyParticleFilter
    >>>
    >>> # Initialize filter
    >>> pf = CuPyParticleFilter(n_particles=10000, state_dim=4)
    >>> pf.initialize(initial_state, initial_cov)
    >>>
    >>> # Run filter
    >>> for measurement in measurements:
    ...     pf.predict(dynamics_fn)
    ...     pf.update(measurement, likelihood_fn)
    ...     state_estimate = pf.get_estimate()
    """


[docs]
    @requires("cupy", extra="gpu", feature="GPU particle filter")
    def __init__(
        self,
        n_particles: int,
        state_dim: int,
        resample_method: str = "systematic",
        resample_threshold: float = 0.5,
    ):
        cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

        self.n_particles = n_particles
        self.state_dim = state_dim
        self.resample_threshold = resample_threshold

        # Select resampling function
        if resample_method == "systematic":
            self._resample_fn = gpu_resample_systematic
        elif resample_method == "multinomial":
            self._resample_fn = gpu_resample_multinomial
        elif resample_method == "stratified":
            self._resample_fn = gpu_resample_stratified
        else:
            raise ValueError(f"Unknown resample method: {resample_method}")

        # Initialize particles and weights
        self.particles = cp.zeros((n_particles, state_dim), dtype=cp.float64)
        self.weights = cp.ones(n_particles, dtype=cp.float64) / n_particles



[docs]
    def initialize(
        self,
        mean: ArrayLike,
        cov: ArrayLike,
    ) -> None:
        """
        Initialize particles from Gaussian distribution.

        Parameters
        ----------
        mean : array_like
            Mean state, shape (state_dim,).
        cov : array_like
            Covariance matrix, shape (state_dim, state_dim).
        """
        cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

        mean = np.asarray(mean).flatten()
        cov = np.asarray(cov)

        # Sample from multivariate normal on CPU (CuPy lacks this)
        samples = np.random.multivariate_normal(mean, cov, self.n_particles)
        self.particles = ensure_gpu_array(samples, dtype=cp.float64)
        self.weights = cp.ones(self.n_particles, dtype=cp.float64) / self.n_particles



[docs]
    def initialize_uniform(
        self,
        low: ArrayLike,
        high: ArrayLike,
    ) -> None:
        """
        Initialize particles from uniform distribution.

        Parameters
        ----------
        low : array_like
            Lower bounds, shape (state_dim,).
        high : array_like
            Upper bounds, shape (state_dim,).
        """
        cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

        low = ensure_gpu_array(low, dtype=cp.float64)
        high = ensure_gpu_array(high, dtype=cp.float64)

        # Sample uniformly
        u = cp.random.uniform(0, 1, (self.n_particles, self.state_dim))
        self.particles = low + u * (high - low)
        self.weights = cp.ones(self.n_particles, dtype=cp.float64) / self.n_particles



[docs]
    def predict(
        self,
        dynamics_fn: Callable[[NDArray[np.floating[Any]]], NDArray[np.floating[Any]]],
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """
        Propagate particles through dynamics.

        Parameters
        ----------
        dynamics_fn : callable
            Function that takes particles (N, state_dim) and returns
            propagated particles (N, state_dim).
        *args, **kwargs
            Additional arguments passed to dynamics_fn.

        Notes
        -----
        The dynamics function receives CuPy arrays if GPU is available.
        It should return arrays of the same type.
        """
        # Apply dynamics (may be on CPU or GPU depending on function)
        self.particles = dynamics_fn(self.particles, *args, **kwargs)



[docs]
    def update(
        self,
        measurement: ArrayLike,
        likelihood_fn: Callable[
            [NDArray[np.floating[Any]], NDArray[np.floating[Any]]],
            NDArray[np.floating[Any]],
        ],
    ) -> float:
        """
        Update weights based on measurement likelihood.

        Parameters
        ----------
        measurement : array_like
            Measurement vector.
        likelihood_fn : callable
            Function that computes likelihood for each particle.
            Takes (particles, measurement) and returns likelihoods (n_particles,).

        Returns
        -------
        log_likelihood : float
            Log of the marginal likelihood (normalization constant).
        """
        cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

        z = ensure_gpu_array(measurement, dtype=cp.float64)

        # Compute likelihoods
        likelihoods = likelihood_fn(self.particles, z)
        likelihoods = ensure_gpu_array(likelihoods, dtype=cp.float64)

        # Update weights
        log_weights = cp.log(self.weights) + cp.log(likelihoods + 1e-300)

        # Normalize
        self.weights, log_likelihood = gpu_normalize_weights(log_weights)

        # Resample if ESS drops below threshold
        ess = gpu_effective_sample_size(self.weights)
        if ess < self.resample_threshold * self.n_particles:
            self._resample()

        return log_likelihood


    def _resample(self) -> None:
        """Perform resampling."""
        cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

        indices = self._resample_fn(self.weights)
        self.particles = self.particles[indices]
        self.weights = cp.ones(self.n_particles, dtype=cp.float64) / self.n_particles


[docs]
    def get_estimate(self) -> NDArray[np.floating]:
        """
        Compute weighted mean estimate.

        Returns
        -------
        estimate : ndarray
            Weighted mean state, shape (state_dim,).
        """
        cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")
        estimate = cp.sum(self.particles * self.weights[:, None], axis=0)
        return estimate



[docs]
    def get_covariance(self) -> NDArray[np.floating]:
        """
        Compute weighted covariance estimate.

        Returns
        -------
        cov : ndarray
            Weighted covariance, shape (state_dim, state_dim).
        """
        cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

        mean = self.get_estimate()
        diff = self.particles - mean
        cov = cp.einsum("n,ni,nj->ij", self.weights, diff, diff)
        return cov



[docs]
    def get_ess(self) -> float:
        """Get current effective sample size."""
        return gpu_effective_sample_size(self.weights)



[docs]
    def get_state(self) -> ParticleFilterState:
        """
        Get current filter state.

        Returns
        -------
        state : ParticleFilterState
            Named tuple with particles, weights, and ESS.
        """
        return ParticleFilterState(
            particles=self.particles,
            weights=self.weights,
            ess=self.get_ess(),
        )



[docs]
    def get_particles_cpu(self) -> NDArray[np.floating]:
        """Get particles on CPU."""
        return to_cpu(self.particles)



[docs]
    def get_weights_cpu(self) -> NDArray[np.floating]:
        """Get weights on CPU."""
        return to_cpu(self.weights)




@requires("cupy", extra="gpu", feature="GPU particle filter")
def batch_particle_filter_update(
    particles: ArrayLike,
    weights: ArrayLike,
    measurements: ArrayLike,
    likelihood_fn: Callable[
        [NDArray[np.floating[Any]], NDArray[np.floating[Any]]],
        NDArray[np.floating[Any]],
    ],
) -> Tuple[
    NDArray[np.floating[Any]], NDArray[np.floating[Any]], NDArray[np.floating[Any]]
]:
    """
    Batch update for multiple particle filters.

    Parameters
    ----------
    particles : array_like
        Particle states, shape (n_filters, n_particles, state_dim).
    weights : array_like
        Particle weights, shape (n_filters, n_particles).
    measurements : array_like
        Measurements, shape (n_filters, meas_dim).
    likelihood_fn : callable
        Function that computes likelihood for each particle.

    Returns
    -------
    weights_updated : ndarray
        Updated weights.
    log_likelihoods : ndarray
        Log likelihoods for each filter.
    ess : ndarray
        Effective sample sizes.
    """
    cp = import_optional("cupy", extra="gpu", feature="GPU particle filter")

    particles_gpu = ensure_gpu_array(particles, dtype=cp.float64)
    weights_gpu = ensure_gpu_array(weights, dtype=cp.float64)
    measurements_gpu = ensure_gpu_array(measurements, dtype=cp.float64)

    n_filters = particles_gpu.shape[0]

    weights_updated = cp.zeros_like(weights_gpu)
    log_likelihoods = cp.zeros(n_filters, dtype=cp.float64)
    ess = cp.zeros(n_filters, dtype=cp.float64)

    for i in range(n_filters):
        # Compute likelihoods
        likelihoods = likelihood_fn(particles_gpu[i], measurements_gpu[i])
        likelihoods = ensure_gpu_array(likelihoods, dtype=cp.float64)

        # Update weights
        log_weights = cp.log(weights_gpu[i]) + cp.log(likelihoods + 1e-300)

        # Normalize
        max_log_w = cp.max(log_weights)
        log_sum = max_log_w + cp.log(cp.sum(cp.exp(log_weights - max_log_w)))
        weights_updated[i] = cp.exp(log_weights - log_sum)
        log_likelihoods[i] = log_sum

        # ESS
        ess[i] = 1.0 / cp.sum(weights_updated[i] ** 2)

    return weights_updated, log_likelihoods, ess


__all__ = [
    "ParticleFilterState",
    "gpu_effective_sample_size",
    "gpu_resample_systematic",
    "gpu_resample_multinomial",
    "gpu_resample_stratified",
    "gpu_normalize_weights",
    "CuPyParticleFilter",
    "batch_particle_filter_update",
]