Source code for pytcl.performance_evaluation.track_metrics

"""
Track performance metrics for multi-target tracking evaluation.

This module provides metrics for evaluating multi-target tracker performance,
including OSPA (Optimal Sub-Pattern Assignment), track purity, and fragmentation.

References
----------
.. [1] D. Schuhmacher, B.-T. Vo, and B.-N. Vo, "A Consistent Metric for
       Performance Evaluation of Multi-Object Filters," IEEE Trans. Signal
       Processing, vol. 56, no. 8, pp. 3447-3457, Aug. 2008.
.. [2] K. Bernardin and R. Stiefelhagen, "Evaluating Multiple Object Tracking
       Performance: The CLEAR MOT Metrics," EURASIP J. Image Video Process.,
       2008.
"""

from typing import List, NamedTuple, Optional

import numpy as np
from numpy.typing import NDArray

from pytcl.assignment_algorithms import hungarian


[docs] class OSPAResult(NamedTuple): """ Result of OSPA metric computation. Attributes ---------- ospa : float Total OSPA distance. localization : float Localization component. cardinality : float Cardinality component. """ ospa: float localization: float cardinality: float
[docs] class MOTMetrics(NamedTuple): """ Multiple Object Tracking (MOT) metrics. Attributes ---------- mota : float Multiple Object Tracking Accuracy. motp : float Multiple Object Tracking Precision. num_switches : int Number of identity switches. num_fragmentations : int Number of track fragmentations. num_false_positives : int Number of false positive detections. num_misses : int Number of missed detections. """ mota: float motp: float num_switches: int num_fragmentations: int num_false_positives: int num_misses: int
[docs] def ospa( X: List[NDArray[np.float64]], Y: List[NDArray[np.float64]], c: float = 100.0, p: float = 2.0, ) -> OSPAResult: """ Compute Optimal Sub-Pattern Assignment (OSPA) metric. The OSPA metric provides a mathematically consistent measure of distance between two sets of points, accounting for both localization error and cardinality mismatch. Parameters ---------- X : list of ndarray First set of points (e.g., ground truth). Y : list of ndarray Second set of points (e.g., estimated tracks). c : float, optional Cutoff parameter for localization error (default: 100.0). p : float, optional Order parameter for the metric (default: 2.0). Returns ------- OSPAResult Named tuple containing: - ospa: Total OSPA distance - localization: Localization component - cardinality: Cardinality component Notes ----- - If both sets are empty, OSPA is 0. - The metric is symmetric: ospa(X, Y) = ospa(Y, X). - For p=2 (default), the metric is a proper L2 distance. Examples -------- >>> X = [np.array([0, 0]), np.array([10, 10])] >>> Y = [np.array([1, 0]), np.array([10, 11])] >>> result = ospa(X, Y, c=100, p=2) >>> result.ospa # doctest: +SKIP 1.118... """ m = len(X) n = len(Y) # Handle empty sets if m == 0 and n == 0: return OSPAResult(ospa=0.0, localization=0.0, cardinality=0.0) if m == 0 or n == 0: # All cardinality error cardinality = c return OSPAResult(ospa=cardinality, localization=0.0, cardinality=cardinality) # Ensure m <= n (swap if needed) if m > n: X, Y = Y, X m, n = n, m # Compute pairwise distances cost_matrix = np.zeros((m, n)) for i, x in enumerate(X): for j, y in enumerate(Y): d = np.linalg.norm(np.asarray(x) - np.asarray(y)) cost_matrix[i, j] = min(d, c) ** p # Solve assignment problem row_ind, col_ind, _ = hungarian(cost_matrix) # Compute localization component localization_sum = 0.0 for i, j in zip(row_ind, col_ind): localization_sum += cost_matrix[i, j] # Cardinality penalty cardinality_penalty = (n - m) * (c**p) # OSPA distance total = localization_sum + cardinality_penalty ospa_val = (total / n) ** (1.0 / p) # Decompose into components loc_component = (localization_sum / n) ** (1.0 / p) if localization_sum > 0 else 0.0 card_component = (cardinality_penalty / n) ** (1.0 / p) if n > m else 0.0 return OSPAResult( ospa=ospa_val, localization=loc_component, cardinality=card_component )
[docs] def ospa_over_time( X_sequence: List[List[NDArray[np.float64]]], Y_sequence: List[List[NDArray[np.float64]]], c: float = 100.0, p: float = 2.0, ) -> NDArray[np.float64]: """ Compute OSPA metric over a time sequence. Parameters ---------- X_sequence : list of list of ndarray Sequence of ground truth point sets. Y_sequence : list of list of ndarray Sequence of estimated point sets. c : float, optional Cutoff parameter (default: 100.0). p : float, optional Order parameter (default: 2.0). Returns ------- ndarray OSPA values at each time step. Raises ------ ValueError If sequences have different lengths. Examples -------- >>> # Two time steps with ground truth and estimates >>> X_seq = [[np.array([0, 0]), np.array([10, 10])], ... [np.array([1, 0]), np.array([11, 10])]] >>> Y_seq = [[np.array([0.5, 0]), np.array([10, 10.5])], ... [np.array([1.5, 0]), np.array([11, 10.5])]] >>> ospa_vals = ospa_over_time(X_seq, Y_seq, c=100, p=2) >>> len(ospa_vals) 2 """ if len(X_sequence) != len(Y_sequence): raise ValueError("Sequences must have the same length") ospa_values = np.zeros(len(X_sequence)) for k, (X, Y) in enumerate(zip(X_sequence, Y_sequence)): result = ospa(X, Y, c, p) ospa_values[k] = result.ospa return ospa_values
[docs] def track_purity( true_labels: NDArray[np.int_], estimated_labels: NDArray[np.int_], ) -> float: """ Compute track purity metric. Track purity measures how well estimated tracks correspond to single ground truth targets. A purity of 1.0 means each estimated track contains observations from only one true target. Parameters ---------- true_labels : ndarray Ground truth target labels for each observation. estimated_labels : ndarray Estimated track labels for each observation. Returns ------- float Track purity score in [0, 1]. Examples -------- >>> true_labels = np.array([0, 0, 0, 1, 1, 1]) >>> estimated_labels = np.array([0, 0, 0, 1, 1, 1]) # Perfect >>> track_purity(true_labels, estimated_labels) 1.0 >>> estimated_labels = np.array([0, 0, 1, 1, 1, 1]) # Mixed >>> track_purity(true_labels, estimated_labels) # doctest: +SKIP 0.833... """ true_labels = np.asarray(true_labels) estimated_labels = np.asarray(estimated_labels) if len(true_labels) != len(estimated_labels): raise ValueError("Label arrays must have the same length") if len(true_labels) == 0: return 1.0 unique_est = np.unique(estimated_labels) total_correct = 0 for est_label in unique_est: mask = estimated_labels == est_label true_in_track = true_labels[mask] # Count most frequent true label if len(true_in_track) > 0: _, counts = np.unique(true_in_track, return_counts=True) total_correct += counts.max() return total_correct / len(true_labels)
[docs] def track_fragmentation( true_labels: NDArray[np.int_], estimated_labels: NDArray[np.int_], time_indices: Optional[NDArray[np.int_]] = None, ) -> int: """ Count number of track fragmentations. A fragmentation occurs when observations from a single ground truth target are split across multiple estimated tracks. Parameters ---------- true_labels : ndarray Ground truth target labels for each observation. estimated_labels : ndarray Estimated track labels for each observation. time_indices : ndarray, optional Time indices for each observation (for temporal ordering). Returns ------- int Number of fragmentations. Examples -------- >>> true_labels = np.array([0, 0, 0, 0]) >>> estimated_labels = np.array([0, 0, 1, 1]) # One fragmentation >>> track_fragmentation(true_labels, estimated_labels) 1 """ true_labels = np.asarray(true_labels) estimated_labels = np.asarray(estimated_labels) if time_indices is not None: # Sort by time sort_idx = np.argsort(time_indices) true_labels = true_labels[sort_idx] estimated_labels = estimated_labels[sort_idx] fragmentations = 0 unique_true = np.unique(true_labels) for true_label in unique_true: mask = true_labels == true_label est_for_target = estimated_labels[mask] # Count transitions between different estimated tracks for i in range(1, len(est_for_target)): if est_for_target[i] != est_for_target[i - 1]: fragmentations += 1 return fragmentations
[docs] def identity_switches( true_labels: NDArray[np.int_], estimated_labels: NDArray[np.int_], time_indices: Optional[NDArray[np.int_]] = None, ) -> int: """ Count number of identity switches. An identity switch occurs when an estimated track changes which ground truth target it is associated with. Parameters ---------- true_labels : ndarray Ground truth target labels for each observation. estimated_labels : ndarray Estimated track labels for each observation. time_indices : ndarray, optional Time indices for each observation. Returns ------- int Number of identity switches. Examples -------- >>> true_labels = np.array([0, 0, 1, 1]) >>> estimated_labels = np.array([0, 0, 0, 0]) # Track 0 switches targets >>> identity_switches(true_labels, estimated_labels) 1 """ true_labels = np.asarray(true_labels) estimated_labels = np.asarray(estimated_labels) if time_indices is not None: sort_idx = np.argsort(time_indices) true_labels = true_labels[sort_idx] estimated_labels = estimated_labels[sort_idx] switches = 0 unique_est = np.unique(estimated_labels) for est_label in unique_est: mask = estimated_labels == est_label true_for_track = true_labels[mask] # Count transitions between different true targets for i in range(1, len(true_for_track)): if true_for_track[i] != true_for_track[i - 1]: switches += 1 return switches
[docs] def mot_metrics( ground_truth: List[List[NDArray[np.float64]]], estimates: List[List[NDArray[np.float64]]], threshold: float = 10.0, ) -> MOTMetrics: """ Compute CLEAR MOT metrics. Parameters ---------- ground_truth : list of list of ndarray Ground truth positions at each time step. estimates : list of list of ndarray Estimated positions at each time step. threshold : float, optional Distance threshold for valid associations (default: 10.0). Returns ------- MOTMetrics Named tuple containing MOTA, MOTP, and counts. Notes ----- MOTA (Multiple Object Tracking Accuracy) accounts for false positives, misses, and identity switches. MOTP (Precision) measures localization accuracy for correctly matched pairs. Examples -------- >>> gt = [[np.array([0, 0]), np.array([10, 10])], ... [np.array([1, 0]), np.array([11, 10])]] >>> est = [[np.array([0.5, 0]), np.array([10.5, 10])], ... [np.array([1.5, 0]), np.array([11.5, 10])]] >>> result = mot_metrics(gt, est, threshold=5.0) >>> result.mota # High accuracy with small errors 1.0 >>> result.motp < 1.0 # Some localization error True """ total_gt = 0 total_fp = 0 total_misses = 0 total_switches = 0 total_frags = 0 total_distance = 0.0 total_matches = 0 prev_assignment: dict[int, int] = {} # gt_idx -> est_idx from previous frame for k, (gt_frame, est_frame) in enumerate(zip(ground_truth, estimates)): n_gt = len(gt_frame) n_est = len(est_frame) total_gt += n_gt if n_gt == 0 and n_est == 0: continue if n_gt == 0: total_fp += n_est continue if n_est == 0: total_misses += n_gt prev_assignment = {} continue # Build cost matrix cost_matrix = np.zeros((n_gt, n_est)) for i, gt in enumerate(gt_frame): for j, est in enumerate(est_frame): cost_matrix[i, j] = np.linalg.norm(np.asarray(gt) - np.asarray(est)) # Solve assignment row_ind, col_ind, _ = hungarian(cost_matrix) # Count matches within threshold current_assignment = {} frame_matches = 0 frame_distance = 0.0 for i, j in zip(row_ind, col_ind): if cost_matrix[i, j] <= threshold: current_assignment[i] = j frame_matches += 1 frame_distance += cost_matrix[i, j] # Check for identity switch if i in prev_assignment and prev_assignment[i] != j: total_switches += 1 total_matches += frame_matches total_distance += frame_distance total_misses += n_gt - frame_matches total_fp += n_est - frame_matches prev_assignment = current_assignment # Compute metrics if total_gt > 0: mota = 1.0 - (total_misses + total_fp + total_switches) / total_gt else: mota = 1.0 if total_matches > 0: motp = total_distance / total_matches else: motp = 0.0 return MOTMetrics( mota=mota, motp=motp, num_switches=total_switches, num_fragmentations=total_frags, num_false_positives=total_fp, num_misses=total_misses, )
__all__ = [ "OSPAResult", "MOTMetrics", "ospa", "ospa_over_time", "track_purity", "track_fragmentation", "identity_switches", "mot_metrics", ]