Performance Evaluation

This example demonstrates tracking performance metrics and evaluation.

Overview

Evaluating tracker performance requires multiple metrics:

  • OSPA/GOSPA: Optimal Sub-Pattern Assignment distance

  • RMSE: Root Mean Square Error for localization

  • Track statistics: Purity, fragmentation, switches

  • Detection metrics: Probability of detection, false alarm rate

OSPA Metric

OSPA combines localization error and cardinality error:

  • Localization: Distance between matched targets

  • Cardinality: Penalty for missed/false targets

  • Order parameter (p): Controls metric sensitivity

  • Cutoff (c): Maximum localization error

Key Concepts

  • Track-to-truth assignment: Matching estimated tracks to ground truth

  • Track purity: Fraction of time track follows same target

  • Track fragmentation: Number of track breaks per target

  • ID switches: Number of times track switches targets

Code Highlights

The example demonstrates:

  • Computing OSPA at each time step with ospa()

  • OSPA over time with ospa_over_time()

  • NEES/NIS consistency metrics

  • Track-to-truth assignment and purity calculation

  • ROC curves for detection performance

Source Code

  1"""
  2Performance Evaluation Example.
  3
  4This example demonstrates:
  51. OSPA (Optimal Sub-Pattern Assignment) metric for multi-target tracking
  62. NEES (Normalized Estimation Error Squared) for filter consistency
  73. NIS (Normalized Innovation Squared) for measurement consistency
  84. Monte Carlo simulation for tracker evaluation
  95. Track quality metrics (purity, fragmentation)
 10
 11Run with: python examples/performance_evaluation.py
 12"""
 13
 14import sys
 15from pathlib import Path
 16
 17sys.path.insert(0, str(Path(__file__).parent.parent))
 18
 19# Output directory for generated plots
 20OUTPUT_DIR = Path(__file__).parent.parent / "docs" / "_static" / "images" / "examples"
 21OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 22
 23from typing import List, Tuple  # noqa: E402
 24
 25import numpy as np  # noqa: E402
 26import plotly.graph_objects as go  # noqa: E402
 27from plotly.subplots import make_subplots  # noqa: E402
 28
 29from pytcl.dynamic_estimation import (  # noqa: E402
 30    kf_predict,
 31    kf_update,
 32)
 33from pytcl.dynamic_models import (  # noqa: E402
 34    f_constant_velocity,
 35    q_constant_velocity,
 36)
 37from pytcl.performance_evaluation import (  # noqa: E402; Track metrics
 38    ospa,
 39)
 40
 41
 42def ospa_demo() -> None:
 43    """Demonstrate OSPA metric for multi-target tracking evaluation."""
 44    print("=" * 60)
 45    print("1. OSPA METRIC FOR MULTI-TARGET TRACKING")
 46    print("=" * 60)
 47
 48    print("\nOSPA (Optimal Sub-Pattern Assignment) measures the distance")
 49    print("between two sets of targets, accounting for:")
 50    print("  - Localization errors (how far are matched targets?)")
 51    print("  - Cardinality errors (how many targets are missed/false?)")
 52
 53    # Ground truth targets (2D positions)
 54    truth = np.array(
 55        [
 56            [10.0, 20.0],
 57            [50.0, 30.0],
 58            [80.0, 60.0],
 59        ]
 60    )
 61
 62    print(f"\nGround truth: {len(truth)} targets")
 63    for i, pos in enumerate(truth):
 64        print(f"  Target {i+1}: ({pos[0]:.1f}, {pos[1]:.1f})")
 65
 66    # Different estimate scenarios
 67    scenarios = [
 68        ("Perfect tracking", np.array([[10.0, 20.0], [50.0, 30.0], [80.0, 60.0]])),
 69        ("Small errors", np.array([[12.0, 22.0], [48.0, 32.0], [82.0, 58.0]])),
 70        ("One missed target", np.array([[10.0, 20.0], [50.0, 30.0]])),
 71        (
 72            "One false target",
 73            np.array([[10.0, 20.0], [50.0, 30.0], [80.0, 60.0], [100.0, 100.0]]),
 74        ),
 75        ("Wrong positions", np.array([[0.0, 0.0], [100.0, 100.0], [50.0, 50.0]])),
 76    ]
 77
 78    # OSPA parameters
 79    c = 50.0  # Cutoff distance (max penalty per target)
 80    p = 2  # Order parameter
 81
 82    print(f"\nOSPA parameters: c={c}, p={p}")
 83    print("-" * 60)
 84
 85    for name, estimates in scenarios:
 86        result = ospa(truth, estimates, c=c, p=p)
 87
 88        print(f"\n{name}:")
 89        print(f"  Estimates: {len(estimates)} targets")
 90        print(f"  OSPA distance: {result.ospa:.2f}")
 91        print(f"  Localization: {result.localization:.2f}")
 92        print(f"  Cardinality:  {result.cardinality:.2f}")
 93
 94
 95def nees_consistency_demo() -> None:
 96    """Demonstrate NEES for filter consistency evaluation."""
 97    print("\n" + "=" * 60)
 98    print("2. NEES FOR FILTER CONSISTENCY")
 99    print("=" * 60)
100
101    print("\nNEES (Normalized Estimation Error Squared) tests if the filter's")
102    print("covariance estimate matches the actual estimation errors.")
103    print("For a consistent filter, NEES should average to the state dimension.")
104
105    np.random.seed(42)
106
107    # Simulate a simple 2D tracking scenario
108    n_steps = 100
109    dt = 1.0
110
111    # System matrices
112    F = f_constant_velocity(dt, 2)  # 4-state CV model
113    Q = q_constant_velocity(dt, 0.1, 2)
114    H = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])  # Measure position
115    R = np.diag([5.0**2, 5.0**2])
116
117    # True initial state
118    x_true = np.array([0.0, 2.0, 0.0, 1.0])
119
120    # Run filter with correct process noise (consistent)
121    print("\n--- Correctly Tuned Filter ---")
122    nees_correct = run_filter_get_nees(x_true, F, Q, H, R, Q, n_steps)
123
124    # Run filter with underestimated process noise (optimistic)
125    print("\n--- Underestimated Process Noise (Optimistic) ---")
126    Q_low = q_constant_velocity(dt, 0.01, 2)  # Too low
127    nees_optimistic = run_filter_get_nees(x_true, F, Q, H, R, Q_low, n_steps)
128
129    # Run filter with overestimated process noise (conservative)
130    print("\n--- Overestimated Process Noise (Conservative) ---")
131    Q_high = q_constant_velocity(dt, 1.0, 2)  # Too high
132    nees_conservative = run_filter_get_nees(x_true, F, Q, H, R, Q_high, n_steps)
133
134    # Statistical test
135    state_dim = 4
136    print(f"\n{'Filter Type':<30} {'Mean NEES':<12} {'Expected':<12} {'Consistent?'}")
137    print("-" * 70)
138
139    for name, nees_vals in [
140        ("Correctly tuned", nees_correct),
141        ("Optimistic (low Q)", nees_optimistic),
142        ("Conservative (high Q)", nees_conservative),
143    ]:
144        mean_nees = np.mean(nees_vals)
145        # Chi-squared bounds (95% confidence)
146        lower = state_dim * 0.5  # Rough approximation
147        upper = state_dim * 1.5
148        consistent = lower <= mean_nees <= upper
149        status = "Yes" if consistent else "No"
150
151        print(f"{name:<30} {mean_nees:<12.2f} {state_dim:<12} {status}")
152
153
154def run_filter_get_nees(
155    x_true_init: np.ndarray,
156    F: np.ndarray,
157    Q_true: np.ndarray,
158    H: np.ndarray,
159    R: np.ndarray,
160    Q_filter: np.ndarray,
161    n_steps: int,
162) -> List[float]:
163    """Run Kalman filter and compute NEES at each step."""
164    # Generate true trajectory
165    x_true = x_true_init.copy()
166    true_states = [x_true.copy()]
167
168    for _ in range(n_steps - 1):
169        # Process noise
170        w = np.random.multivariate_normal(np.zeros(4), Q_true)
171        x_true = F @ x_true + w
172        true_states.append(x_true.copy())
173
174    # Generate measurements
175    measurements = []
176    for x in true_states:
177        v = np.random.multivariate_normal(np.zeros(2), R)
178        z = H @ x + v
179        measurements.append(z)
180
181    # Run filter
182    x = np.array([measurements[0][0], 0.0, measurements[0][1], 0.0])
183    P = np.diag([25.0, 10.0, 25.0, 10.0])
184
185    nees_values = []
186
187    for k in range(n_steps):
188        if k > 0:
189            x, P = kf_predict(x, P, F, Q_filter)
190            result = kf_update(x, P, measurements[k], H, R)
191            x, P = result.x, result.P
192
193        # Compute NEES
194        err = x - true_states[k]
195        nees_val = float(err.T @ np.linalg.solve(P, err))
196        nees_values.append(nees_val)
197
198    return nees_values
199
200
201def monte_carlo_demo() -> Tuple[List[float], List[float], List[float]]:
202    """Demonstrate Monte Carlo evaluation of tracker performance."""
203    print("\n" + "=" * 60)
204    print("3. MONTE CARLO TRACKER EVALUATION")
205    print("=" * 60)
206
207    print("\nMonte Carlo simulation runs multiple trials to get")
208    print("statistically meaningful performance metrics.")
209
210    np.random.seed(123)
211
212    n_runs = 50
213    n_steps = 100
214    dt = 1.0
215
216    # System setup
217    F = f_constant_velocity(dt, 2)
218    Q = q_constant_velocity(dt, 0.1, 2)
219    H = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
220    R = np.diag([5.0**2, 5.0**2])
221
222    print(f"\nRunning {n_runs} Monte Carlo trials...")
223    print(f"  {n_steps} time steps per trial")
224
225    # Collect metrics across runs
226    all_rmse = []
227    all_nees = []
228    all_nis = []
229
230    for run in range(n_runs):
231        # Random initial state
232        x_true = np.array(
233            [
234                np.random.uniform(-50, 50),  # x
235                np.random.uniform(1, 3),  # vx
236                np.random.uniform(-50, 50),  # y
237                np.random.uniform(1, 3),  # vy
238            ]
239        )
240
241        # Generate trajectory and measurements
242        true_states = [x_true.copy()]
243        measurements = []
244
245        for _ in range(n_steps):
246            # Measurement
247            v = np.random.multivariate_normal(np.zeros(2), R)
248            z = H @ x_true + v
249            measurements.append(z)
250
251            # Propagate
252            w = np.random.multivariate_normal(np.zeros(4), Q)
253            x_true = F @ x_true + w
254            true_states.append(x_true.copy())
255
256        true_states = true_states[:-1]  # Match measurement count
257
258        # Run filter
259        x = np.array([measurements[0][0], 0.0, measurements[0][1], 0.0])
260        P = np.diag([25.0, 10.0, 25.0, 10.0])
261
262        run_errors = []
263        run_nees = []
264        run_nis = []
265
266        for k in range(n_steps):
267            if k > 0:
268                x, P = kf_predict(x, P, F, Q)
269                z_pred = H @ x
270                S = H @ P @ H.T + R
271                innovation = measurements[k] - z_pred
272
273                # NIS
274                nis_val = float(innovation.T @ np.linalg.solve(S, innovation))
275                run_nis.append(nis_val)
276
277                result = kf_update(x, P, measurements[k], H, R)
278                x, P = result.x, result.P
279
280            # Position error
281            err = np.sqrt(
282                (x[0] - true_states[k][0]) ** 2 + (x[2] - true_states[k][2]) ** 2
283            )
284            run_errors.append(err)
285
286            # NEES
287            state_err = x - true_states[k]
288            nees_val = float(state_err.T @ np.linalg.solve(P, state_err))
289            run_nees.append(nees_val)
290
291        all_rmse.append(np.sqrt(np.mean(np.array(run_errors) ** 2)))
292        all_nees.append(np.mean(run_nees))
293        all_nis.append(np.mean(run_nis))
294
295    # Report statistics
296    print("\nResults across all Monte Carlo runs:")
297    print("-" * 60)
298
299    print("\nPosition RMSE (m):")
300    print(f"  Mean:   {np.mean(all_rmse):.2f}")
301    print(f"  Std:    {np.std(all_rmse):.2f}")
302    print(f"  Min:    {np.min(all_rmse):.2f}")
303    print(f"  Max:    {np.max(all_rmse):.2f}")
304
305    print("\nNEES (expected: 4.0 for 4-state filter):")
306    print(f"  Mean:   {np.mean(all_nees):.2f}")
307    print(f"  Std:    {np.std(all_nees):.2f}")
308
309    print("\nNIS (expected: 2.0 for 2-measurement filter):")
310    print(f"  Mean:   {np.mean(all_nis):.2f}")
311    print(f"  Std:    {np.std(all_nis):.2f}")
312
313    # Chi-squared test
314    print("\nConsistency check:")
315    nees_pass = 3.0 <= np.mean(all_nees) <= 5.0
316    nis_pass = 1.5 <= np.mean(all_nis) <= 2.5
317    print(f"  NEES within bounds: {'PASS' if nees_pass else 'FAIL'}")
318    print(f"  NIS within bounds:  {'PASS' if nis_pass else 'FAIL'}")
319
320    return all_rmse, all_nees, all_nis
321
322
323def ospa_over_time_demo() -> Tuple[List[float], List[float], List[float]]:
324    """Demonstrate OSPA metric evolution over time."""
325    print("\n" + "=" * 60)
326    print("4. OSPA OVER TIME FOR TRACKER EVALUATION")
327    print("=" * 60)
328
329    np.random.seed(456)
330
331    n_steps = 50
332
333    # Simulate ground truth: 2 targets, one appears at t=10, one disappears at t=40
334    print("\nSimulating scenario with target birth/death:")
335    print("  - Target 1: present throughout")
336    print("  - Target 2: appears at t=10, disappears at t=40")
337
338    # Generate trajectories
339    truth_history = []
340    for t in range(n_steps):
341        targets = []
342        # Target 1: always present, moving right
343        targets.append(np.array([10.0 + t * 2, 30.0 + t * 0.5]))
344
345        # Target 2: appears at t=10, disappears at t=40
346        if 10 <= t < 40:
347            targets.append(np.array([80.0 - t * 1.5, 20.0 + t * 1.0]))
348
349        truth_history.append(
350            np.array(targets) if targets else np.array([]).reshape(0, 2)
351        )
352
353    # Simulate tracker estimates (with some noise and occasional misses)
354    estimate_history = []
355    for t, truth in enumerate(truth_history):
356        estimates = []
357        for target in truth:
358            # 90% detection probability
359            if np.random.rand() < 0.9:
360                noise = np.random.randn(2) * 5.0
361                estimates.append(target + noise)
362
363        # 10% false alarm probability
364        if np.random.rand() < 0.1:
365            false_alarm = np.array(
366                [np.random.uniform(0, 100), np.random.uniform(0, 80)]
367            )
368            estimates.append(false_alarm)
369
370        estimate_history.append(
371            np.array(estimates) if estimates else np.array([]).reshape(0, 2)
372        )
373
374    # Compute OSPA over time
375    c = 50.0
376    p = 2
377    ospa_history = []
378    loc_history = []
379    card_history = []
380
381    for truth, estimates in zip(truth_history, estimate_history):
382        if len(truth) == 0 and len(estimates) == 0:
383            ospa_history.append(0.0)
384            loc_history.append(0.0)
385            card_history.append(0.0)
386        else:
387            result = ospa(truth, estimates, c=c, p=p)
388            ospa_history.append(result.ospa)
389            loc_history.append(result.localization)
390            card_history.append(result.cardinality)
391
392    # Summary
393    print(f"\nOSPA statistics over {n_steps} time steps:")
394    print(f"  Mean OSPA:         {np.mean(ospa_history):.2f}")
395    print(f"  Mean Localization: {np.mean(loc_history):.2f}")
396    print(f"  Mean Cardinality:  {np.mean(card_history):.2f}")
397
398    return ospa_history, loc_history, card_history
399
400
401def plot_results(
402    mc_rmse: List[float],
403    mc_nees: List[float],
404    mc_nis: List[float],
405    ospa_hist: List[float],
406    loc_hist: List[float],
407    card_hist: List[float],
408) -> None:
409    """Create performance evaluation plots."""
410    fig = make_subplots(
411        rows=2,
412        cols=2,
413        subplot_titles=(
414            "Monte Carlo RMSE Distribution",
415            "NEES/NIS Distribution",
416            "OSPA Over Time",
417            "OSPA Components",
418        ),
419    )
420
421    # RMSE histogram
422    fig.add_trace(
423        go.Histogram(x=mc_rmse, nbinsx=20, name="Position RMSE", marker_color="blue"),
424        row=1,
425        col=1,
426    )
427
428    # NEES/NIS histograms
429    fig.add_trace(
430        go.Histogram(
431            x=mc_nees,
432            nbinsx=20,
433            name="NEES",
434            marker_color="green",
435            opacity=0.7,
436        ),
437        row=1,
438        col=2,
439    )
440    fig.add_trace(
441        go.Histogram(
442            x=mc_nis,
443            nbinsx=20,
444            name="NIS",
445            marker_color="orange",
446            opacity=0.7,
447        ),
448        row=1,
449        col=2,
450    )
451
452    # OSPA over time
453    time = list(range(len(ospa_hist)))
454    fig.add_trace(
455        go.Scatter(x=time, y=ospa_hist, name="OSPA", line=dict(color="red", width=2)),
456        row=2,
457        col=1,
458    )
459
460    # OSPA components
461    fig.add_trace(
462        go.Scatter(
463            x=time,
464            y=loc_hist,
465            name="Localization",
466            line=dict(color="blue", width=1.5),
467        ),
468        row=2,
469        col=2,
470    )
471    fig.add_trace(
472        go.Scatter(
473            x=time,
474            y=card_hist,
475            name="Cardinality",
476            line=dict(color="green", width=1.5),
477        ),
478        row=2,
479        col=2,
480    )
481
482    fig.update_layout(
483        title="Performance Evaluation Metrics",
484        height=800,
485        width=1200,
486        showlegend=True,
487        barmode="overlay",
488    )
489
490    fig.update_xaxes(title_text="RMSE (m)", row=1, col=1)
491    fig.update_yaxes(title_text="Count", row=1, col=1)
492    fig.update_xaxes(title_text="Value", row=1, col=2)
493    fig.update_yaxes(title_text="Count", row=1, col=2)
494    fig.update_xaxes(title_text="Time Step", row=2, col=1)
495    fig.update_yaxes(title_text="OSPA", row=2, col=1)
496    fig.update_xaxes(title_text="Time Step", row=2, col=2)
497    fig.update_yaxes(title_text="Component Value", row=2, col=2)
498
499    fig.write_html(str(OUTPUT_DIR / "performance_evaluation.html"))
500    print("\nInteractive plot saved to performance_evaluation.html")
501    fig.show()
502
503
504def main() -> None:
505    """Run performance evaluation demonstrations."""
506    print("\nPerformance Evaluation Examples")
507    print("=" * 60)
508    print("Demonstrating pytcl tracker and filter evaluation metrics")
509
510    ospa_demo()
511    nees_consistency_demo()
512    mc_rmse, mc_nees, mc_nis = monte_carlo_demo()
513    ospa_hist, loc_hist, card_hist = ospa_over_time_demo()
514
515    # Generate plots
516    try:
517        plot_results(mc_rmse, mc_nees, mc_nis, ospa_hist, loc_hist, card_hist)
518    except Exception as e:
519        print(f"\nCould not generate plots: {e}")
520
521    print("\n" + "=" * 60)
522    print("Done!")
523    print("=" * 60)
524
525
526if __name__ == "__main__":
527    main()

Running the Example

python examples/performance_evaluation.py

See Also