Performance Evaluation
This example demonstrates tracking performance metrics and evaluation.
Overview
Evaluating tracker performance requires multiple metrics:
OSPA/GOSPA: Optimal Sub-Pattern Assignment distance
RMSE: Root Mean Square Error for localization
Track statistics: Purity, fragmentation, switches
Detection metrics: Probability of detection, false alarm rate
OSPA Metric
OSPA combines localization error and cardinality error:
Localization: Distance between matched targets
Cardinality: Penalty for missed/false targets
Order parameter (p): Controls metric sensitivity
Cutoff (c): Maximum localization error
Key Concepts
Track-to-truth assignment: Matching estimated tracks to ground truth
Track purity: Fraction of time track follows same target
Track fragmentation: Number of track breaks per target
ID switches: Number of times track switches targets
Code Highlights
The example demonstrates:
Computing OSPA at each time step with
ospa()OSPA over time with
ospa_over_time()NEES/NIS consistency metrics
Track-to-truth assignment and purity calculation
ROC curves for detection performance
Source Code
1"""
2Performance Evaluation Example.
3
4This example demonstrates:
51. OSPA (Optimal Sub-Pattern Assignment) metric for multi-target tracking
62. NEES (Normalized Estimation Error Squared) for filter consistency
73. NIS (Normalized Innovation Squared) for measurement consistency
84. Monte Carlo simulation for tracker evaluation
95. Track quality metrics (purity, fragmentation)
10
11Run with: python examples/performance_evaluation.py
12"""
13
14import sys
15from pathlib import Path
16
17sys.path.insert(0, str(Path(__file__).parent.parent))
18
19# Output directory for generated plots
20OUTPUT_DIR = Path(__file__).parent.parent / "docs" / "_static" / "images" / "examples"
21OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
22
23from typing import List, Tuple # noqa: E402
24
25import numpy as np # noqa: E402
26import plotly.graph_objects as go # noqa: E402
27from plotly.subplots import make_subplots # noqa: E402
28
29from pytcl.dynamic_estimation import ( # noqa: E402
30 kf_predict,
31 kf_update,
32)
33from pytcl.dynamic_models import ( # noqa: E402
34 f_constant_velocity,
35 q_constant_velocity,
36)
37from pytcl.performance_evaluation import ( # noqa: E402; Track metrics
38 ospa,
39)
40
41
42def ospa_demo() -> None:
43 """Demonstrate OSPA metric for multi-target tracking evaluation."""
44 print("=" * 60)
45 print("1. OSPA METRIC FOR MULTI-TARGET TRACKING")
46 print("=" * 60)
47
48 print("\nOSPA (Optimal Sub-Pattern Assignment) measures the distance")
49 print("between two sets of targets, accounting for:")
50 print(" - Localization errors (how far are matched targets?)")
51 print(" - Cardinality errors (how many targets are missed/false?)")
52
53 # Ground truth targets (2D positions)
54 truth = np.array(
55 [
56 [10.0, 20.0],
57 [50.0, 30.0],
58 [80.0, 60.0],
59 ]
60 )
61
62 print(f"\nGround truth: {len(truth)} targets")
63 for i, pos in enumerate(truth):
64 print(f" Target {i+1}: ({pos[0]:.1f}, {pos[1]:.1f})")
65
66 # Different estimate scenarios
67 scenarios = [
68 ("Perfect tracking", np.array([[10.0, 20.0], [50.0, 30.0], [80.0, 60.0]])),
69 ("Small errors", np.array([[12.0, 22.0], [48.0, 32.0], [82.0, 58.0]])),
70 ("One missed target", np.array([[10.0, 20.0], [50.0, 30.0]])),
71 (
72 "One false target",
73 np.array([[10.0, 20.0], [50.0, 30.0], [80.0, 60.0], [100.0, 100.0]]),
74 ),
75 ("Wrong positions", np.array([[0.0, 0.0], [100.0, 100.0], [50.0, 50.0]])),
76 ]
77
78 # OSPA parameters
79 c = 50.0 # Cutoff distance (max penalty per target)
80 p = 2 # Order parameter
81
82 print(f"\nOSPA parameters: c={c}, p={p}")
83 print("-" * 60)
84
85 for name, estimates in scenarios:
86 result = ospa(truth, estimates, c=c, p=p)
87
88 print(f"\n{name}:")
89 print(f" Estimates: {len(estimates)} targets")
90 print(f" OSPA distance: {result.ospa:.2f}")
91 print(f" Localization: {result.localization:.2f}")
92 print(f" Cardinality: {result.cardinality:.2f}")
93
94
95def nees_consistency_demo() -> None:
96 """Demonstrate NEES for filter consistency evaluation."""
97 print("\n" + "=" * 60)
98 print("2. NEES FOR FILTER CONSISTENCY")
99 print("=" * 60)
100
101 print("\nNEES (Normalized Estimation Error Squared) tests if the filter's")
102 print("covariance estimate matches the actual estimation errors.")
103 print("For a consistent filter, NEES should average to the state dimension.")
104
105 np.random.seed(42)
106
107 # Simulate a simple 2D tracking scenario
108 n_steps = 100
109 dt = 1.0
110
111 # System matrices
112 F = f_constant_velocity(dt, 2) # 4-state CV model
113 Q = q_constant_velocity(dt, 0.1, 2)
114 H = np.array([[1, 0, 0, 0], [0, 0, 1, 0]]) # Measure position
115 R = np.diag([5.0**2, 5.0**2])
116
117 # True initial state
118 x_true = np.array([0.0, 2.0, 0.0, 1.0])
119
120 # Run filter with correct process noise (consistent)
121 print("\n--- Correctly Tuned Filter ---")
122 nees_correct = run_filter_get_nees(x_true, F, Q, H, R, Q, n_steps)
123
124 # Run filter with underestimated process noise (optimistic)
125 print("\n--- Underestimated Process Noise (Optimistic) ---")
126 Q_low = q_constant_velocity(dt, 0.01, 2) # Too low
127 nees_optimistic = run_filter_get_nees(x_true, F, Q, H, R, Q_low, n_steps)
128
129 # Run filter with overestimated process noise (conservative)
130 print("\n--- Overestimated Process Noise (Conservative) ---")
131 Q_high = q_constant_velocity(dt, 1.0, 2) # Too high
132 nees_conservative = run_filter_get_nees(x_true, F, Q, H, R, Q_high, n_steps)
133
134 # Statistical test
135 state_dim = 4
136 print(f"\n{'Filter Type':<30} {'Mean NEES':<12} {'Expected':<12} {'Consistent?'}")
137 print("-" * 70)
138
139 for name, nees_vals in [
140 ("Correctly tuned", nees_correct),
141 ("Optimistic (low Q)", nees_optimistic),
142 ("Conservative (high Q)", nees_conservative),
143 ]:
144 mean_nees = np.mean(nees_vals)
145 # Chi-squared bounds (95% confidence)
146 lower = state_dim * 0.5 # Rough approximation
147 upper = state_dim * 1.5
148 consistent = lower <= mean_nees <= upper
149 status = "Yes" if consistent else "No"
150
151 print(f"{name:<30} {mean_nees:<12.2f} {state_dim:<12} {status}")
152
153
154def run_filter_get_nees(
155 x_true_init: np.ndarray,
156 F: np.ndarray,
157 Q_true: np.ndarray,
158 H: np.ndarray,
159 R: np.ndarray,
160 Q_filter: np.ndarray,
161 n_steps: int,
162) -> List[float]:
163 """Run Kalman filter and compute NEES at each step."""
164 # Generate true trajectory
165 x_true = x_true_init.copy()
166 true_states = [x_true.copy()]
167
168 for _ in range(n_steps - 1):
169 # Process noise
170 w = np.random.multivariate_normal(np.zeros(4), Q_true)
171 x_true = F @ x_true + w
172 true_states.append(x_true.copy())
173
174 # Generate measurements
175 measurements = []
176 for x in true_states:
177 v = np.random.multivariate_normal(np.zeros(2), R)
178 z = H @ x + v
179 measurements.append(z)
180
181 # Run filter
182 x = np.array([measurements[0][0], 0.0, measurements[0][1], 0.0])
183 P = np.diag([25.0, 10.0, 25.0, 10.0])
184
185 nees_values = []
186
187 for k in range(n_steps):
188 if k > 0:
189 x, P = kf_predict(x, P, F, Q_filter)
190 result = kf_update(x, P, measurements[k], H, R)
191 x, P = result.x, result.P
192
193 # Compute NEES
194 err = x - true_states[k]
195 nees_val = float(err.T @ np.linalg.solve(P, err))
196 nees_values.append(nees_val)
197
198 return nees_values
199
200
201def monte_carlo_demo() -> Tuple[List[float], List[float], List[float]]:
202 """Demonstrate Monte Carlo evaluation of tracker performance."""
203 print("\n" + "=" * 60)
204 print("3. MONTE CARLO TRACKER EVALUATION")
205 print("=" * 60)
206
207 print("\nMonte Carlo simulation runs multiple trials to get")
208 print("statistically meaningful performance metrics.")
209
210 np.random.seed(123)
211
212 n_runs = 50
213 n_steps = 100
214 dt = 1.0
215
216 # System setup
217 F = f_constant_velocity(dt, 2)
218 Q = q_constant_velocity(dt, 0.1, 2)
219 H = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
220 R = np.diag([5.0**2, 5.0**2])
221
222 print(f"\nRunning {n_runs} Monte Carlo trials...")
223 print(f" {n_steps} time steps per trial")
224
225 # Collect metrics across runs
226 all_rmse = []
227 all_nees = []
228 all_nis = []
229
230 for run in range(n_runs):
231 # Random initial state
232 x_true = np.array(
233 [
234 np.random.uniform(-50, 50), # x
235 np.random.uniform(1, 3), # vx
236 np.random.uniform(-50, 50), # y
237 np.random.uniform(1, 3), # vy
238 ]
239 )
240
241 # Generate trajectory and measurements
242 true_states = [x_true.copy()]
243 measurements = []
244
245 for _ in range(n_steps):
246 # Measurement
247 v = np.random.multivariate_normal(np.zeros(2), R)
248 z = H @ x_true + v
249 measurements.append(z)
250
251 # Propagate
252 w = np.random.multivariate_normal(np.zeros(4), Q)
253 x_true = F @ x_true + w
254 true_states.append(x_true.copy())
255
256 true_states = true_states[:-1] # Match measurement count
257
258 # Run filter
259 x = np.array([measurements[0][0], 0.0, measurements[0][1], 0.0])
260 P = np.diag([25.0, 10.0, 25.0, 10.0])
261
262 run_errors = []
263 run_nees = []
264 run_nis = []
265
266 for k in range(n_steps):
267 if k > 0:
268 x, P = kf_predict(x, P, F, Q)
269 z_pred = H @ x
270 S = H @ P @ H.T + R
271 innovation = measurements[k] - z_pred
272
273 # NIS
274 nis_val = float(innovation.T @ np.linalg.solve(S, innovation))
275 run_nis.append(nis_val)
276
277 result = kf_update(x, P, measurements[k], H, R)
278 x, P = result.x, result.P
279
280 # Position error
281 err = np.sqrt(
282 (x[0] - true_states[k][0]) ** 2 + (x[2] - true_states[k][2]) ** 2
283 )
284 run_errors.append(err)
285
286 # NEES
287 state_err = x - true_states[k]
288 nees_val = float(state_err.T @ np.linalg.solve(P, state_err))
289 run_nees.append(nees_val)
290
291 all_rmse.append(np.sqrt(np.mean(np.array(run_errors) ** 2)))
292 all_nees.append(np.mean(run_nees))
293 all_nis.append(np.mean(run_nis))
294
295 # Report statistics
296 print("\nResults across all Monte Carlo runs:")
297 print("-" * 60)
298
299 print("\nPosition RMSE (m):")
300 print(f" Mean: {np.mean(all_rmse):.2f}")
301 print(f" Std: {np.std(all_rmse):.2f}")
302 print(f" Min: {np.min(all_rmse):.2f}")
303 print(f" Max: {np.max(all_rmse):.2f}")
304
305 print("\nNEES (expected: 4.0 for 4-state filter):")
306 print(f" Mean: {np.mean(all_nees):.2f}")
307 print(f" Std: {np.std(all_nees):.2f}")
308
309 print("\nNIS (expected: 2.0 for 2-measurement filter):")
310 print(f" Mean: {np.mean(all_nis):.2f}")
311 print(f" Std: {np.std(all_nis):.2f}")
312
313 # Chi-squared test
314 print("\nConsistency check:")
315 nees_pass = 3.0 <= np.mean(all_nees) <= 5.0
316 nis_pass = 1.5 <= np.mean(all_nis) <= 2.5
317 print(f" NEES within bounds: {'PASS' if nees_pass else 'FAIL'}")
318 print(f" NIS within bounds: {'PASS' if nis_pass else 'FAIL'}")
319
320 return all_rmse, all_nees, all_nis
321
322
323def ospa_over_time_demo() -> Tuple[List[float], List[float], List[float]]:
324 """Demonstrate OSPA metric evolution over time."""
325 print("\n" + "=" * 60)
326 print("4. OSPA OVER TIME FOR TRACKER EVALUATION")
327 print("=" * 60)
328
329 np.random.seed(456)
330
331 n_steps = 50
332
333 # Simulate ground truth: 2 targets, one appears at t=10, one disappears at t=40
334 print("\nSimulating scenario with target birth/death:")
335 print(" - Target 1: present throughout")
336 print(" - Target 2: appears at t=10, disappears at t=40")
337
338 # Generate trajectories
339 truth_history = []
340 for t in range(n_steps):
341 targets = []
342 # Target 1: always present, moving right
343 targets.append(np.array([10.0 + t * 2, 30.0 + t * 0.5]))
344
345 # Target 2: appears at t=10, disappears at t=40
346 if 10 <= t < 40:
347 targets.append(np.array([80.0 - t * 1.5, 20.0 + t * 1.0]))
348
349 truth_history.append(
350 np.array(targets) if targets else np.array([]).reshape(0, 2)
351 )
352
353 # Simulate tracker estimates (with some noise and occasional misses)
354 estimate_history = []
355 for t, truth in enumerate(truth_history):
356 estimates = []
357 for target in truth:
358 # 90% detection probability
359 if np.random.rand() < 0.9:
360 noise = np.random.randn(2) * 5.0
361 estimates.append(target + noise)
362
363 # 10% false alarm probability
364 if np.random.rand() < 0.1:
365 false_alarm = np.array(
366 [np.random.uniform(0, 100), np.random.uniform(0, 80)]
367 )
368 estimates.append(false_alarm)
369
370 estimate_history.append(
371 np.array(estimates) if estimates else np.array([]).reshape(0, 2)
372 )
373
374 # Compute OSPA over time
375 c = 50.0
376 p = 2
377 ospa_history = []
378 loc_history = []
379 card_history = []
380
381 for truth, estimates in zip(truth_history, estimate_history):
382 if len(truth) == 0 and len(estimates) == 0:
383 ospa_history.append(0.0)
384 loc_history.append(0.0)
385 card_history.append(0.0)
386 else:
387 result = ospa(truth, estimates, c=c, p=p)
388 ospa_history.append(result.ospa)
389 loc_history.append(result.localization)
390 card_history.append(result.cardinality)
391
392 # Summary
393 print(f"\nOSPA statistics over {n_steps} time steps:")
394 print(f" Mean OSPA: {np.mean(ospa_history):.2f}")
395 print(f" Mean Localization: {np.mean(loc_history):.2f}")
396 print(f" Mean Cardinality: {np.mean(card_history):.2f}")
397
398 return ospa_history, loc_history, card_history
399
400
401def plot_results(
402 mc_rmse: List[float],
403 mc_nees: List[float],
404 mc_nis: List[float],
405 ospa_hist: List[float],
406 loc_hist: List[float],
407 card_hist: List[float],
408) -> None:
409 """Create performance evaluation plots."""
410 fig = make_subplots(
411 rows=2,
412 cols=2,
413 subplot_titles=(
414 "Monte Carlo RMSE Distribution",
415 "NEES/NIS Distribution",
416 "OSPA Over Time",
417 "OSPA Components",
418 ),
419 )
420
421 # RMSE histogram
422 fig.add_trace(
423 go.Histogram(x=mc_rmse, nbinsx=20, name="Position RMSE", marker_color="blue"),
424 row=1,
425 col=1,
426 )
427
428 # NEES/NIS histograms
429 fig.add_trace(
430 go.Histogram(
431 x=mc_nees,
432 nbinsx=20,
433 name="NEES",
434 marker_color="green",
435 opacity=0.7,
436 ),
437 row=1,
438 col=2,
439 )
440 fig.add_trace(
441 go.Histogram(
442 x=mc_nis,
443 nbinsx=20,
444 name="NIS",
445 marker_color="orange",
446 opacity=0.7,
447 ),
448 row=1,
449 col=2,
450 )
451
452 # OSPA over time
453 time = list(range(len(ospa_hist)))
454 fig.add_trace(
455 go.Scatter(x=time, y=ospa_hist, name="OSPA", line=dict(color="red", width=2)),
456 row=2,
457 col=1,
458 )
459
460 # OSPA components
461 fig.add_trace(
462 go.Scatter(
463 x=time,
464 y=loc_hist,
465 name="Localization",
466 line=dict(color="blue", width=1.5),
467 ),
468 row=2,
469 col=2,
470 )
471 fig.add_trace(
472 go.Scatter(
473 x=time,
474 y=card_hist,
475 name="Cardinality",
476 line=dict(color="green", width=1.5),
477 ),
478 row=2,
479 col=2,
480 )
481
482 fig.update_layout(
483 title="Performance Evaluation Metrics",
484 height=800,
485 width=1200,
486 showlegend=True,
487 barmode="overlay",
488 )
489
490 fig.update_xaxes(title_text="RMSE (m)", row=1, col=1)
491 fig.update_yaxes(title_text="Count", row=1, col=1)
492 fig.update_xaxes(title_text="Value", row=1, col=2)
493 fig.update_yaxes(title_text="Count", row=1, col=2)
494 fig.update_xaxes(title_text="Time Step", row=2, col=1)
495 fig.update_yaxes(title_text="OSPA", row=2, col=1)
496 fig.update_xaxes(title_text="Time Step", row=2, col=2)
497 fig.update_yaxes(title_text="Component Value", row=2, col=2)
498
499 fig.write_html(str(OUTPUT_DIR / "performance_evaluation.html"))
500 print("\nInteractive plot saved to performance_evaluation.html")
501 fig.show()
502
503
504def main() -> None:
505 """Run performance evaluation demonstrations."""
506 print("\nPerformance Evaluation Examples")
507 print("=" * 60)
508 print("Demonstrating pytcl tracker and filter evaluation metrics")
509
510 ospa_demo()
511 nees_consistency_demo()
512 mc_rmse, mc_nees, mc_nis = monte_carlo_demo()
513 ospa_hist, loc_hist, card_hist = ospa_over_time_demo()
514
515 # Generate plots
516 try:
517 plot_results(mc_rmse, mc_nees, mc_nis, ospa_hist, loc_hist, card_hist)
518 except Exception as e:
519 print(f"\nCould not generate plots: {e}")
520
521 print("\n" + "=" * 60)
522 print("Done!")
523 print("=" * 60)
524
525
526if __name__ == "__main__":
527 main()
Running the Example
python examples/performance_evaluation.py
See Also
Multi-Target Tracking - Tracker to evaluate
Assignment Algorithms - Assignment for track-truth matching