diff --git a/api/api_tests/internal/transform/test_embed_text.py b/api/api_tests/internal/transform/test_embed_text.py index a30adbc62..b524f90bd 100644 --- a/api/api_tests/internal/transform/test_embed_text.py +++ b/api/api_tests/internal/transform/test_embed_text.py @@ -369,10 +369,12 @@ def test_async_request_handler_empty_prompts_list(mock_make_async_request): assert result == [] -@patch("nv_ingest_api.internal.transform.embed_text.infer_microservice") -def test_make_async_request_happy_path(im_mock): +@patch("nv_ingest_api.util.nim.infer_microservice") +@patch(f"{MODULE_UNDER_TEST}.infer_microservice", create=True) +def test_make_async_request_happy_path(module_im_mock, nim_im_mock): # Assign - im_mock.return_value = [[0.1, 0.2, 0.3]] + nim_im_mock.return_value = [[0.1, 0.2, 0.3]] + module_im_mock.return_value = [[0.1, 0.2, 0.3]] # Act result = module_under_test._make_async_request( prompts=["Hello world"], @@ -385,8 +387,8 @@ def test_make_async_request_happy_path(im_mock): filter_errors=False, dimensions=None, ) - # Assert: client called as expected - im_mock.assert_called_once_with( + # Assert: client called as expected (module-level import is used in embed_text) + module_im_mock.assert_called_once_with( ["Hello world"], "dummy_model", embedding_endpoint="http://dummy-endpoint", @@ -403,10 +405,12 @@ def test_make_async_request_happy_path(im_mock): assert result == {"embedding": [[0.1, 0.2, 0.3]], "info_msg": None} -@patch("nv_ingest_api.internal.transform.embed_text.infer_microservice") -def test_make_async_request_failure_returns_none_embedding_and_info_message(im_mock): +@patch("nv_ingest_api.util.nim.infer_microservice") +@patch(f"{MODULE_UNDER_TEST}.infer_microservice", create=True) +def test_make_async_request_failure_returns_none_embedding_and_info_message(module_im_mock, nim_im_mock): # Arrange - im_mock.side_effect = RuntimeError("Simulated client failure") + nim_im_mock.side_effect = RuntimeError("Simulated client failure") + module_im_mock.side_effect = RuntimeError("Simulated client failure") # Act & Assert with pytest.raises(RuntimeError) as excinfo: diff --git a/api/src/nv_ingest_api/internal/primitives/tracing/tagging.py b/api/src/nv_ingest_api/internal/primitives/tracing/tagging.py index dd771152d..75715bd57 100644 --- a/api/src/nv_ingest_api/internal/primitives/tracing/tagging.py +++ b/api/src/nv_ingest_api/internal/primitives/tracing/tagging.py @@ -15,7 +15,7 @@ def traceable(trace_name: Optional[str] = None): """ - A decorator that adds entry and exit trace timestamps to a IngestControlMessage's metadata + A decorator that adds entry and exit trace timestamps to an IngestControlMessage's metadata based on the presence of a 'config::add_trace_tagging' flag. This decorator checks if the 'config::add_trace_tagging' flag is set to True in the @@ -37,7 +37,7 @@ def traceable(trace_name: Optional[str] = None): Notes ----- - The decorated function must accept a IngestControlMessage object as one of its arguments. + The decorated function must accept an IngestControlMessage object as one of its arguments. For a regular function, this is expected to be the first argument; for a class method, this is expected to be the second argument (after 'self'). 
The IngestControlMessage object must implement `has_metadata`, `get_metadata`, and `set_metadata` methods used by the decorator @@ -51,7 +51,7 @@ def traceable(trace_name: Optional[str] = None): -------- Automatic stage name detection (recommended): - >>> @traceable() # Uses self.stage_name automatically + >>> @traceable() # Uses self.stage_name automatically ... def process_message(self, message): ... pass @@ -253,14 +253,14 @@ def set_trace_timestamps_with_parent_context(control_message, execution_trace_lo -------- Basic usage in a stage: - >>> execution_trace_log = {"trace::entry::yolox_inference": ts1, "trace::exit::yolox_inference": ts2} + >>> execution_trace_log = {"trace::entry::yolox_inference": ts1, "trace::exit::yolox_inference": ts2} # noqa >>> set_trace_timestamps_with_parent_context( ... control_message, execution_trace_log, "pdf_extractor", logger ... ) This transforms: - trace::entry::yolox_inference -> trace::entry::pdf_extractor::yolox_inference - - trace::exit::yolox_inference -> trace::exit::pdf_extractor::yolox_inference + - trace::exit::yolox_inference -> trace::exit::pdf_extractor::yolox_inference """ if not execution_trace_log: return diff --git a/assets/style.css b/assets/style.css new file mode 100644 index 000000000..36fe2ebba --- /dev/null +++ b/assets/style.css @@ -0,0 +1,111 @@ +/* Global fonts and tokens */ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap'); + +:root { + --bg: #0f1115; + --surface: #151821; + --surface-2: #1b2030; + --text: #e5e7eb; + --text-muted: #9aa3b2; + --border: #2a2f3d; + --accent: #4f46e5; + --accent-2: #06b6d4; + --success: #10b981; + --danger: #ef4444; + --warning: #f59e0b; + --radius: 8px; + --shadow: 0 1px 2px rgba(0,0,0,0.25), 0 8px 24px rgba(0,0,0,0.18); + --gap-xs: 4px; --gap-sm: 8px; --gap-md: 12px; --gap-lg: 16px; --gap-xl: 24px; +} + +/* Light theme support via data-theme=light on body (optional) */ +body[data-theme="light"] { + --bg: #ffffff; + --surface: #f6f7fb; + --surface-2: #eef1f7; + --text: #111827; + --text-muted: #6b7280; + --border: #e5e7eb; +} + +html, body { height: 100%; } +body { + margin: 0; + background: var(--bg); + color: var(--text); + font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif; + font-size: 14px; + line-height: 1.5; +} + +/* Layout */ +.grid { display: grid; grid-template-columns: 300px 1fr; gap: var(--gap-lg); align-items: start; } +.sidebar { + position: sticky; top: var(--gap-lg); + padding: var(--gap-lg); + background: var(--surface); + border: 1px solid var(--border); + border-radius: var(--radius); + box-shadow: var(--shadow); +} +.section-title { font-size: 16px; font-weight: 600; margin: var(--gap-sm) 0; } +.label { color: var(--text-muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.04em; margin-bottom: 4px; display: block; } +.help, .muted { color: var(--text-muted); font-size: 12px; } +.control { margin: var(--gap-md) 0; } +.kpis { display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: var(--gap-md); margin: var(--gap-sm) 0 var(--gap-lg); } +.graph { margin-bottom: var(--gap-lg); background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: var(--gap-sm); } + +/* Inputs & Buttons */ +input, textarea { + background: var(--surface-2); + color: var(--text); + border: 1px solid var(--border); + border-radius: 6px; + padding: 8px 10px; +} +input::placeholder, textarea::placeholder { color: var(--text-muted); } +button, .button 
{ + appearance: none; border: 1px solid var(--border); background: var(--surface-2); + color: var(--text); padding: 6px 12px; border-radius: 6px; cursor: pointer; +} +button:hover { border-color: color-mix(in srgb, var(--accent) 35%, var(--border)); } +button.primary { background: var(--accent); border-color: var(--accent); color: #fff; } +button.primary:hover { background: color-mix(in srgb, var(--accent) 85%, #000); } + +/* Dash core components */ +/* RadioItems */ +input[type="radio"] { accent-color: var(--accent); } +input[type="checkbox"] { accent-color: var(--accent); } + +/* Dropdown (react-select) */ +.Select-control { background: var(--surface-2); border-color: var(--border); color: var(--text); } +.Select--single > .Select-control .Select-value, .Select-placeholder { color: var(--text); } +.Select-menu-outer { background: var(--surface-2); border-color: var(--border); color: var(--text); z-index: 1000; } +.Select-option { background: var(--surface-2); color: var(--text); } +.Select-option.is-focused { background: color-mix(in srgb, var(--accent) 14%, var(--surface-2)); } +.Select-option.is-selected { background: color-mix(in srgb, var(--accent) 28%, var(--surface-2)); } + +/* Slider (rc-slider) */ +.rc-slider { padding: 6px 0; } +.rc-slider-rail { background: var(--border); } +.rc-slider-track { background: var(--accent); } +.rc-slider-handle { border-color: var(--accent); background: #fff; } + +/* Tabs */ +.tabs, .dash-tabs { background: transparent; } +.tab { background: var(--surface); border: 1px solid var(--border) !important; color: var(--text); border-radius: 6px 6px 0 0; margin-right: 4px; padding: 8px 12px; } +.tab--selected { border-bottom-color: transparent !important; background: var(--surface-2); } + +/* Upload */ +.dccUpload { border: 1px dashed var(--border); border-radius: 6px; padding: 6px 10px; color: var(--text-muted); } + +/* Cytoscape container */ +#proctree-cyto-container { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); } +#proctree-graph { background: var(--surface); } + +/* Helper spacing */ +hr { border: none; border-top: 1px solid var(--border); margin: var(--gap-md) 0; opacity: 0.7; } + +/* Plotly figure background harmonization */ +.js-plotly-plot .plotly .bg, .js-plotly-plot .plotly .bglayer, .js-plotly-plot .plotly .plot { background: transparent !important; } +.js-plotly-plot .plotly .infolayer { color: var(--text); } diff --git a/client/src/nv_ingest_client/client/ingest_job_handler.py b/client/src/nv_ingest_client/client/ingest_job_handler.py index c3c9af815..c2f81dd2a 100644 --- a/client/src/nv_ingest_client/client/ingest_job_handler.py +++ b/client/src/nv_ingest_client/client/ingest_job_handler.py @@ -259,6 +259,59 @@ def _save_response_data( clean_doc_name = os.path.basename(doc_name) output_name = f"{clean_doc_name}.metadata.json" + # Additionally, write out parallel files for trace timings and annotations (if present) + try: + os.makedirs(output_directory, exist_ok=True) + except Exception: + # Best-effort; directory should generally exist already + pass + + # Build primitive breakdown from response_data + primitive_total: int = 0 + primitive_counts_by_type: Dict[str, int] = defaultdict(int) + structured_by_subtype: Dict[str, int] = defaultdict(int) + try: + for document in response_data: + # Each document is treated as one primitive + primitive_total += 1 + meta: Dict[str, Any] = document.get("metadata", {}) + content_meta: Dict[str, Any] = meta.get("content_metadata", {}) + doc_type: str = 
content_meta.get("type", "unknown") + primitive_counts_by_type[doc_type] += 1 + if doc_type == "structured": + subtype: str = content_meta.get("subtype", "unknown") + structured_by_subtype[subtype] += 1 + except Exception: + # Be resilient; don't let counting failures block output + pass + + # Merge trace (if any) with primitive counts and always write a traces file + try: + trace_obj = response.get("trace") or response.get("traces") or {} + trace_out = dict(trace_obj) + trace_out["primitive_counts"] = { + "total": primitive_total, + "by_type": dict(primitive_counts_by_type), + "structured_by_subtype": dict(structured_by_subtype), + } + + trace_path = os.path.join(output_directory, f"{clean_doc_name}.traces.json") + with open(trace_path, "w") as f: + f.write(json.dumps(trace_out, indent=2)) + logger.debug("Wrote trace output to %s", trace_path) + except Exception as e: + logger.error("Failed to write traces for %s: %s", clean_doc_name, e) + + annotations_obj = response.get("annotations") + if annotations_obj: + try: + annotations_path = os.path.join(output_directory, f"{clean_doc_name}.annotations.json") + with open(annotations_path, "w") as f: + f.write(json.dumps(annotations_obj, indent=2)) + logger.debug("Wrote annotations output to %s", annotations_path) + except Exception as e: + logger.error("Failed to write annotations for %s: %s", clean_doc_name, e) + # Organize by document type doc_map: Dict[str, List[Dict[str, Any]]] = {} for document in response_data: diff --git a/scripts/support/__init__.py b/scripts/support/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/support/system_monitor/README.md b/scripts/support/system_monitor/README.md new file mode 100644 index 000000000..a7c3ef1d7 --- /dev/null +++ b/scripts/support/system_monitor/README.md @@ -0,0 +1,166 @@ +# System Monitor + +A lightweight system tracing and dashboard toolkit with two primary workflows: + +1) Real-time monitoring: start/stop tracing directly from the dashboard UI and visualize live metrics. +2) Offline exploration: collect Parquet/CSV on one system and explore it on another system without a running tracer. + +This package contains: +- `system_monitor.py`: Dash dashboard (UI, charts, event annotations, process tree). +- `system_tracer.py`: Tracer library and CLI (collects metrics, writes Parquet atomically). + + +## Requirements + +- Python 3.9+ +- Core: `pandas`, `psutil`, `plotly`, `dash`, `click` +- Optional: + - `pyarrow` (preferred) or `fastparquet` for Parquet + - `dash-cytoscape` for process tree graph view (text view works without it) + - `docker` Python package and a local Docker daemon for container metrics + - `pynvml` for NVIDIA GPU metrics (if NVIDIA drivers are present) + +Install common dependencies (example): + +```bash +pip install pandas psutil plotly dash click pyarrow +# Optional extras +pip install fastparquet dash-cytoscape docker pynvml +``` + +Environment setup +- Ensure the `system_monitor` package is importable at the top level. If you are running from the repo without installing, set PYTHONPATH so `python -m system_monitor` works: + +```bash +export PYTHONPATH=$(pwd)/scripts/support:$PYTHONPATH +``` + +Alternatively, install the package into your environment (recommended for reuse). If you maintain a packaging config, use `pip install -e .` at the repo root. + + +## Quickstart A: Real-time Monitoring (single machine) + +Launch the dashboard on the machine you want to monitor and control tracing from the UI. 
+ +```bash +python -m system_monitor --datafile system_monitor.parquet --port 8050 +``` + +- Open the URL printed (default http://0.0.0.0:8050). +- In the left sidebar under Tracing: + - Set Output Parquet Path (defaults to `system_monitor.parquet`). + - Adjust Sampling and Write Interval. + - Toggle Enable GPU / Enable Docker as needed. + - Click Start to begin live tracing. Click Stop to end. Click Snapshot Now to force an immediate write. +- The graphs update as data is written. You can: + - Switch theme (Light/Dark) + - Change time range and smoothing + - Add/import events and toggle event markers + - Inspect the process tree (text or Cytoscape if installed) + +Notes +- Writes are atomic (tmp + replace) to avoid partial reads. +- If `pyarrow` is unavailable, the tracer falls back to `fastparquet` via pandas. +- Docker/GPU stats are optional and automatically disabled if their deps/daemons are unavailable. +- Timezones: Graphs default to Local display time. You can switch graphs to UTC or a custom IANA zone. Use the "Data timezone (source)" selector if your data was recorded in UTC. + + +## Quickstart B: Offline Data Collection and Exploration (two machines) + +Use one system to collect data (headless), then transfer the file to another system for exploration in the dashboard. + +1) Collect on Source (headless CLI): + +```bash +# Run continuously until interrupted (local timestamps by default) +python -m system_monitor.system_tracer run \ + --output /tmp/system_monitor.parquet \ + --sample-interval 2 \ + --write-interval 10 + +# Record timestamps in UTC instead of local +python -m system_monitor.system_tracer run \ + --output /tmp/system_monitor_utc.parquet \ + --sample-interval 2 \ + --write-interval 10 \ + --utc + +# Or run for a fixed duration (e.g., 5 minutes) +python -m system_monitor.system_tracer run \ + --output /tmp/system_monitor.parquet \ + --sample-interval 2 \ + --write-interval 10 \ + --duration 300 +``` + +2) Transfer the Parquet/CSV to your analysis machine: + +```bash +scp source:/tmp/system_monitor.parquet ./ +``` + +3) Explore on Destination (no tracer needed): + +```bash +python -m system_monitor --datafile ./system_monitor.parquet --port 8050 +``` + +- The dashboard loads the provided file and renders metrics. +- Tracing controls in the UI only affect the local machine; they are independent of the loaded file. + + +## Process Tree Inspection + +From dashboard: +- Go to the Process Tree view. +- Enter a PID or use the PID finder to search by name/command. +- Click Inspect to load the tree and thread counts. +- If `dash-cytoscape` is installed, toggle to Graph view for a visual tree with node details. + +From CLI: + +```bash +python -m system_monitor.system_tracer proctree +``` + + +## Events and Timezones + +- Add events via the sidebar date/time picker. +- Import CSV with two columns: `event,timestamp`. Timestamps are normalized internally. +- Display timezone can be set to Local, UTC, or a custom IANA zone. Event markers and data align accordingly. + + +## Tips & Troubleshooting + +- Parquet engines: Install `pyarrow` for best compatibility. `fastparquet` is used as a fallback. +- Docker metrics: Requires the Docker daemon running and the `docker` Python package. If unavailable, container graphs will be empty. +- GPU metrics: Requires `pynvml` and NVIDIA drivers. If unavailable, GPU graphs will be empty. +- Assets override: Set `SYSTEM_MONITOR_ASSETS` to point to a custom assets directory if desired. 
+- Permissions: Some process/thread info may require elevated privileges; run as a user with sufficient permissions if you see AccessDenied errors. + + +## Programmatic API (optional) + +Collect a one-off snapshot in Python: + +```python +from system_monitor.system_tracer import collect_system_snapshot +snap = collect_system_snapshot(enable_gpu=False, enable_docker=False) +``` + +Run tracer to Parquet in-process: + +```python +from system_monitor.system_tracer import monitor_to_parquet +monitor_to_parquet(output_file="system_monitor.parquet", sample_interval=2, write_interval=10) +``` + +UTC vs Local +- Tracer defaults to local timestamps. Pass `--utc` to record timestamps in UTC. +- Dashboard defaults to Local display time. Use the "Display timezone" control to switch to UTC or a custom IANA zone, and "Data timezone (source)" to inform the dashboard whether your stored data timestamps are Local or UTC. + + +## License + +Internal project module; follow repository licensing and contribution guidelines. diff --git a/scripts/support/system_monitor/__init__.py b/scripts/support/system_monitor/__init__.py new file mode 100644 index 000000000..9939ca4e2 --- /dev/null +++ b/scripts/support/system_monitor/__init__.py @@ -0,0 +1,11 @@ +"""System Monitor package. + +Provides the canonical entry point for the dashboard and tracer. + +Usage: + python -m system_monitor --datafile system_monitor.parquet +""" + +from .system_monitor import run_dashboard + +__all__ = ["run_dashboard"] diff --git a/scripts/support/system_monitor/__main__.py b/scripts/support/system_monitor/__main__.py new file mode 100644 index 000000000..24b1cf381 --- /dev/null +++ b/scripts/support/system_monitor/__main__.py @@ -0,0 +1,19 @@ +# flake8: noqa +# noqa +""" +Module entry point for the System Monitor dashboard. 
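+
+If a bundled assets directory ships with the package, it is exported via the
+SYSTEM_MONITOR_ASSETS environment variable (unless already set) before the
+dashboard module is imported, so the dashboard can locate the packaged styles.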
+ +Run with: + python -m system_monitor --datafile system_monitor.parquet +""" +import os + +_HERE = os.path.abspath(os.path.dirname(__file__)) +_PKG_ASSETS = os.path.join(_HERE, "assets") +if os.path.isdir(_PKG_ASSETS) and not os.environ.get("SYSTEM_MONITOR_ASSETS"): + os.environ["SYSTEM_MONITOR_ASSETS"] = _PKG_ASSETS + +from .system_monitor import run_dashboard + +if __name__ == "__main__": + run_dashboard() diff --git a/scripts/support/system_monitor/assets/style.css b/scripts/support/system_monitor/assets/style.css new file mode 100644 index 000000000..ea4fa438a --- /dev/null +++ b/scripts/support/system_monitor/assets/style.css @@ -0,0 +1,137 @@ +/* Global fonts and tokens */ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap'); + +:root { + --bg: #0f1115; + --surface: #151821; + --surface-2: #1b2030; + --text: #e5e7eb; + --text-muted: #9aa3b2; + --border: #2a2f3d; + --accent: #4f46e5; + --accent-2: #06b6d4; + --success: #10b981; + --danger: #ef4444; + --warning: #f59e0b; + --radius: 8px; + --shadow: 0 1px 2px rgba(0,0,0,0.25), 0 8px 24px rgba(0,0,0,0.18); + --gap-xs: 4px; --gap-sm: 8px; --gap-md: 12px; --gap-lg: 16px; --gap-xl: 24px; +} + +/* Light theme support via data-theme=light on body (optional) */ +body[data-theme="light"] { + --bg: #ffffff; + --surface: #f6f7fb; + --surface-2: #eef1f7; + --text: #111827; + --text-muted: #6b7280; + --border: #e5e7eb; +} + +html, body { height: 100%; } +body { + margin: 0; + background: var(--bg); + color: var(--text); + font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif; + font-size: 14px; + line-height: 1.5; +} + +/* Layout */ +.grid { display: grid; grid-template-columns: 320px 1fr; gap: var(--gap-lg); align-items: start; } +.sidebar { + position: sticky; top: var(--gap-lg); + padding: var(--gap-lg); + background: var(--surface); + border: 1px solid var(--border); + border-radius: var(--radius); + box-shadow: var(--shadow); +} +.section-title { font-size: 16px; font-weight: 600; margin: var(--gap-sm) 0; } +.label { color: var(--text-muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.04em; margin-bottom: 4px; display: block; } +.help, .muted { color: var(--text-muted); font-size: 12px; } +.control { margin: var(--gap-md) 0; } +.kpis { display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: var(--gap-md); margin: var(--gap-sm) 0 var(--gap-lg); } +.graph { margin-bottom: var(--gap-lg); background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: var(--gap-sm); } + +/* Inputs & Buttons */ +input, textarea { + background: var(--surface-2); + color: var(--text); + border: 1px solid var(--border); + border-radius: 6px; + padding: 8px 10px; +} +input::placeholder, textarea::placeholder { color: var(--text-muted); } +button, .button { + appearance: none; border: 1px solid var(--border); background: var(--surface-2); + color: var(--text); padding: 6px 12px; border-radius: 6px; cursor: pointer; +} +button:hover { border-color: color-mix(in srgb, var(--accent) 35%, var(--border)); } +button.primary { background: var(--accent); border-color: var(--accent); color: #fff; } +button.primary:hover { background: color-mix(in srgb, var(--accent) 85%, #000); } + +/* Dash core components */ +/* RadioItems */ +input[type="radio"] { accent-color: var(--accent); } +input[type="checkbox"] { accent-color: var(--accent); } + +/* Dropdown (react-select) */ +.Select-control { background: var(--surface-2); 
border-color: var(--border); color: var(--text); } +.Select--single > .Select-control .Select-value, .Select-placeholder { color: var(--text); } +.Select-menu-outer { background: var(--surface-2); border-color: var(--border); color: var(--text); z-index: 1000; } +.Select-option { background: var(--surface-2); color: var(--text); } +.Select-option.is-focused { background: color-mix(in srgb, var(--accent) 14%, var(--surface-2)); } +.Select-option.is-selected { background: color-mix(in srgb, var(--accent) 28%, var(--surface-2)); } + +/* Slider (rc-slider) */ +.rc-slider { padding: 6px 0; } +.rc-slider-rail { background: var(--border); } +.rc-slider-track { background: var(--accent); } +.rc-slider-handle { border-color: var(--accent); background: #fff; } + +/* Tabs */ +.tabs, .dash-tabs { background: transparent; } +.tab { background: var(--surface); border: 1px solid var(--border) !important; color: var(--text); border-radius: 6px 6px 0 0; margin-right: 4px; padding: 8px 12px; } +.tab--selected { border-bottom-color: transparent !important; background: var(--surface-2); } + +/* Upload */ +.dccUpload { border: 1px dashed var(--border); border-radius: 6px; padding: 6px 10px; color: var(--text-muted); } + +/* Cytoscape container */ +#proctree-cyto-container { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); } +#proctree-graph { background: var(--surface); } + +/* Helper spacing */ +hr { border: none; border-top: 1px solid var(--border); margin: var(--gap-md) 0; opacity: 0.7; } + +/* Plotly figure background harmonization */ +.js-plotly-plot .plotly .bg, .js-plotly-plot .plotly .bglayer, .js-plotly-plot .plotly .plot { background: transparent !important; } +.js-plotly-plot .plotly .infolayer { color: var(--text); } + +/* Sidebar spacing & control layout improvements */ +/* Add breathing room between stacked elements inside controls */ +.sidebar .control > *:not(:last-child) { margin-bottom: var(--gap-sm); } + +/* Space out button groups (e.g., Start/Stop/Reset/Snapshot) */ +.sidebar .control button, +.sidebar .control .button { margin-right: var(--gap-sm); margin-bottom: var(--gap-sm); } + +/* Make text/number inputs full-width and comfortable in the sidebar */ +.sidebar .control input[type="text"], +.sidebar .control input[type="number"] { width: 100%; box-sizing: border-box; } + +/* RadioItems and Checklist: wrap and space options nicely */ +.sidebar .control .dash-radio-items, +.sidebar .control .dash-checklist { display: flex; flex-wrap: wrap; gap: 6px 12px; } +.sidebar .control .dash-radio-items label, +.sidebar .control .dash-checklist label { margin: 0; display: inline-flex; align-items: center; gap: 6px; } + +/* Upload widget spacing */ +.sidebar .control .dccUpload { margin-top: var(--gap-sm); } + +/* Details sections: subtle separators and clickable summaries */ +.sidebar details { padding: 8px 0; border-top: 1px solid var(--border); } +.sidebar details:first-of-type { border-top: none; } +.sidebar details > summary { cursor: pointer; padding: 6px 0; } diff --git a/scripts/support/system_monitor/callbacks/__init__.py b/scripts/support/system_monitor/callbacks/__init__.py new file mode 100644 index 000000000..e51c9195c --- /dev/null +++ b/scripts/support/system_monitor/callbacks/__init__.py @@ -0,0 +1,28 @@ +from typing import Any + + +def register_callbacks(app: Any, *, cy_available: bool) -> None: + """Register all dashboard callbacks grouped by domain. 
+ + This function aggregates domain callback registration to keep the main + system_monitor.py body declarative. + + Parameters + ---------- + app : Any + Dash app instance. + cy_available : bool + Whether dash_cytoscape is available (for Process Tree graph callbacks). + """ + from .overview import register_overview_callbacks + from .proctree import register_proctree_callbacks + from .events import register_events_callbacks + from .containers import register_containers_callbacks + from .theme import register_theme_callbacks + + # Registration order is chosen for readability; the registrars are functionally independent. + register_overview_callbacks(app) + register_proctree_callbacks(app, cy_available=cy_available) + register_events_callbacks(app) + register_containers_callbacks(app) + register_theme_callbacks(app) diff --git a/scripts/support/system_monitor/callbacks/containers.py b/scripts/support/system_monitor/callbacks/containers.py new file mode 100644 index 000000000..aae098b62 --- /dev/null +++ b/scripts/support/system_monitor/callbacks/containers.py @@ -0,0 +1,10 @@ +from typing import Any + + +def register_containers_callbacks(app: Any) -> None: + """Register containers tab callbacks. + + Placeholder registrar; existing callbacks remain in system_monitor.py + until migrated. + """ + return diff --git a/scripts/support/system_monitor/callbacks/events.py b/scripts/support/system_monitor/callbacks/events.py new file mode 100644 index 000000000..7b1d8fc3d --- /dev/null +++ b/scripts/support/system_monitor/callbacks/events.py @@ -0,0 +1,10 @@ +from typing import Any + + +def register_events_callbacks(app: Any) -> None: + """Register events and annotations callbacks. + + Placeholder registrar; existing callbacks remain in system_monitor.py + until migrated. + """ + return diff --git a/scripts/support/system_monitor/callbacks/overview.py b/scripts/support/system_monitor/callbacks/overview.py new file mode 100644 index 000000000..f981c2858 --- /dev/null +++ b/scripts/support/system_monitor/callbacks/overview.py @@ -0,0 +1,10 @@ +from typing import Any + + +def register_overview_callbacks(app: Any) -> None: + """Register overview tab callbacks. + + Placeholder registrar; existing callbacks remain in system_monitor.py + until migrated.
+ """ + return diff --git a/scripts/support/system_monitor/callbacks/proctree.py b/scripts/support/system_monitor/callbacks/proctree.py new file mode 100644 index 000000000..2541ab43a --- /dev/null +++ b/scripts/support/system_monitor/callbacks/proctree.py @@ -0,0 +1,293 @@ +if True: + from typing import Any + from datetime import datetime + import dash + from dash import dcc + from dash.dependencies import Output, Input, State + import psutil + + def register_proctree_callbacks(app: Any, *, cy_available: bool) -> None: + # PID search: suggest processes matching a search string (similar to `ps -AFl | grep `) + @app.callback( + Output("proctree-suggestions", "options"), + [Input("proctree-search", "value")], + prevent_initial_call=False, + ) + def update_proctree_suggestions(search_text): + opts = [] + try: + pattern = (search_text or "").strip() + if not pattern: + return [] + pattern_low = pattern.lower() + # Collect processes with safe attribute access + matches = [] + for p in psutil.process_iter(attrs=["pid", "name", "username", "cmdline", "num_threads"]): + try: + info = p.info + pid = info.get("pid") + name = info.get("name") or "" + username = info.get("username") or "" + cmdline_list = info.get("cmdline") or [] + cmdline = " ".join(cmdline_list) + haystack = f"{name} {cmdline}".lower() + if pattern_low in haystack: + threads = info.get("num_threads") or 0 + label = f"PID {pid} • {username} • thr={threads} • {name} — {cmdline}".strip() + matches.append({"label": label[:300], "value": pid}) + except Exception: + continue + # Limit to first 50 + opts = matches[:50] + except Exception: + opts = [] + return opts + + # When a suggestion is selected, set the PID input + @app.callback( + Output("proctree-pid", "value"), + [Input("proctree-suggestions", "value")], + prevent_initial_call=True, + ) + def set_pid_from_selection(selected_pid): + try: + if selected_pid is None: + return dash.no_update + return int(selected_pid) + except Exception: + return dash.no_update + + # Theme-aware high-contrast styles for PID search controls + @app.callback( + Output("proctree-suggestions", "style"), + [Input("theme-toggle", "value")], + ) + def style_proctree_dropdown(theme_value): + light = { + "width": "420px", + "display": "inline-block", + "color": "#111", + "backgroundColor": "#ffffff", + "border": "1px solid #888", + } + dark = { + "width": "420px", + "display": "inline-block", + "color": "#eee", + "backgroundColor": "#222", + "border": "1px solid #555", + } + return dark if theme_value == "dark" else light + + @app.callback( + Output("proctree-search", "style"), + [Input("theme-toggle", "value")], + ) + def style_proctree_search(theme_value): + base = {"width": "320px", "marginLeft": "6px", "marginRight": "8px"} + if theme_value == "dark": + base.update({"backgroundColor": "#222", "color": "#eee", "border": "1px solid #555"}) + else: + base.update({"backgroundColor": "#fff", "color": "#111", "border": "1px solid #888"}) + return base + + # Toggle between text and graph tree containers + @app.callback( + [Output("proctree-cyto-container", "style"), Output("proctree-tree-text-container", "style")], + [Input("proctree-view-mode", "value")], + ) + def toggle_tree_view(view_mode): + # Always show the Graph container when Graph is selected, even if dash-cytoscape + # is not installed, so the fallback help message is visible. 
+ if view_mode == "graph": + return {"display": "block"}, {"display": "none"} + # default to text view + return {"display": "none"}, {"display": "block"} + + # Build cytoscape elements from last summary + if cy_available: + + @app.callback( + Output("proctree-graph", "elements"), + [Input("proctree-last-summary", "data")], + ) + def build_cytoscape_elements(summary): + try: + plist = (summary or {}).get("processes", []) + if not plist: + return [] + nodes = [] + edges = [] + pids = set() + for p in plist: + pid = p.get("pid") + name = p.get("name") or "?" + threads = int(p.get("threads") or 0) + pids.add(pid) + nodes.append( + { + "data": { + "id": str(pid), + "label": f"{name}({pid}) t={threads}", + "threads": threads, + } + } + ) + for p in plist: + pid = p.get("pid") + ppid = p.get("ppid") + if ppid in pids and pid in pids and ppid is not None and pid is not None: + edges.append({"data": {"source": str(ppid), "target": str(pid)}}) + return nodes + edges + except Exception: + return [] + + @app.callback( + Output("proctree-graph", "stylesheet"), + [Input("theme-toggle", "value")], + ) + def cytoscape_stylesheet(theme_value): + # map threads to size/color + node_color_dark = "#4aa3ff" + node_color_light = "#1f77b4" + text_color_dark = "#e5e5e5" + text_color_light = "#222222" + edge_color_dark = "#888" + edge_color_light = "#aaa" + base = [ + { + "selector": "node", + "style": { + "label": "data(label)", + "font-size": 10, + "color": (text_color_dark if theme_value == "dark" else text_color_light), + "background-color": (node_color_dark if theme_value == "dark" else node_color_light), + "width": "mapData(threads, 0, 64, 20, 60)", + "height": "mapData(threads, 0, 64, 20, 60)", + "text-valign": "center", + "text-halign": "center", + }, + }, + { + "selector": "edge", + "style": { + "line-color": (edge_color_dark if theme_value == "dark" else edge_color_light), + "target-arrow-color": (edge_color_dark if theme_value == "dark" else edge_color_light), + "target-arrow-shape": "triangle", + "curve-style": "bezier", + "width": 1.5, + }, + }, + ] + return base + + @app.callback( + Output("proctree-node-details", "children"), + [Input("proctree-graph", "tapNodeData")], + [State("proctree-last-summary", "data")], + ) + def show_node_details(tap_node, summary): + try: + if not tap_node: + return "" + pid = int(tap_node.get("id")) + plist = (summary or {}).get("processes", []) + ent = next((p for p in plist if p.get("pid") == pid), None) + if not ent: + return "" + name = ent.get("name") or "?" + ppid = ent.get("ppid") + threads = ent.get("threads") + return f"Selected: {name} ({pid}) — PPID={ppid}, Threads={threads}" + except Exception: + return "" + + # Force a layout re-run whenever elements change + @app.callback( + Output("proctree-graph", "layout"), + [Input("proctree-graph", "elements")], + ) + def refresh_cyto_layout(elements): + return {"name": "breadthfirst", "directed": True} + + # Status helper under the graph container + @app.callback( + Output("proctree-graph-status", "children"), + [Input("proctree-view-mode", "value"), Input("proctree-last-summary", "data")], + ) + def update_graph_status(view_mode, summary): + if view_mode != "graph": + return "" + if not cy_available: + return "Graph view requires dash-cytoscape. Install with: pip install dash-cytoscape" + plist = (summary or {}).get("processes", []) + if not plist: + return "No graph data yet. Click 'Inspect' after entering a valid PID." 
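+            # Estimate the edges the graph will draw: bucket children by ppid and
+            # count those whose parent is known; roots (ppid=None) add no edges.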
+ # count edges + by_ppid = {} + for p in plist: + by_ppid.setdefault(p.get("ppid"), []).append(p) + edge_count = sum(len(v) for k, v in by_ppid.items() if k is not None) + return f"Graph ready: {len(plist)} node(s), {edge_count} edge(s). Tip: click a node to see details." + + # Snapshot current summary + @app.callback( + [Output("proctree-snapshot", "data"), Output("proctree-snapshot-status", "children")], + [Input("proctree-snapshot-btn", "n_clicks")], + [State("proctree-last-summary", "data")], + prevent_initial_call=True, + ) + def take_snapshot(n_clicks, summary): + try: + if not summary or not summary.get("processes"): + return dash.no_update, "No current tree to snapshot. Run Inspect first." + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + snap = {"timestamp": ts, "summary": summary} + return snap, f"Snapshot captured at {ts}." + except Exception: + return dash.no_update, "Failed to capture snapshot." + + # Diff current summary to snapshot + @app.callback( + Output("proctree-diff-result", "children"), + [Input("proctree-diff-btn", "n_clicks")], + [State("proctree-snapshot", "data"), State("proctree-last-summary", "data")], + prevent_initial_call=True, + ) + def diff_to_snapshot(n_clicks, snapshot, current): + try: + if not snapshot or not snapshot.get("summary"): + return "No snapshot available. Click 'Take Snapshot' after Inspect." + snap = snapshot.get("summary") or {} + snap_plist = snap.get("processes", []) + cur_plist = (current or {}).get("processes", []) + snap_by_pid = {p.get("pid"): p for p in snap_plist} + cur_by_pid = {p.get("pid"): p for p in cur_plist} + added = sorted([pid for pid in cur_by_pid.keys() if pid not in snap_by_pid]) + removed = sorted([pid for pid in snap_by_pid.keys() if pid not in cur_by_pid]) + changed = [] + for pid in set(cur_by_pid.keys()).intersection(snap_by_pid.keys()): + t0 = int(snap_by_pid[pid].get("threads") or 0) + t1 = int(cur_by_pid[pid].get("threads") or 0) + if t0 != t1: + changed.append((pid, t0, t1)) + lines = [] + lines.append(f"Added: {len(added)}") + if added: + lines.extend([f" + {pid}" for pid in added[:50]]) + if len(added) > 50: + lines.append(" …") + lines.append(f"Removed: {len(removed)}") + if removed: + lines.extend([f" - {pid}" for pid in removed[:50]]) + if len(removed) > 50: + lines.append(" …") + lines.append(f"Thread changes: {len(changed)}") + if changed: + for pid, t0, t1 in changed[:50]: + lines.append(f" ~ {pid}: {t0} -> {t1}") + if len(changed) > 50: + lines.append(" …") + return dcc.Markdown("```text\n" + "\n".join(lines) + "\n```") + except Exception: + return "Diff failed." diff --git a/scripts/support/system_monitor/callbacks/theme.py b/scripts/support/system_monitor/callbacks/theme.py new file mode 100644 index 000000000..bee8e234a --- /dev/null +++ b/scripts/support/system_monitor/callbacks/theme.py @@ -0,0 +1,10 @@ +from typing import Any + + +def register_theme_callbacks(app: Any) -> None: + """Register theme-related callbacks. + + Placeholder registrar; existing callbacks remain in system_monitor.py + until migrated. + """ + return diff --git a/scripts/support/system_monitor/layout.py b/scripts/support/system_monitor/layout.py new file mode 100644 index 000000000..8b47769d3 --- /dev/null +++ b/scripts/support/system_monitor/layout.py @@ -0,0 +1,1219 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# pylint: skip-file +# flake8: noqa + +from typing import Any +from dash import dcc, html + +try: + import dash_cytoscape as cy # type: ignore +except Exception: # pragma: no cover + cy = None + + +def build_layout(*, datafile: str, interval: int, cy_available: bool, cy_module: Any = None): + """Build the System Monitor dashboard layout (sidebar + tabs + stores + interval). + + Parameters + ---------- + datafile : str + Default datafile path to display in the Output Parquet Path field and initial store. + interval : int + Refresh interval in seconds for the dcc.Interval component. + cy_available : bool + If True, render the Cytoscape process tree graph; otherwise show helper text. + cy_module : Any, optional + The dash_cytoscape module to use if available; if None, falls back to local import if present. + """ + _cy = cy_module if cy_module is not None else cy + + return html.Div( + [ + # Stores + dcc.Store(id="datafile-store", data=datafile, storage_type="local"), + dcc.Store(id="event-store", data=[], storage_type="local"), + dcc.Store(id="event-auto-store", data=[], storage_type="local"), + dcc.Store(id="watch-state-store", data={}, storage_type="session"), + dcc.Store(id="proctree-last-summary", data={}, storage_type="memory"), + dcc.Store(id="proctree-snapshot", data=None, storage_type="local"), + dcc.Store(id="theme-sink", data=None, storage_type="memory"), + # body-bg-sync is provided as a hidden Div below for clientside callback output + html.Div( + [ + # Sidebar controls + html.Div( + [ + html.H2("System Monitor", className="section-title"), + html.Div([html.Span(id="data-source", className="muted")]), + html.Div( + [ + html.Span("Last updated: ", className="muted"), + html.Span("Never", id="last-updated", className="muted"), + ], + style={"marginBottom": "6px"}, + ), + html.Details( + [ + html.Summary("Time & Display", className="section-title"), + html.Div( + [ + html.Div("Time range", className="label"), + dcc.RadioItems( + id="time-range", + options=[ + {"label": "10m", "value": 10}, + {"label": "30m", "value": 30}, + {"label": "1h", "value": 60}, + {"label": "3h", "value": 180}, + {"label": "All", "value": 0}, + ], + value=30, + persistence=True, + persisted_props=["value"], + persistence_type="local", + labelStyle={"display": "inline-block", "marginRight": "8px"}, + ), + ], + className="control", + ), + html.Div( + [ + html.Div("Display timezone", className="label"), + dcc.Dropdown( + id="display-tz", + options=[ + {"label": "Local", "value": "local"}, + {"label": "UTC", "value": "utc"}, + {"label": "Custom…", "value": "custom"}, + ], + value="local", + clearable=False, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + ], + className="control", + ), + html.Div( + [ + dcc.Input( + id="display-tz-custom", + type="text", + placeholder="e.g., America/Denver", + debounce=True, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ) + ], + id="display-tz-custom-wrap", + className="control", + style={"display": "none"}, + ), + html.Div( + [ + html.Div("Data timezone (source)", className="label"), + dcc.RadioItems( + id="data-tz", + options=[ + {"label": "Local", "value": "local"}, + {"label": "UTC", "value": "utc"}, + ], + value="local", + persistence=True, + persisted_props=["value"], + persistence_type="local", + labelStyle={"display": "inline-block", "marginRight": "8px"}, + ), + html.Div( + "Tip: Set to UTC if you started the tracer with --utc.", + className="muted", + 
style={"marginTop": "4px"}, + ), + ], + className="control", + ), + html.Div( + [ + html.Div("Smoothing (samples)", className="label"), + dcc.Slider( + id="smoothing-window", + min=1, + max=10, + step=1, + value=3, + marks={1: "1", 5: "5", 10: "10"}, + tooltip={"placement": "bottom", "always_visible": False}, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + ], + className="control", + ), + html.Div( + [ + dcc.Checklist( + id="pause-refresh", + options=[{"label": "Pause auto-refresh", "value": "pause"}], + value=[], + inline=True, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ) + ], + className="control", + ), + ], + open=True, + ), + html.Details( + [ + html.Summary("Live Tracing", className="section-title"), + html.Div( + "Configure and control in-process tracing. For offline files, " + "leave tracing stopped.", + className="muted", + style={"marginTop": "-6px", "marginBottom": "6px"}, + ), + html.Div( + [ + html.Div("Data source mode", className="label"), + dcc.RadioItems( + id="data-source-mode", + options=[ + {"label": "Auto (prefer live if running)", "value": "auto"}, + {"label": "Live tracer", "value": "live"}, + {"label": "File (Parquet/CSV)", "value": "file"}, + ], + value="auto", + persistence=True, + persisted_props=["value"], + persistence_type="local", + labelStyle={"display": "block", "marginRight": "8px"}, + ), + ], + className="control", + ), + html.Div( + [ + html.Div("Output Parquet Path", className="label"), + dcc.Input( + id="tracer-output-path", + type="text", + value=datafile, + debounce=True, + persistence=True, + persisted_props=["value"], + persistence_type="local", + style={"width": "100%"}, + ), + ], + className="control", + ), + html.Div( + [ + html.Div("Sampling (s)", className="label"), + dcc.Input( + id="tracer-sample-interval", + type="number", + min=0.1, + step=0.1, + value=5.0, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + ], + className="control", + ), + html.Div( + [ + html.Div("Write Interval (s)", className="label"), + dcc.Input( + id="tracer-write-interval", + type="number", + min=1, + step=1, + value=10.0, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + ], + className="control", + ), + html.Div( + [ + dcc.Checklist( + id="tracer-options", + options=[ + {"label": "Enable GPU", "value": "gpu"}, + {"label": "Enable Docker", "value": "docker"}, + {"label": "UTC timestamps", "value": "utc"}, + ], + value=["gpu", "docker"], + inline=True, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ) + ], + className="control", + ), + html.Div( + [ + html.Button( + "Start", id="tracer-start-btn", n_clicks=0, className="button primary" + ), + html.Button( + "Stop", + id="tracer-stop-btn", + n_clicks=0, + className="button", + style={"marginLeft": "6px"}, + ), + html.Button( + "Reset Buffer", + id="tracer-reset-btn", + n_clicks=0, + className="button", + style={"marginLeft": "6px"}, + ), + html.Button( + "Snapshot Now", + id="tracer-snapshot-btn", + n_clicks=0, + className="button primary", + style={"marginLeft": "6px"}, + ), + ], + className="control", + ), + html.Div(id="tracer-status", className="muted", style={"marginTop": "6px"}), + ], + open=False, + ), + html.Hr(style={"opacity": 0.2}), + html.Details( + [ + html.Summary("Events & Annotations", className="section-title"), + html.Div( + "Add named events to appear as vertical markers on all charts.", + className="muted", + style={"marginTop": 
"-6px", "marginBottom": "6px"}, + ), + html.Div( + [ + dcc.Input( + id="event-name", + type="text", + placeholder="Event name", + persistence=True, + persisted_props=["value"], + persistence_type="local", + style={"width": "100%"}, + ) + ], + className="control", + ), + html.Div( + [ + html.Div("Event time", className="label"), + html.Div( + [ + dcc.DatePickerSingle( + id="event-date", + display_format="YYYY-MM-DD", + persistence=True, + persistence_type="local", + ), + dcc.Input( + id="event-time", + type="text", + placeholder="HH:MM[:SS]", + persistence=True, + persisted_props=["value"], + persistence_type="local", + style={"width": "120px", "marginLeft": "8px"}, + ), + ], + style={"display": "flex", "alignItems": "center"}, + ), + html.Div( + "Date/time interpreted in selected Display Timezone; stored as UTC.", + className="muted", + style={"marginTop": "4px"}, + ), + ], + className="control", + ), + html.Div( + [ + html.Button( + "Add", id="add-event-btn", n_clicks=0, className="inline button primary" + ), + html.Button( + "Add (Now)", + id="add-event-now-btn", + n_clicks=0, + className="inline button primary", + ), + html.Button("Clear", id="clear-events-btn", n_clicks=0, className="inline"), + dcc.Upload( + id="event-upload", + children=html.Div(["Import CSV"]), + className="dccUpload inline", + ), + ], + className="control", + ), + html.Div( + [ + dcc.Checklist( + id="event-display-options", + options=[{"label": "Show event markers", "value": "markers"}], + value=["markers"], + inline=True, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ) + ], + className="control", + ), + html.Div(id="event-list", className="control"), + ], + open=False, + ), + html.Details( + [ + html.Summary( + "Watch Points (auto-create events when thresholds are exceeded)", + className="section-title", + ), + html.Div( + [ + dcc.Checklist( + id="watch-enable", + options=[ + {"label": "CPU %", "value": "cpu"}, + {"label": "Memory %", "value": "mem"}, + {"label": "Threads", "value": "threads"}, + {"label": "Processes", "value": "procs"}, + ], + value=[], + inline=True, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + html.Div( + [ + html.Span("CPU % >"), + dcc.Input( + id="watch-cpu", + type="number", + min=0, + max=100, + step=1, + style={"width": "70px", "marginRight": "12px"}, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + html.Span("Mem % >"), + dcc.Input( + id="watch-mem", + type="number", + min=0, + max=100, + step=1, + style={"width": "70px", "marginRight": "12px"}, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + html.Span("Threads >"), + dcc.Input( + id="watch-threads", + type="number", + min=0, + step=1, + style={"width": "90px", "marginRight": "12px"}, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + html.Span("Processes >"), + dcc.Input( + id="watch-procs", + type="number", + min=0, + step=1, + style={"width": "90px", "marginRight": "12px"}, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + ], + style={"marginTop": "6px"}, + ), + html.Div( + [ + html.Button( + "Clear Auto Events", + id="clear-auto-events-btn", + n_clicks=0, + className="inline", + ) + ], + className="control", + ), + ] + ), + ], + open=False, + ), + html.Hr(style={"opacity": 0.2}), + html.Details( + [ + html.Summary("Appearance", className="section-title"), + html.Div( + [ + html.Div("Theme", className="label"), + dcc.RadioItems( 
+ id="theme-toggle", + options=[ + {"label": "Light", "value": "light"}, + {"label": "Dark", "value": "dark"}, + ], + value="dark", + persistence=True, + persisted_props=["value"], + persistence_type="local", + labelStyle={"display": "inline-block", "marginRight": "8px"}, + ), + ], + className="control", + ), + ], + open=False, + ), + ], + className="sidebar", + ), + # Main content + html.Div( + [ + # Contextual notice banner (filled by callback) + html.Div( + id="notice-banner", + style={ + "marginBottom": "10px", + "border": "1px solid var(--border)", + "padding": "8px", + "display": "block", + }, + ), + # Tabs + dcc.Tabs( + id="main-tabs", + value="tab-overview", + colors={ + "border": "var(--border)", + "primary": "var(--text)", + "background": "var(--surface)", + }, + children=[ + dcc.Tab( + label="Overview", + value="tab-overview", + children=[ + html.Div( + [ + html.Div( + [ + html.H2("System Overview", className="section-title"), + html.Div(id="kpi-row", className="kpis"), + dcc.Graph( + id="system-overview-graph", + className="graph", + style={"height": "340px"}, + ), + ] + ), + ], + style={"marginTop": "4px"}, + ) + ], + ), + dcc.Tab( + label="Process Tree", + value="tab-proctree", + children=[ + html.Div( + [ + html.H2("Process Tree Inspector", className="section-title"), + dcc.Loading( + id="proctree-loading-controls", + type="dot", + children=html.Div( + [ + html.Div( + [ + html.Span("Root PID:"), + dcc.Input( + id="proctree-pid", + type="number", + placeholder="Enter PID", + debounce=True, + style={ + "width": "140px", + "marginLeft": "6px", + "marginRight": "12px", + }, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + dcc.Checklist( + id="proctree-verbose", + options=[ + {"label": "Verbose", "value": "verbose"} + ], + value=[], + inline=True, + persistence=True, + persisted_props=["value"], + persistence_type="local", + style={ + "display": "inline-block", + "marginRight": "12px", + }, + ), + html.Span("Find PID:"), + dcc.Input( + id="proctree-search", + type="text", + placeholder="e.g. 
" + "microservice_entrypoint.py", + debounce=False, + style={ + "width": "320px", + "marginLeft": "6px", + "marginRight": "8px", + }, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + dcc.Dropdown( + id="proctree-suggestions", + options=[], + placeholder="Select a matching PID", + style={ + "width": "420px", + "display": "inline-block", + }, + clearable=True, + ), + html.Button( + "Inspect", + id="proctree-run", + n_clicks=0, + className="button primary", + style={"marginLeft": "12px"}, + ), + html.Div( + "Type to search; " + "select a row to populate Root PID.", + className="muted", + style={"marginTop": "6px"}, + ), + html.Div( + [ + html.Span( + "Tree view:", + style={"marginRight": "8px"}, + ), + dcc.RadioItems( + id="proctree-view-mode", + options=[ + { + "label": "Text", + "value": "text", + }, + { + "label": "Graph", + "value": "graph", + }, + ], + value="text", + labelStyle={ + "display": "inline-block", + "marginRight": "8px", + }, + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + html.Button( + "Take Snapshot", + id="proctree-snapshot-btn", + n_clicks=0, + className="button", + style={"marginLeft": "12px"}, + ), + html.Button( + "Compare to Snapshot", + id="proctree-diff-btn", + n_clicks=0, + className="button", + style={"marginLeft": "8px"}, + ), + ], + style={"marginTop": "8px"}, + ), + ], + className="control", + ), + ], + className="control", + ), + ), + dcc.Loading( + id="proctree-loading-totals", + type="dot", + children=html.Div( + id="proctree-totals", + className="muted", + style={"marginBottom": "8px"}, + ), + ), + dcc.Loading( + id="proctree-loading-graphs", + type="default", + style={"width": "100%"}, + children=html.Div( + [ + html.Div( + [ + dcc.Graph( + id="proctree-procs-by-cmd", + className="graph", + style={"height": "260px", "width": "100%"}, + config={"responsive": True}, + ) + ], + style={ + "flex": "1 1 0", + "minWidth": "0", + "boxSizing": "border-box", + "overflow": "hidden", + }, + ), + html.Div( + [ + dcc.Graph( + id="proctree-threads-by-cmd", + className="graph", + style={"height": "260px", "width": "100%"}, + config={"responsive": True}, + ) + ], + style={ + "flex": "1 1 0", + "minWidth": "0", + "boxSizing": "border-box", + "overflow": "hidden", + }, + ), + ], + style={ + "display": "flex", + "gap": "12px", + "alignItems": "stretch", + "width": "100%", + }, + ), + ), + dcc.Loading( + id="proctree-loading-tree", + type="cube", + style={"width": "100%"}, + children=html.Div( + [ + html.H3("Tree", className="section-title"), + html.Div( + [ + ( + _cy.Cytoscape( + id="proctree-graph", + elements=[], + layout={ + "name": "breadthfirst", + "directed": True, + }, + style={ + "width": "100%", + "height": "520px", + "border": "1px solid #444", + "boxSizing": "border-box", + "maxWidth": "100%", + "overflow": "hidden", + }, + stylesheet=[], + ) + if cy_available and _cy is not None + else html.Div( + "Graph view requires " + "dash-cytoscape. 
Install with: " + "pip install dash-cytoscape", + className="muted", + style={"padding": "8px"}, + ) + ) + ], + id="proctree-cyto-container", + style={ + "display": "none", + "width": "100%", + "maxWidth": "100%", + "overflowX": "hidden", + }, + ), + html.Div( + id="proctree-graph-status", + className="muted", + style={"marginTop": "6px"}, + ), + html.Div( + [ + dcc.Markdown( + id="proctree-tree-md", + style={"whiteSpace": "pre-wrap"}, + ) + ], + id="proctree-tree-text-container", + style={"display": "block"}, + ), + html.Div( + id="proctree-node-details", + className="muted", + style={"marginTop": "8px"}, + ), + html.Div( + id="proctree-snapshot-status", + className="muted", + style={"marginTop": "8px"}, + ), + html.Div( + id="proctree-diff-result", + className="muted", + style={"marginTop": "8px"}, + ), + ] + ), + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ) + ], + ), + dcc.Tab( + label="CPU & Memory", + value="tab-cpu-mem", + children=[ + html.Div( + [ + html.H2("CPU Utilization", className="section-title"), + html.Div( + [ + html.Div( + [ + dcc.Graph( + id="cpu-aggregated-utilization-graph", + className="graph", + style={"height": "320px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + html.Div( + [ + dcc.Graph( + id="cpu-individual-utilization-graph", + className="graph", + style={"height": "320px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + ] + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + html.Div( + [ + html.H2("Memory Usage", className="section-title"), + dcc.Graph( + id="memory-graph", className="graph", style={"height": "320px"} + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + html.Div( + [ + html.H2("File Descriptor Usage", className="section-title"), + html.Div( + [ + html.Div( + [ + dcc.Graph( + id="file-count-graph", + className="graph", + style={"height": "280px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + html.Div( + [ + dcc.Graph( + id="fd-usage-graph", + className="graph", + style={"height": "280px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + ] + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + html.Div( + [ + html.H2("Processes and Threads", className="section-title"), + html.Div( + [ + html.Div( + [ + dcc.Graph( + id="process-count-graph", + className="graph", + style={"height": "260px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + html.Div( + [ + dcc.Graph( + id="thread-count-graph", + className="graph", + style={"height": "260px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + ] + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + ], + ), + dcc.Tab( + label="I/O & Network", + value="tab-io-net", + children=[ + html.Div( + [ + html.H2("Network Activity", className="section-title"), + dcc.Graph( + id="network-graph", className="graph", style={"height": "320px"} + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + html.Div( + [ + html.H2("Disk I/O", className="section-title"), + dcc.Graph( + id="disk-io-graph", className="graph", style={"height": "320px"} + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + ], + ), + dcc.Tab( + label="GPU", + value="tab-gpu", + children=[ + html.Div( + [ + 
html.H2("GPU Usage", className="section-title"), + html.Div( + [ + html.Div( + [ + dcc.Graph( + id="gpu-utilization-graph", + className="graph", + style={"height": "300px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + html.Div( + [ + dcc.Graph( + id="gpu-memory-graph", + className="graph", + style={"height": "300px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + ] + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + ], + ), + dcc.Tab( + label="Containers", + value="tab-containers", + children=[ + html.Div( + [ + html.Details( + [ + html.Summary("Container Focus"), + html.Div( + [ + dcc.Checklist( + id="container-auto-top", + options=[ + { + "label": "Auto select top by CPU", + "value": "auto", + } + ], + value=["auto"], + persistence=True, + persisted_props=["value"], + persistence_type="local", + labelStyle={ + "display": "inline-block", + "marginRight": "10px", + }, + ), + html.Label("Top N (when auto):"), + dcc.Slider( + id="container-top-n", + min=1, + max=10, + step=1, + value=5, + marks={1: "1", 5: "5", 10: "10"}, + tooltip={ + "placement": "bottom", + "always_visible": False, + }, + ), + html.Label("Or select containers:"), + dcc.Dropdown( + id="container-select", + options=[], + value=[], + multi=True, + placeholder="Select containers...", + persistence=True, + persisted_props=["value"], + persistence_type="local", + ), + ], + style={"margin": "8px 0"}, + ), + ], + open=False, + ) + ] + ), + html.Div( + [ + html.H2("Container Metrics", className="section-title"), + html.Div( + [ + html.Div( + [ + dcc.Graph( + id="container-cpu-utilization-graph", + className="graph", + style={"height": "280px"}, + ) + ], + style={ + "width": "33%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + html.Div( + [ + dcc.Graph( + id="container-memory-utilization-graph", + className="graph", + style={"height": "280px"}, + ) + ], + style={ + "width": "33%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + html.Div( + [ + dcc.Graph( + id="container-files-graph", + className="graph", + style={"height": "280px"}, + ) + ], + style={ + "width": "33%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + ] + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + html.Div( + [ + html.H2( + "Container Network Throughput (MB/s)", className="section-title" + ), + dcc.Graph( + id="container-net-graph", + className="graph", + style={"height": "300px"}, + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + html.Div( + [ + html.H2("Container Disk I/O (MB/s)", className="section-title"), + dcc.Graph( + id="container-io-graph", + className="graph", + style={"height": "300px"}, + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + html.Div( + [ + html.H2( + "Top Containers (Latest Sample)", className="section-title" + ), + html.Div( + [ + html.Div( + [ + dcc.Graph( + id="container-top-cpu-graph", + className="graph", + style={"height": "280px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + html.Div( + [ + dcc.Graph( + id="container-top-mem-graph", + className="graph", + style={"height": "280px"}, + ) + ], + style={ + "width": "50%", + "display": "inline-block", + "verticalAlign": "top", + }, + ), + ] + ), + ], + style={"width": "100%", "marginBottom": "16px"}, + ), + ], + ), + ], + ), + ] + ), + ], + className="grid", + ), + # Refresh interval + 
dcc.Interval( + id="interval-component", interval=interval * 1000, n_intervals=0 # convert seconds to milliseconds + ), + # Hidden div to sync body background with theme (already declared store above) + html.Div(id="body-bg-sync", style={"display": "none"}), + ], + id="page-container", + style={"padding": "16px"}, + ) diff --git a/scripts/support/system_monitor/system_monitor.py b/scripts/support/system_monitor/system_monitor.py new file mode 100644 index 000000000..d6a7a89f1 --- /dev/null +++ b/scripts/support/system_monitor/system_monitor.py @@ -0,0 +1,1715 @@ +import dash +from dash import dcc, html +from dash.dependencies import Output, Input, State, ALL +import pandas as pd +import plotly.graph_objects as go +import os +import click +import base64 +import io +import csv +from datetime import datetime +import json +import psutil + +# noqa +# flake8: noqa + +# Use absolute package import only (no relatives or fallbacks) +from system_tracer import ( + get_process_tree_summary, + SystemTracer, +) +from layout import build_layout +from callbacks import register_callbacks +from helpers import apply_theme, style_minimal_figure + +try: + from dateutil.tz import tzlocal, gettz +except Exception: + tzlocal = None + gettz = None + +try: + import dash_cytoscape as cy # type: ignore + + CY_AVAILABLE = True +except Exception: + CY_AVAILABLE = False + cy = None + +try: + import pyarrow.parquet as pq # noqa + + PARQUET_AVAILABLE = True +except ImportError: + PARQUET_AVAILABLE = False + print("Warning: pyarrow is not available. Please install pyarrow for Parquet file support.") + + +@click.command() +@click.option("--datafile", "-d", default="system_monitor.parquet", help="Path to the parquet data file") +@click.option("--port", "-p", default=8050, help="Port to run the dashboard server on") +@click.option("--host", "-h", default="0.0.0.0", help="Host to run the dashboard server on") +@click.option("--interval", "-i", default=10, help="Refresh interval in seconds") +@click.option("--debug/--no-debug", default=True, help="Run in debug mode") +def run_dashboard(datafile, port, host, interval, debug): + """Run the system monitoring dashboard with the specified parameters.""" + + # Validate the data file (be permissive; pandas can use either pyarrow or fastparquet) + if not os.path.exists(datafile): + print(f"Error: Data file '{datafile}' not found.") + print("Dashboard will start but won't display data until the file exists.") + elif datafile.endswith(".parquet") and not PARQUET_AVAILABLE: + print("Warning: pyarrow is not installed; will attempt to read parquet via pandas (fastparquet if available).") + elif not datafile.endswith(".parquet") and not datafile.endswith(".csv"): + print(f"Warning: Data file '{datafile}' is not a .parquet or .csv file.") + print("Attempting to load it anyway, but this may cause errors.") + + # Initialize the Dash app and ensure assets/ resolves to the packaged assets by default + pkg_dir = os.path.abspath(os.path.dirname(__file__)) + default_assets = os.path.join(pkg_dir, "assets") + assets_override = os.environ.get("SYSTEM_MONITOR_ASSETS") + assets_path = assets_override if assets_override and os.path.isdir(assets_override) else default_assets + app = dash.Dash(__name__, assets_folder=assets_path) + + # Global Plotly defaults: Inter font and transparent backgrounds (Tufte-style minimalism) + try: + import plotly.io as pio # type: ignore + + tufted = go.layout.Template() + tufted.layout.font.family = "Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, 
sans-serif" + tufted.layout.paper_bgcolor = "rgba(0,0,0,0)" + tufted.layout.plot_bgcolor = "rgba(0,0,0,0)" + pio.templates["tufted"] = tufted + pio.templates.default = "tufted" + except Exception: + pass + + # Keep index minimal; styling handled via assets/style.css + app.index_string = """ + + + + {%metas%} + {%title%} + {%favicon%} + {%css%} + + + {%app_entry%} +
+ {%config%} + {%scripts%} + {%renderer%} + </footer> + </body> + </html>
+ + + """ + + # Set the app title + app.title = "System Resource Monitor" + + # Use extracted layout factory (overrides inline layout above) + app.layout = build_layout( + datafile=datafile, + interval=interval, + cy_available=CY_AVAILABLE, + cy_module=cy, + ) + # Register callbacks by domain (existing in this module; future modules will hook here) + register_callbacks(app, cy_available=CY_AVAILABLE) + + # Helper function to load and filter data + def load_data(data_path, time_range_minutes, source_mode="auto"): + try: + # Live buffer branch (explicit when mode == live, or auto + running) + if source_mode in ("live", "auto"): + try: + if _is_running(): + with _tracer_lock: + tracer = _tracer_obj.get("tracer") + if tracer is not None and getattr(tracer, "data_buffer", None) is not None: + df = pd.DataFrame(list(tracer.data_buffer)) + if time_range_minutes > 0 and not df.empty and "timestamp" in df.columns: + latest_time = pd.to_datetime(df["timestamp"]).max() + time_threshold = latest_time - pd.Timedelta(minutes=time_range_minutes) + df = df[pd.to_datetime(df["timestamp"]) >= time_threshold] + return df + except Exception as le: + print(f"Error reading live buffer: {le}") + + # File branch (explicit when mode == file, or auto + no live) + if data_path and os.path.exists(data_path): + # Prefer parquet if extension says so; let pandas pick available engine (pyarrow/fastparquet) + if data_path.endswith(".parquet"): + try: + df = pd.read_parquet(data_path) + except Exception as pe: + print(f"Parquet read failed via pandas: {pe}. Trying CSV fallback (may fail)...") + df = pd.read_csv(data_path, parse_dates=["timestamp"]) # best-effort + elif data_path.endswith(".csv"): + df = pd.read_csv(data_path, parse_dates=["timestamp"]) + else: + # Try parquet first, then CSV + try: + df = pd.read_parquet(data_path) + except Exception: + df = pd.read_csv(data_path, parse_dates=["timestamp"]) # best-effort + + # Filter by time range if specified + if time_range_minutes > 0 and not df.empty and "timestamp" in df.columns: + latest_time = pd.to_datetime(df["timestamp"]).max() + time_threshold = latest_time - pd.Timedelta(minutes=time_range_minutes) + df = df[pd.to_datetime(df["timestamp"]) >= time_threshold] + + return df + else: + return pd.DataFrame() + except Exception as e: + print(f"Error loading data: {e}") + return pd.DataFrame() + + # Interval enable/disable based on pause toggle + @app.callback( + Output("interval-component", "disabled"), + [Input("pause-refresh", "value")], + ) + def _toggle_interval_disabled(pause_values): + try: + return isinstance(pause_values, list) and ("pause" in pause_values) + except Exception: + return False + + # Notice banner content (guides first-time usage) + @app.callback( + Output("notice-banner", "children"), + [ + Input("interval-component", "n_intervals"), + Input("datafile-store", "data"), + Input("data-source-mode", "value"), + ], + ) + def _notice_banner(_n, data_path, source_mode): + try: + running = _is_running() + has_file = bool(data_path) and os.path.exists(data_path) + if source_mode == "live": + if not running: + return html.Div( + [ + html.Strong("No live data. "), + "Click Start in Live Tracing to begin collecting metrics. ", + "Or switch Data source mode to File and set a Parquet/CSV path.", + ] + ) + elif source_mode == "file": + if not has_file: + return html.Div( + [ + html.Strong("No file loaded. 
"), + "Set Output Parquet Path to an existing Parquet/CSV and Start the tracer, ", + "or switch Data source mode to Live to collect data in-memory.", + ] + ) + else: # auto + if not running and not has_file: + return html.Div( + [ + html.Strong("No data available. "), + "Start live tracing (left) or set a Parquet/CSV in Output Parquet Path. ", + "Data source mode is Auto: it will prefer live data when the tracer is running.", + ] + ) + except Exception: + pass + return "" + + # ---------------------------- + # Helper utilities (theme, events, small figure helpers) + # ---------------------------- + def normalize_ts(ts_any): + try: + ts = pd.to_datetime(ts_any) + # drop tz if present + if getattr(ts, "tzinfo", None) is not None: + try: + ts = ts.tz_convert("UTC").tz_localize(None) + except Exception: + try: + ts = ts.tz_localize("UTC").tz_localize(None) + except Exception: + pass + return ts + except Exception: + return pd.Timestamp.utcnow() + + # apply_theme and style_minimal_figure are imported from helpers.py + + def make_sparkline(ts, series, theme_value): + """Create a tiny sparkline figure for KPI cards. + + Expects ts (Datetime-like Series) and series (numeric Series) of same length. + """ + fig = go.Figure() + try: + fig.add_trace(go.Scatter(x=ts, y=series, mode="lines", name="", hoverinfo="skip")) + except Exception: + # fallback empty + pass + # Minimal styling + apply_theme(fig, theme_value) + style_minimal_figure(fig, theme_value) + fig.update_layout(margin=dict(l=0, r=0, t=0, b=0)) + fig.update_xaxes(visible=False) + fig.update_yaxes(visible=False) + return fig + + # ---------------------------- + # Strategy pattern: Graph components and registry + # ---------------------------- + class GraphContext: + def __init__(self, smoothing_window, apply_theme_fn, smooth_series_fn): + self.smoothing_window = smoothing_window + self.apply_theme = apply_theme_fn + self.smooth_series = smooth_series_fn + + class GraphComponent: + component_id = "" + title = None + is_time_series = True # whether event markers should be applied + + def build(self, df: pd.DataFrame, ts: pd.Series, ctx: GraphContext, params: dict) -> go.Figure: + raise NotImplementedError + + class CPUIndividualGraph(GraphComponent): + component_id = "cpu-individual-utilization-graph" + title = "CPU Utilization (per core)" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + cpu_cols = [c for c in df.columns if c.startswith("cpu_") and c.endswith("_utilization")] + for c in sorted(cpu_cols): + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df[c]), mode="lines", name=c.replace("_utilization", "")) + ) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + class CPUAggregateGraph(GraphComponent): + component_id = "cpu-aggregated-utilization-graph" + title = "CPU Utilization (aggregate)" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + cpu_cols = [c for c in df.columns if c.startswith("cpu_") and c.endswith("_utilization")] + if cpu_cols: + cpu_mean = ctx.smooth_series(df[cpu_cols].mean(axis=1)) + fig.add_trace(go.Scatter(x=ts, y=cpu_mean, mode="lines", name="CPU %")) + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + class MemoryGraph(GraphComponent): + component_id = "memory-graph" + title = "Memory Utilization" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + if {"sys_used", "sys_total"}.issubset(df.columns): + mem_pct = (df["sys_used"] / df["sys_total"] * 100.0).clip(lower=0, upper=100) + 
fig.add_trace(go.Scatter(x=ts, y=ctx.smooth_series(mem_pct), mode="lines", name="Mem %")) + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + class FileCountGraph(GraphComponent): + component_id = "file-count-graph" + title = "Total Open Files" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + if "total_open_files" in df.columns: + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df["total_open_files"]), mode="lines", name="Open Files") + ) + fig.update_layout(title=self.title) + return fig + + class FDUsageGraph(GraphComponent): + component_id = "fd-usage-graph" + title = "FD Usage %" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + if "fd_usage_percent" in df.columns: + fig.add_trace(go.Scatter(x=ts, y=ctx.smooth_series(df["fd_usage_percent"]), mode="lines", name="FD %")) + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + class NetworkGraph(GraphComponent): + component_id = "network-graph" + title = "Network Throughput" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + recv_col = "net_bytes_recv_per_sec" if "net_bytes_recv_per_sec" in df.columns else None + sent_col = "net_bytes_sent_per_sec" if "net_bytes_sent_per_sec" in df.columns else None + if recv_col: + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df[recv_col]) / (1024**2), mode="lines", name="Down MB/s") + ) + if sent_col: + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df[sent_col]) / (1024**2), mode="lines", name="Up MB/s") + ) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="MB/s") + return fig + + class DiskIOGraph(GraphComponent): + component_id = "disk-io-graph" + title = "Disk I/O" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + r_col = "disk_read_bytes_per_sec" if "disk_read_bytes_per_sec" in df.columns else None + w_col = "disk_write_bytes_per_sec" if "disk_write_bytes_per_sec" in df.columns else None + if r_col: + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df[r_col]) / (1024**2), mode="lines", name="Read MB/s") + ) + if w_col: + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df[w_col]) / (1024**2), mode="lines", name="Write MB/s") + ) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="MB/s") + return fig + + class GPUUtilGraph(GraphComponent): + component_id = "gpu-utilization-graph" + title = "GPU Utilization %" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + util_cols = [c for c in df.columns if c.endswith("_utilization") and c.startswith("gpu_")] + for c in sorted(util_cols): + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df[c]), mode="lines", name=c.replace("_utilization", " util")) + ) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + class GPUMemoryGraph(GraphComponent): + component_id = "gpu-memory-graph" + title = "GPU Memory %" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + gpu_mem_used = [c for c in df.columns if c.startswith("gpu_") and c.endswith("_used")] + for c in sorted(gpu_mem_used): + idx = c.split("_")[1] + tot_col = f"gpu_{idx}_total" + if tot_col in df.columns: + pct = (df[c] / df[tot_col] * 100.0).clip(lower=0, upper=100) + fig.add_trace(go.Scatter(x=ts, y=ctx.smooth_series(pct), mode="lines", name=f"GPU {idx} Mem %")) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + # Helpers for Docker naming (support old and new) — module scope so callbacks can access + def 
docker_container_names(df): + try: + names = set() + for col in df.columns: + if col.endswith("_container_cpu_percent"): + names.add(col[: -len("_container_cpu_percent")]) + elif col.startswith("docker_") and col.endswith("_cpu_percent"): + names.add(col[len("docker_") : -len("_cpu_percent")]) + return sorted(names) + except Exception: + return [] + + # (migrated) proctree suggestion/search callbacks are registered in callbacks/proctree.py + + # (migrated) proctree PID selection registered in callbacks/proctree.py + + def docker_pick_col(df, name, new_suffix, old_suffix): + for cand in (f"docker_{name}_{new_suffix}", f"{name}_{old_suffix}"): + if cand in df.columns: + return cand + return None + + class ContainerCPUGraph(GraphComponent): + component_id = "container-cpu-utilization-graph" + title = "Container CPU %" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + for name in params.get("selected_containers", []): + col = docker_pick_col(df, name, "cpu_percent", "container_cpu_percent") + if col: + fig.add_trace(go.Scatter(x=ts, y=ctx.smooth_series(df[col]), mode="lines", name=f"{name} CPU%")) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + class ContainerMemGraph(GraphComponent): + component_id = "container-memory-utilization-graph" + title = "Container Memory %" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + for name in params.get("selected_containers", []): + col = docker_pick_col(df, name, "mem_percent", "container_mem_percent") + if col: + fig.add_trace(go.Scatter(x=ts, y=ctx.smooth_series(df[col]), mode="lines", name=f"{name} Mem%")) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + class ContainerFilesGraph(GraphComponent): + component_id = "container-files-graph" + title = "Container Open Files" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + for name in params.get("selected_containers", []): + col = docker_pick_col(df, name, "open_files", "container_open_files") + if col: + fig.add_trace(go.Scatter(x=ts, y=ctx.smooth_series(df[col]), mode="lines", name=f"{name} Files")) + if len(fig.data) > 0: + fig.update_layout(title=self.title) + return fig + + class ContainerNetGraph(GraphComponent): + component_id = "container-net-graph" + title = "Container Network (selected sum)" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + rx_cols = [] + tx_cols = [] + for n in params.get("selected_containers", []): + col_rx = docker_pick_col(df, n, "net_rx_bytes_per_sec", "container_net_rx_bytes_per_sec") + col_tx = docker_pick_col(df, n, "net_tx_bytes_per_sec", "container_net_tx_bytes_per_sec") + if col_rx: + rx_cols.append(col_rx) + if col_tx: + tx_cols.append(col_tx) + if rx_cols: + fig.add_trace( + go.Scatter( + x=ts, y=ctx.smooth_series(df[rx_cols].sum(axis=1)) / (1024**2), mode="lines", name="RX MB/s" + ) + ) + if tx_cols: + fig.add_trace( + go.Scatter( + x=ts, y=ctx.smooth_series(df[tx_cols].sum(axis=1)) / (1024**2), mode="lines", name="TX MB/s" + ) + ) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="MB/s") + return fig + + class ContainerIOGraph(GraphComponent): + component_id = "container-io-graph" + title = "Container Disk I/O (selected sum)" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + r_cols = [] + w_cols = [] + for n in params.get("selected_containers", []): + col_r = docker_pick_col(df, n, "blkio_read_bytes_per_sec", "container_blkio_read_bytes_per_sec") + col_w = docker_pick_col(df, n, 
"blkio_write_bytes_per_sec", "container_blkio_write_bytes_per_sec") + if col_r: + r_cols.append(col_r) + if col_w: + w_cols.append(col_w) + if r_cols: + fig.add_trace( + go.Scatter( + x=ts, y=ctx.smooth_series(df[r_cols].sum(axis=1)) / (1024**2), mode="lines", name="Read MB/s" + ) + ) + if w_cols: + fig.add_trace( + go.Scatter( + x=ts, y=ctx.smooth_series(df[w_cols].sum(axis=1)) / (1024**2), mode="lines", name="Write MB/s" + ) + ) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="MB/s") + return fig + + class TopContainersCPUBar(GraphComponent): + component_id = "container-top-cpu-graph" + title = "Top Containers by CPU (latest)" + is_time_series = False + + def build(self, df, ts, ctx, params): + fig = go.Figure() + all_containers = docker_container_names(df) + if not all_containers or df.empty: + return fig + latest = df.iloc[-1] + pairs = [] + for name in all_containers: + col = docker_pick_col(df, name, "cpu_percent", "container_cpu_percent") + if col: + pairs.append((name, latest[col])) + pairs = sorted(pairs, key=lambda x: x[1], reverse=True)[:10] + if pairs: + fig.add_trace(go.Bar(x=[n for n, _ in pairs], y=[v for _, v in pairs], name="CPU %")) + fig.update_layout(title=self.title) + return fig + + class TopContainersMemBar(GraphComponent): + component_id = "container-top-mem-graph" + title = "Top Containers by Mem (latest)" + is_time_series = False + + def build(self, df, ts, ctx, params): + fig = go.Figure() + all_containers = docker_container_names(df) + if not all_containers or df.empty: + return fig + latest = df.iloc[-1] + pairs = [] + for name in all_containers: + col = docker_pick_col(df, name, "mem_percent", "container_mem_percent") + if col: + pairs.append((name, latest[col])) + pairs = sorted(pairs, key=lambda x: x[1], reverse=True)[:10] + if pairs: + fig.add_trace(go.Bar(x=[n for n, _ in pairs], y=[v for _, v in pairs], name="Mem %")) + fig.update_layout(title=self.title) + return fig + + class ProcessCountGraph(GraphComponent): + component_id = "process-count-graph" + title = "System Process Count" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + if "system_process_count" in df.columns: + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df["system_process_count"]), mode="lines", name="Processes") + ) + fig.update_layout(title=self.title) + return fig + + class ThreadCountGraph(GraphComponent): + component_id = "thread-count-graph" + title = "System Thread Count" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + if "system_thread_count" in df.columns: + fig.add_trace( + go.Scatter(x=ts, y=ctx.smooth_series(df["system_thread_count"]), mode="lines", name="Threads") + ) + fig.update_layout(title=self.title) + return fig + + class OverviewGraph(GraphComponent): + component_id = "system-overview-graph" + title = "System Overview" + + def build(self, df, ts, ctx, params): + fig = go.Figure() + # include CPU aggregate and Memory % if available + cpu_cols = [c for c in df.columns if c.startswith("cpu_") and c.endswith("_utilization")] + if cpu_cols: + cpu_mean = ctx.smooth_series(df[cpu_cols].mean(axis=1)) + fig.add_trace(go.Scatter(x=ts, y=cpu_mean, mode="lines", name="CPU %")) + if {"sys_used", "sys_total"}.issubset(df.columns): + mem_pct = (df["sys_used"] / df["sys_total"] * 100.0).clip(lower=0, upper=100) + fig.add_trace(go.Scatter(x=ts, y=ctx.smooth_series(mem_pct), mode="lines", name="Mem %")) + if len(fig.data) > 0: + fig.update_layout(title=self.title, yaxis_title="%") + return fig + + def add_event_markers(fig, 
events_list, display_tz, display_tz_custom): + try: + events_list = events_list or [] + for evt in events_list: + ts_raw = evt.get("timestamp") + name = evt.get("name", "event") + if not ts_raw: + continue + ts = pd.to_datetime(ts_raw) + # Events are stored UTC-naive internally. Convert to selected display tz (naive) for rendering. + try: + if display_tz == "local" and tzlocal is not None: + ts = ts.tz_localize("UTC").tz_convert(tzlocal()).tz_localize(None) + elif display_tz == "custom" and display_tz_custom and gettz is not None: + tz = gettz(display_tz_custom) + if tz is not None: + ts = ts.tz_localize("UTC").tz_convert(tz).tz_localize(None) + else: + # utc: leave as UTC-naive + pass + except Exception: + pass + # Draw vertical line via shape for broader Plotly compatibility + try: + fig.add_shape( + type="line", + xref="x", + x0=ts, + x1=ts, + yref="paper", + y0=0, + y1=1, + line=dict(color="#8888d8", width=1, dash="dash"), + layer="above", + ) + fig.add_annotation( + x=ts, + y=1, + xref="x", + yref="paper", + text=name, + showarrow=False, + xanchor="left", + yanchor="bottom", + font=dict(size=10), + bgcolor="rgba(136,132,216,0.15)", + ) + except Exception: + pass + except Exception: + pass + return fig + + def render_event_list(events_list): + events_list = events_list or [] + items = [] + for idx, e in enumerate(events_list): + ts_txt = e.get("timestamp", "") + name_txt = e.get("name", "event") + items.append( + html.Div( + [ + html.Span(f"{ts_txt} — {name_txt}", className="event-text"), + html.Button( + "Delete", + id={"type": "event-delete", "index": idx}, + n_clicks=0, + className="inline button tiny", + style={"marginLeft": "8px"}, + ), + ], + className="event-item", + style={"display": "flex", "alignItems": "center", "justifyContent": "space-between"}, + ) + ) + if not items: + return html.Div("No events", style={"opacity": 0.7}) + return items + + def make_empty_fig(title): + fig = go.Figure() + fig.add_annotation(text=title, showarrow=False, yref="paper", y=0.5, xref="paper", x=0.5) + fig.update_layout(margin=dict(l=30, r=10, t=30, b=30)) + return fig + + def convert_ts_for_display(ts_series, display_tz, display_tz_custom, data_tz): + try: + ts = pd.to_datetime(ts_series) + # First, assign the correct base timezone to the stored timestamps (they are naive on disk) + base = None + if data_tz == "utc": + base = "UTC" + else: + try: + base = tzlocal() if tzlocal is not None else None + except Exception: + base = None + + if base is not None: + try: + ts = ts.dt.tz_localize(base) + except Exception: + # fallback: try scalar localize if Series.dt failed + try: + ts = pd.DatetimeIndex(ts).tz_localize(base) + except Exception: + pass + + # Convert to target display timezone and drop tz to keep axes naive + try: + target = None + if display_tz == "local" and tzlocal is not None: + target = tzlocal() + elif display_tz == "custom" and display_tz_custom and gettz is not None: + target = gettz(display_tz_custom) + else: + target = "UTC" + if target is not None: + ts = pd.DatetimeIndex(ts).tz_convert(target).tz_localize(None) + except Exception: + pass + return ts + except Exception: + return pd.to_datetime(ts_series, errors="coerce") + + def event_times_for_display(events_list, display_tz, display_tz_custom): + try: + if not events_list: + return pd.Series([], dtype="datetime64[ns]") + ets = pd.to_datetime([e.get("timestamp") for e in events_list if e.get("timestamp")]) + try: + if display_tz == "local" and tzlocal is not None: + ets = 
pd.DatetimeIndex(ets).tz_localize("UTC").tz_convert(tzlocal()).tz_localize(None) + elif display_tz == "custom" and display_tz_custom and gettz is not None: + tz = gettz(display_tz_custom) + if tz is not None: + ets = pd.DatetimeIndex(ets).tz_localize("UTC").tz_convert(tz).tz_localize(None) + except Exception: + pass + return pd.Series(ets) + except Exception: + return pd.Series([], dtype="datetime64[ns]") + + # Callback to update all graphs periodically + @app.callback( + Output("display-tz-custom-wrap", "style"), + [Input("display-tz", "value")], + ) + def _toggle_custom_tz(display_value): + if display_value == "custom": + return {"display": "block"} + return {"display": "none"} + + @app.callback( + [ + Output("page-container", "style"), + Output("data-source", "children"), + Output("last-updated", "children"), + Output("kpi-row", "children"), + Output("system-overview-graph", "figure"), + Output("cpu-individual-utilization-graph", "figure"), + Output("cpu-aggregated-utilization-graph", "figure"), + Output("memory-graph", "figure"), + Output("file-count-graph", "figure"), + Output("fd-usage-graph", "figure"), + Output("network-graph", "figure"), + Output("disk-io-graph", "figure"), + Output("gpu-utilization-graph", "figure"), + Output("gpu-memory-graph", "figure"), + Output("container-cpu-utilization-graph", "figure"), + Output("container-memory-utilization-graph", "figure"), + Output("container-files-graph", "figure"), + Output("container-net-graph", "figure"), + Output("container-io-graph", "figure"), + Output("container-top-cpu-graph", "figure"), + Output("container-top-mem-graph", "figure"), + Output("process-count-graph", "figure"), + Output("thread-count-graph", "figure"), + ], + [ + Input("interval-component", "n_intervals"), + Input("time-range", "value"), + Input("theme-toggle", "value"), + Input("smoothing-window", "value"), + Input("datafile-store", "data"), + Input("data-source-mode", "value"), + Input("container-auto-top", "value"), + Input("container-top-n", "value"), + Input("container-select", "value"), + Input("event-store", "data"), + Input("event-auto-store", "data"), + Input("event-display-options", "value"), + Input("display-tz", "value"), + Input("display-tz-custom", "value"), + Input("data-tz", "value"), + ], + ) + def update_graphs( + n, + time_range, + theme_value, + smoothing_window, + data_path, + source_mode, + auto_top, + top_n, + selected_manual, + events_data, + auto_events, + event_display_options, + display_tz, + display_tz_custom, + data_tz, + ): + # Load data (respect selected source mode) + df = load_data(data_path or datafile, time_range, source_mode or "auto") + last_timestamp = "Never" + if not df.empty and "timestamp" in df.columns: + try: + last_timestamp = pd.to_datetime(df["timestamp"].max()).strftime("%Y-%m-%d %H:%M:%S") + except Exception: + last_timestamp = str(df["timestamp"].max()) + + # Helpers + def smooth_series(s): + try: + w = max(1, int(smoothing_window or 1)) + if w > 1: + return s.rolling(window=w, min_periods=1).mean() + except Exception: + pass + return s + + ts = ( + convert_ts_for_display(df["timestamp"], display_tz, display_tz_custom, data_tz) + if (not df.empty and "timestamp" in df.columns) + else pd.Series([]) + ) + + # Build display timezone label and offset for UI + def display_tz_info(): + try: + label = "UTC" + tzinfo = None + if display_tz == "local" and tzlocal is not None: + tzinfo = tzlocal() + label = "Local" + elif display_tz == "custom" and display_tz_custom and gettz is not None: + tzinfo = 
gettz(display_tz_custom) + label = f"Custom ({display_tz_custom})" + else: + from dateutil.tz import tzutc + + tzinfo = tzutc() + label = "UTC" + now_dt = datetime.now(tzinfo) if tzinfo is not None else datetime.utcnow() + offset = now_dt.utcoffset() or pd.Timedelta(0) + total_minutes = int(offset.total_seconds() // 60) + sign = "+" if total_minutes >= 0 else "-" + hh = abs(total_minutes) // 60 + mm = abs(total_minutes) % 60 + offset_str = f"UTC{sign}{hh:02d}:{mm:02d}" + # Try to get a friendly tz name + tzname = now_dt.tzname() if tzinfo is not None else "UTC" + return label, tzname, offset_str + except Exception: + return "UTC", "UTC", "UTC+00:00" + + disp_label, disp_name, disp_offset = display_tz_info() + + # KPI row (compact, minimal) + def kpi_card(label, value, spark_fig=None): + return html.Div( + [ + html.Div(label, style={"fontSize": "12px", "opacity": 0.7}), + html.Div(value, style={"fontSize": "18px", "fontWeight": "600"}), + (dcc.Graph(figure=spark_fig, style={"height": "36px"}) if spark_fig is not None else None), + ], + style={"border": "1px solid #333", "borderRadius": "6px", "padding": "8px", "marginRight": "8px"}, + ) + + kpi_children = [kpi_card("Samples", f"{len(df)}")] + + # Add key latest metrics if present (Tufte-inspired: high information density, no chartjunk) + try: + if not df.empty: + latest_row = df.iloc[-1] + # CPU % (aggregate) + cpu_cols = [c for c in df.columns if c.startswith("cpu_") and c.endswith("_utilization")] + if cpu_cols: + cpu_latest = float(pd.to_numeric(latest_row[cpu_cols], errors="coerce").mean()) + # sparkline for CPU mean + try: + cpu_mean_series = pd.to_numeric(df[cpu_cols], errors="coerce").mean(axis=1) + cpu_spark = make_sparkline(ts, cpu_mean_series, theme_value) + except Exception: + cpu_spark = None + kpi_children.append(kpi_card("CPU %", f"{cpu_latest:0.1f}", cpu_spark)) + # Mem % + if {"sys_used", "sys_total"}.issubset(df.columns): + try: + mem_latest = float(latest_row["sys_used"]) / float(latest_row["sys_total"]) * 100.0 + mem_latest = max(0.0, min(100.0, mem_latest)) + try: + mem_pct_series = (df["sys_used"] / df["sys_total"] * 100.0).clip(lower=0, upper=100) + mem_spark = make_sparkline(ts, mem_pct_series, theme_value) + except Exception: + mem_spark = None + kpi_children.append(kpi_card("Mem %", f"{mem_latest:0.1f}", mem_spark)) + except Exception: + pass + # Processes / Threads + if "system_process_count" in df.columns: + try: + kpi_children.append(kpi_card("Procs", f"{int(latest_row['system_process_count'])}")) + except Exception: + pass + if "system_thread_count" in df.columns: + try: + kpi_children.append(kpi_card("Threads", f"{int(latest_row['system_thread_count'])}")) + except Exception: + pass + # Net MB/s (sum up/down) + rx_col = "net_bytes_recv_per_sec" if "net_bytes_recv_per_sec" in df.columns else None + tx_col = "net_bytes_sent_per_sec" if "net_bytes_sent_per_sec" in df.columns else None + if rx_col or tx_col: + try: + rx = float(latest_row.get(rx_col, 0.0) or 0.0) + tx = float(latest_row.get(tx_col, 0.0) or 0.0) + mbps = (rx + tx) / (1024**2) + try: + rx_series = df[rx_col] if rx_col in df.columns else 0.0 + tx_series = df[tx_col] if tx_col in df.columns else 0.0 + net_series = ( + pd.to_numeric(rx_series, errors="coerce").fillna(0) + + pd.to_numeric(tx_series, errors="coerce").fillna(0) + ) / (1024**2) + net_spark = make_sparkline(ts, net_series, theme_value) + except Exception: + net_spark = None + kpi_children.append(kpi_card("Net MB/s", f"{mbps:0.2f}", net_spark)) + except Exception: + pass + # Disk MB/s (sum 
r+w) + r_col = "disk_read_bytes_per_sec" if "disk_read_bytes_per_sec" in df.columns else None + w_col = "disk_write_bytes_per_sec" if "disk_write_bytes_per_sec" in df.columns else None + if r_col or w_col: + try: + r = float(latest_row.get(r_col, 0.0) or 0.0) + w = float(latest_row.get(w_col, 0.0) or 0.0) + mbps = (r + w) / (1024**2) + try: + r_series = df[r_col] if r_col in df.columns else 0.0 + w_series = df[w_col] if w_col in df.columns else 0.0 + io_series = ( + pd.to_numeric(r_series, errors="coerce").fillna(0) + + pd.to_numeric(w_series, errors="coerce").fillna(0) + ) / (1024**2) + io_spark = make_sparkline(ts, io_series, theme_value) + except Exception: + io_spark = None + kpi_children.append(kpi_card("Disk MB/s", f"{mbps:0.2f}", io_spark)) + except Exception: + pass + except Exception: + pass + + # Build component registry and figures using strategy pattern + # Determine selected containers first (support old and new docker column names) + all_containers = docker_container_names(df) if (not df.empty) else [] + if all_containers: + if auto_top and "auto" in (auto_top or []): + latest = df.iloc[-1] + scored = [] + for name in all_containers: + col = docker_pick_col(df, name, "cpu_percent", "container_cpu_percent") + if col: + scored.append((name, latest[col])) + selected_containers = [ + n for n, _ in sorted(scored, key=lambda x: x[1], reverse=True)[: int(top_n or 5)] + ] + else: + selected_containers = selected_manual or [] + if not selected_containers: + selected_containers = all_containers[: min(5, len(all_containers))] + else: + selected_containers = [] + + ctx_obj = GraphContext(smoothing_window, apply_theme, smooth_series) + params = {"selected_containers": selected_containers} + + components = [ + OverviewGraph(), + CPUIndividualGraph(), + CPUAggregateGraph(), + MemoryGraph(), + FileCountGraph(), + FDUsageGraph(), + NetworkGraph(), + DiskIOGraph(), + GPUUtilGraph(), + GPUMemoryGraph(), + ContainerCPUGraph(), + ContainerMemGraph(), + ContainerFilesGraph(), + ContainerNetGraph(), + ContainerIOGraph(), + TopContainersCPUBar(), + TopContainersMemBar(), + ProcessCountGraph(), + ThreadCountGraph(), + ] + + # Build figures map by id + figures_by_id = {c.component_id: go.Figure() for c in components} + if not df.empty and "timestamp" in df.columns: + for comp in components: + try: + figures_by_id[comp.component_id] = comp.build(df, ts, ctx_obj, params) + except Exception: + figures_by_id[comp.component_id] = go.Figure() + + # Merge events and theme + merged_events = (events_data or []) + (auto_events or []) + # Add events KPI + try: + evt_count = len(merged_events) + except Exception: + evt_count = 0 + kpi_children.append( + html.Div( + [ + html.Div("Events", style={"fontSize": "12px", "opacity": 0.7}), + html.Div(f"{evt_count}", style={"fontSize": "18px", "fontWeight": "600"}), + ], + style={"border": "1px solid #333", "borderRadius": "6px", "padding": "8px"}, + ) + ) + # Compute visible event time bounds to ensure markers are in-range + evt_ts = event_times_for_display(merged_events, display_tz, display_tz_custom) + # Removed unused figs_all variable (was redundant with time_series_figs and not referenced) + # Time-series figs exclude categorical bar charts (top_*). Bar charts won't get event lines. 
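+ # NOTE: keep time_series_ids below in sync with the graph component registry above; + # a time-series id omitted from the list gets no event markers or theme styling.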
+ # Post-process: apply x-axis type, include events and theme to time-series figs + time_series_ids = [ + "system-overview-graph", + "cpu-individual-utilization-graph", + "cpu-aggregated-utilization-graph", + "memory-graph", + "file-count-graph", + "fd-usage-graph", + "network-graph", + "disk-io-graph", + "gpu-utilization-graph", + "gpu-memory-graph", + "container-cpu-utilization-graph", + "container-memory-utilization-graph", + "container-files-graph", + "container-net-graph", + "container-io-graph", + "process-count-graph", + "thread-count-graph", + ] + markers_enabled = True + try: + markers_enabled = (event_display_options is None) or ("markers" in (event_display_options or [])) + except Exception: + markers_enabled = True + + for fig_id in time_series_ids: + f = figures_by_id.get(fig_id, go.Figure()) + # Ensure date x-axis for proper placement of vertical lines + f.update_xaxes(type="date") + # If we have both data ts and event ts, expand range to include both + try: + if len(ts) > 0 and len(evt_ts) > 0: + xmin = min(pd.to_datetime(ts.min()), pd.to_datetime(evt_ts.min())) + xmax = max(pd.to_datetime(ts.max()), pd.to_datetime(evt_ts.max())) + # Add small padding + pad = pd.Timedelta(seconds=1) + f.update_xaxes(range=[xmin - pad, xmax + pad]) + elif len(ts) == 0 and len(evt_ts) > 0: + # No data, but we do have events: center axis around events so markers are visible + xmin = pd.to_datetime(evt_ts.min()) + xmax = pd.to_datetime(evt_ts.max()) + if xmin == xmax: + xmax = xmin + pd.Timedelta(minutes=1) + pad = pd.Timedelta(seconds=1) + f.update_xaxes(range=[xmin - pad, xmax + pad]) + except Exception: + pass + if markers_enabled: + add_event_markers(f, merged_events, display_tz, display_tz_custom) + apply_theme(f, theme_value) + style_minimal_figure(f, theme_value) + # Always show legends for clarity (after styling which doesn't override legend) + f.update_layout( + showlegend=True, + legend=dict(orientation="h", x=0.0, y=1.0, yanchor="top"), + margin=dict(l=40, r=10, t=40, b=28), + ) + + # Process Tree handled by manual callback; nothing to build here + + # Apply theme to bar charts as well + for fig_id in ["container-top-cpu-graph", "container-top-mem-graph"]: + f = figures_by_id.get(fig_id, go.Figure()) + apply_theme(f, theme_value) + style_minimal_figure(f, theme_value) + f.update_layout(showlegend=True) + + # Page style per theme + page_style = { + "padding": "20px", + "backgroundColor": ("#111111" if theme_value == "dark" else "#ffffff"), + "color": ("#e5e5e5" if theme_value == "dark" else "#222222"), + } + + return ( + page_style, + f"Data source: {data_path or datafile} | Data TZ: UTC-naive (stored); Displayed in:" + f" {disp_label} ({disp_name}, {disp_offset})", + f"Last updated: {last_timestamp} | Display TZ: {disp_label} ({disp_name}, {disp_offset})", + kpi_children, + figures_by_id["system-overview-graph"], + figures_by_id["cpu-individual-utilization-graph"], + figures_by_id["cpu-aggregated-utilization-graph"], + figures_by_id["memory-graph"], + figures_by_id["file-count-graph"], + figures_by_id["fd-usage-graph"], + figures_by_id["network-graph"], + figures_by_id["disk-io-graph"], + figures_by_id["gpu-utilization-graph"], + figures_by_id["gpu-memory-graph"], + figures_by_id["container-cpu-utilization-graph"], + figures_by_id["container-memory-utilization-graph"], + figures_by_id["container-files-graph"], + figures_by_id["container-net-graph"], + figures_by_id["container-io-graph"], + figures_by_id["container-top-cpu-graph"], + figures_by_id["container-top-mem-graph"], + 
figures_by_id["process-count-graph"], + figures_by_id["thread-count-graph"], + ) + + # (migrated) Manual process tree callback registered in callbacks/proctree_impl.py + + # (migrated) proctree style callbacks registered in callbacks/proctree.py + + # (migrated) proctree view toggle registered in callbacks/proctree.py + + # (migrated) proctree Cytoscape callbacks registered in callbacks/proctree.py + + # (migrated) proctree status helper registered in callbacks/proctree.py + + # (migrated) proctree snapshot callback registered in callbacks/proctree.py + + # (migrated) proctree diff callback registered in callbacks/proctree.py + + # Populate container dropdown options dynamically + @app.callback( + Output("container-select", "options"), + [Input("interval-component", "n_intervals"), Input("time-range", "value"), Input("datafile-store", "data")], + ) + def update_container_options(n, time_range, data_path): + try: + df = load_data(data_path or datafile, time_range) + if df.empty: + return [] + names = sorted( + { + col.replace("_container_cpu_percent", "") + for col in df.columns + if col.endswith("_container_cpu_percent") + } + ) + return [{"label": name, "value": name} for name in names] + except Exception: + return [] + + # Clientside callback to sync CSS theme via data-theme attribute + app.clientside_callback( + """ + function(theme){ + var isLight = (theme === 'light'); + var body = document.body, html = document.documentElement; + if (isLight) { body.setAttribute('data-theme','light'); html.setAttribute('data-theme','light'); } + else { body.removeAttribute('data-theme'); html.removeAttribute('data-theme'); } + return ''; + } + """, + Output("body-bg-sync", "children"), + Input("theme-toggle", "value"), + ) + + # Manage events: add, add-now, clear + @app.callback( + [Output("event-store", "data"), Output("event-list", "children")], + [ + Input("add-event-btn", "n_clicks"), + Input("add-event-now-btn", "n_clicks"), + Input("clear-events-btn", "n_clicks"), + Input("event-upload", "contents"), + Input({"type": "event-delete", "index": ALL}, "n_clicks"), + ], + [ + State("event-name", "value"), + State("event-date", "date"), + State("event-time", "value"), + State("event-store", "data"), + State("event-auto-store", "data"), + State("time-range", "value"), + State("display-tz", "value"), + State("display-tz-custom", "value"), + State("datafile-store", "data"), + ], + ) + def manage_events( + n_add, + n_now, + n_clear, + upload_contents, + delete_clicks, + name, + date_val, + time_val, + data, + auto_data, + time_range, + display_tz, + display_tz_custom, + data_path, + ): + data = data or [] + triggered = getattr(dash, "callback_context", None) + trig_id = "" + if triggered and triggered.triggered: + trig_id = triggered.triggered[0]["prop_id"].split(".")[0] + + # Helper to get latest timestamp from datafile + def latest_data_ts(): + df = load_data(data_path or datafile, time_range) + if not df.empty and "timestamp" in df.columns: + try: + return df["timestamp"].max() + except Exception: + pass + # Fallback: now in selected display timezone (naive), will be normalized to UTC-naive below + return pd.Timestamp(datetime.now()) + + def selected_tzinfo(): + if display_tz == "utc": + try: + from dateutil.tz import tzutc + + return tzutc() + except Exception: + return None + if display_tz == "custom" and display_tz_custom and gettz is not None: + tz = gettz(display_tz_custom) + if tz is not None: + return tz + return tzlocal() if tzlocal is not None else None + + def 
normalize_ts_to_utc_naive(ts_any): + ts = pd.to_datetime(ts_any) + # If tz-aware: convert to UTC then drop tz + if getattr(ts, "tzinfo", None) is not None and ts.tzinfo is not None: + try: + ts = ts.tz_convert("UTC").tz_localize(None) + except Exception: + try: + # If tz_convert fails, maybe it's offset-naive; localize first assuming UTC + ts = ts.tz_localize("UTC").tz_localize(None) + except Exception: + pass + return ts + # tz-naive: assume in the selected display tz, convert to UTC, drop tz + try: + tzinf = selected_tzinfo() + if tzinf is not None: + ts_loc = ts.tz_localize(tzinf) + ts = ts_loc.tz_convert("UTC").tz_localize(None) + return ts + except Exception: + pass + return ts + + def from_text_to_utc_naive(val): + if not val: + raise ValueError("no datetime provided") + base = pd.to_datetime(val) + # Treat input as wall time in selected display tz; convert to UTC, then drop tz + tzinf = selected_tzinfo() + if tzinf is not None: + try: + base_loc = base.tz_localize(tzinf) + base = base_loc.tz_convert("UTC").tz_localize(None) + except Exception: + pass + return base + + # Handle per-item delete via pattern-matching id + if trig_id.startswith("{") and "event-delete" in trig_id: + try: + obj = json.loads(trig_id) + idx = int(obj.get("index", -1)) + except Exception: + idx = -1 + if 0 <= idx < len(data): + data.pop(idx) + return data, render_event_list(data) + + if trig_id == "clear-events-btn": + return [], html.Div("No events", style={"opacity": 0.7}) + + if trig_id in ("add-event-btn", "add-event-now-btn"): + evt_name = (name or "").strip() or "event" + if trig_id == "add-event-now-btn": + # Use current time in selected tz -> convert to UTC-naive for storage + now_dt = pd.Timestamp(datetime.now()) + tzinf = selected_tzinfo() + if tzinf is not None: + try: + now_dt = now_dt.tz_localize(tzinf).tz_convert("UTC").tz_localize(None) + except Exception: + pass + ts = now_dt + else: + # Combine date + time into a single wall time in selected display tz + if date_val: + t_str = (time_val or "00:00:00").strip() + # normalize time format HH:MM[:SS] + parts = t_str.split(":") + if len(parts) == 1: + t_str = f"{parts[0]}:00:00" + elif len(parts) == 2: + t_str = f"{parts[0]}:{parts[1]}:00" + try: + ts = from_text_to_utc_naive(f"{date_val} {t_str}") + except Exception: + ts = latest_data_ts() + else: + ts = latest_data_ts() + # Normalize to isoformat string + try: + ts_norm = normalize_ts(ts) + ts_str = pd.to_datetime(ts_norm).strftime("%Y-%m-%d %H:%M:%S") + except Exception: + ts_str = str(ts) + new_data = data + [{"name": evt_name, "timestamp": ts_str}] + return new_data, render_event_list(new_data) + + if trig_id == "event-upload": + try: + content_type, content_string = upload_contents.split(",") + decoded = base64.b64decode(content_string) + text = decoded.decode("utf-8", errors="ignore") + reader = csv.reader(io.StringIO(text)) + imported = [] + for row in reader: + if not row: + continue + if len(row) == 1: + # try split by comma manually + parts = row[0].split(",") + if len(parts) >= 2: + row = [parts[0], ",".join(parts[1:])] + else: + continue + evt_name = (row[0] or "").strip() or "event" + ts_text = (row[1] or "").strip() + if not ts_text: + continue + try: + ts = normalize_ts_to_utc_naive(ts_text) + ts_str = pd.to_datetime(ts).strftime("%Y-%m-%d %H:%M:%S") + except Exception: + ts_str = str(ts_text) + imported.append({"name": evt_name, "timestamp": ts_str}) + new_data = (data or []) + imported + return new_data, render_event_list(new_data) + except Exception: + return data, 
render_event_list(data) + + # default: just render existing + return data, render_event_list(data) + + # Auto event triggers (watch points) + @app.callback( + [Output("event-auto-store", "data"), Output("watch-state-store", "data")], + [Input("interval-component", "n_intervals"), Input("clear-auto-events-btn", "n_clicks")], + [ + State("watch-enable", "value"), + State("watch-cpu", "value"), + State("watch-mem", "value"), + State("watch-threads", "value"), + State("watch-procs", "value"), + State("event-auto-store", "data"), + State("watch-state-store", "data"), + State("datafile-store", "data"), + State("time-range", "value"), + ], + ) + def apply_watch_points( + n, + n_clear_auto, + enable_vals, + thr_cpu, + thr_mem, + thr_thr, + thr_prc, + auto_events, + watch_state, + data_path, + time_range, + ): + auto_events = auto_events or [] + watch_state = watch_state or {"cpu": False, "mem": False, "threads": False, "procs": False} + enabled = set(enable_vals or []) + # If clear button triggered, reset auto events and watch state immediately + triggered = getattr(dash, "callback_context", None) + trig_id = "" + if triggered and triggered.triggered: + trig_id = triggered.triggered[0]["prop_id"].split(".")[0] + if trig_id == "clear-auto-events-btn": + # Clear auto events and set watch_state according to current readings + try: + df_now = load_data(data_path or datafile, time_range) + except Exception: + return [], watch_state + if df_now.empty or "timestamp" not in df_now.columns: + return [], watch_state + latest_now = df_now.iloc[-1] + # Compute over-threshold flags to avoid immediate re-trigger if still over + new_state = {"cpu": False, "mem": False, "threads": False, "procs": False} + try: + if "cpu" in enabled and thr_cpu is not None: + cpu_cols = [c for c in df_now.columns if c.startswith("cpu_") and c.endswith("_utilization")] + cpu_avg = float(latest_now[cpu_cols].mean()) if cpu_cols else None + new_state["cpu"] = cpu_avg is not None and cpu_avg >= float(thr_cpu) + except Exception: + pass + try: + if "mem" in enabled and thr_mem is not None: + used = float(latest_now.get("sys_used", 0.0)) + total = float(latest_now.get("sys_total", 0.0)) + mem_pct = (used / total * 100.0) if total > 0 else None + new_state["mem"] = mem_pct is not None and mem_pct >= float(thr_mem) + except Exception: + pass + try: + if "threads" in enabled and thr_thr is not None: + thr_count = float(latest_now.get("system_thread_count", 0)) + new_state["threads"] = thr_count >= float(thr_thr) + except Exception: + pass + try: + if "procs" in enabled and thr_prc is not None: + prc_count = float(latest_now.get("system_process_count", 0)) + new_state["procs"] = prc_count >= float(thr_prc) + except Exception: + pass + return [], new_state + try: + df = load_data(data_path or datafile, time_range) + except Exception: + return auto_events, watch_state + if df.empty or "timestamp" not in df.columns: + return auto_events, watch_state + latest = df.iloc[-1] + ts = latest.get("timestamp") + try: + ts_norm = pd.to_datetime(normalize_ts(ts)).strftime("%Y-%m-%d %H:%M:%S") + except Exception: + ts_norm = str(ts) + + new_events = [] + # CPU avg threshold + if "cpu" in enabled and thr_cpu is not None: + try: + cpu_cols = [c for c in df.columns if c.startswith("cpu_") and c.endswith("_utilization")] + cpu_avg = float(latest[cpu_cols].mean()) if cpu_cols else None + if cpu_avg is not None: + over = cpu_avg >= float(thr_cpu) + if over and not watch_state.get("cpu", False): + new_events.append( + { + "name": f"CPU% > {float(thr_cpu):.0f} 
(now {cpu_avg:.1f})", + "timestamp": ts_norm, + } + ) + watch_state["cpu"] = over + except Exception: + pass + # Memory percent threshold + if "mem" in enabled and thr_mem is not None: + try: + used = float(latest.get("sys_used", 0.0)) + total = float(latest.get("sys_total", 0.0)) + mem_pct = (used / total * 100.0) if total > 0 else None + if mem_pct is not None: + over = mem_pct >= float(thr_mem) + if over and not watch_state.get("mem", False): + new_events.append( + { + "name": f"Mem% > {float(thr_mem):.0f} (now {mem_pct:.1f})", + "timestamp": ts_norm, + } + ) + watch_state["mem"] = over + except Exception: + pass + # Threads threshold + if "threads" in enabled and thr_thr is not None: + try: + thr_count = float(latest.get("system_thread_count", 0)) + over = thr_count >= float(thr_thr) + if over and not watch_state.get("threads", False): + new_events.append( + { + "name": f"Threads > {float(thr_thr):.0f} (now {thr_count:.0f})", + "timestamp": ts_norm, + } + ) + watch_state["threads"] = over + except Exception: + pass + # Processes threshold + if "procs" in enabled and thr_prc is not None: + try: + prc_count = float(latest.get("system_process_count", 0)) + over = prc_count >= float(thr_prc) + if over and not watch_state.get("procs", False): + new_events.append( + { + "name": f"Processes > {float(thr_prc):.0f} (now {prc_count:.0f})", + "timestamp": ts_norm, + } + ) + watch_state["procs"] = over + except Exception: + pass + + if new_events: + return (auto_events + new_events), watch_state + return auto_events, watch_state + + # ---------------------------- + # Tracer integration + # ---------------------------- + import threading + + _tracer_lock = threading.Lock() + _tracer_obj = {"tracer": None, "thread": None} # type: ignore + + def _is_running(): + t = _tracer_obj.get("thread") + return t is not None and t.is_alive() + + @app.callback( + [Output("tracer-status", "children"), Output("datafile-store", "data")], + [ + Input("tracer-start-btn", "n_clicks"), + Input("tracer-stop-btn", "n_clicks"), + Input("tracer-reset-btn", "n_clicks"), + Input("tracer-snapshot-btn", "n_clicks"), + ], + [ + State("tracer-output-path", "value"), + State("tracer-sample-interval", "value"), + State("tracer-write-interval", "value"), + State("tracer-options", "value"), + State("datafile-store", "data"), + ], + prevent_initial_call=True, + ) + def manage_tracer(n_start, n_stop, n_reset, n_snap, out_path, sample_iv, write_iv, options, current_path): + trig = getattr(dash, "callback_context", None) + trig_id = "" + if trig and trig.triggered: + trig_id = trig.triggered[0]["prop_id"].split(".")[0] + + out_path = (out_path or current_path or datafile).strip() + enable_gpu = (options or []) and ("gpu" in (options or [])) + enable_docker = (options or []) and ("docker" in (options or [])) + use_utc = (options or []) and ("utc" in (options or [])) + + if trig_id == "tracer-start-btn": + with _tracer_lock: + if _is_running(): + return (f"Tracer already running -> {out_path}", out_path) + tracer = SystemTracer( + sample_interval=float(sample_iv or 5.0), + # Keep data in memory when running from dashboard; only snapshot persists + write_interval=0.0, + output_file=out_path, + enable_gpu=bool(enable_gpu), + enable_docker=bool(enable_docker), + use_utc=bool(use_utc), + write_final=False, + ) + th = threading.Thread(target=tracer.run, kwargs={"duration": None, "verbose": False}, daemon=True) + _tracer_obj["tracer"] = tracer + _tracer_obj["thread"] = th + th.start() + return (f"Tracer started -> {out_path}", out_path) + + if 
trig_id == "tracer-stop-btn": + with _tracer_lock: + tracer = _tracer_obj.get("tracer") + th = _tracer_obj.get("thread") + if tracer is not None: + try: + tracer.stop() + except Exception: + pass + if th is not None: + try: + th.join(timeout=2.0) + except Exception: + pass + _tracer_obj["tracer"] = None + _tracer_obj["thread"] = None + return ("Tracer stopped.", out_path) + + if trig_id == "tracer-reset-btn": + with _tracer_lock: + tracer = _tracer_obj.get("tracer") + if tracer is not None: + try: + tracer.reset() + except Exception: + pass + return ("Tracer buffer reset.", out_path) + + if trig_id == "tracer-snapshot-btn": + with _tracer_lock: + tracer = _tracer_obj.get("tracer") + if tracer is not None: + try: + tracer.set_output_file(out_path) + path = tracer.snapshot(out_path) + return (f"Snapshot saved to {path}", path) + except Exception as e: + return (f"Snapshot failed: {e}", out_path) + # If tracer not running, still write empty/new file to path + try: + # Create a unique suffixed filename if destination exists + base_path = (out_path or "system_monitor.parquet").strip() + root, ext = os.path.splitext(base_path) + if not ext: + ext = ".parquet" + root = base_path # original base without extension + base_path = base_path + ext + path = base_path + if os.path.exists(path): + idx = 0 + while True: + candidate = f"{root}_{idx}{ext}" + if not os.path.exists(candidate): + path = candidate + break + idx += 1 + pd.DataFrame([]).to_parquet(path) + return (f"Snapshot (empty) saved to {path}", path) + except Exception as e: + return (f"Snapshot failed: {e}", out_path) + + return ("", out_path) + + # Start the server + print(f"Starting dashboard server on http://{host}:{port}/") + print(f"Using data file: {datafile}") + print(f"Refresh interval: {interval} seconds") + print("Press Ctrl+C to stop the server") + app.run(debug=debug, host=host, port=port) + + +if __name__ == "__main__": + run_dashboard() # This invokes the Click command diff --git a/scripts/support/system_monitor/system_tracer.py b/scripts/support/system_monitor/system_tracer.py new file mode 100644 index 000000000..1b08fef7d --- /dev/null +++ b/scripts/support/system_monitor/system_tracer.py @@ -0,0 +1,980 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import time +import json +import argparse +from typing import Optional, Dict, Any, List +import threading +import os + +import pandas as pd +import psutil + +try: + import pynvml # type: ignore +except Exception: # NVML is optional + pynvml = None # type: ignore +try: + import docker # type: ignore +except Exception: # Docker is optional + docker = None # type: ignore +import subprocess + +# Add these to your requirements.txt: +# pyarrow>=12.0.0 +# fastparquet>=2023.2.0 + +# --- Helpers to mirror docker stats behavior --- +# Keep names internal to avoid changing external API + + +def _docker_cpu_percent(stats: dict) -> float: + """Compute CPU% similar to `docker stats` using precpu_stats. + Falls back gracefully if fields are missing. 
+ """ + try: + cpu_stats = stats.get("cpu_stats", {}) + precpu_stats = stats.get("precpu_stats", {}) + + cpu_total = cpu_stats.get("cpu_usage", {}).get("total_usage") + pre_cpu_total = precpu_stats.get("cpu_usage", {}).get("total_usage") + + system_total = cpu_stats.get("system_cpu_usage") + pre_system_total = precpu_stats.get("system_cpu_usage") + + if cpu_total is None or pre_cpu_total is None or system_total is None or pre_system_total is None: + return 0.0 + + cpu_delta = cpu_total - pre_cpu_total + system_delta = system_total - pre_system_total + + # Prefer online_cpus when available (cgroup v2 aware); otherwise percpu length + online_cpus = cpu_stats.get("online_cpus") + if not online_cpus: + percpu = cpu_stats.get("cpu_usage", {}).get("percpu_usage") or [] + online_cpus = len(percpu) if percpu else (psutil.cpu_count() or 1) + + if system_delta > 0 and cpu_delta > 0: + return (cpu_delta / system_delta) * online_cpus * 100.0 + return 0.0 + except Exception: + return 0.0 + + +def _docker_memory_usage_limit_percent(mem_stats: dict): + """Return (used_bytes, limit_bytes, percent) using docker's approach. + On cgroup v1: used = usage - cache. On v2: prefer inactive_file subtraction if present. + """ + try: + usage = mem_stats.get("usage", 0) or 0 + limit = mem_stats.get("limit", 0) or 0 + stats = mem_stats.get("stats", {}) or {} + + # Prefer inactive_file (cgroup v2) when present; otherwise cache (v1) + inactive_file = stats.get("inactive_file") + if inactive_file is None: + inactive_file = stats.get("total_inactive_file") + cache = stats.get("cache") + + if inactive_file is not None: + used = max(usage - inactive_file, 0) + elif cache is not None: + used = max(usage - cache, 0) + else: + used = usage + + percent = (used / limit * 100.0) if limit and limit > 0 else 0.0 + return used, limit, percent + except Exception: + return 0, 0, 0.0 + + +def _aggregate_network_bytes(stats: dict): + """Sum rx/tx across all interfaces from docker stats JSON.""" + rx = 0 + tx = 0 + try: + networks = stats.get("networks", {}) or {} + for _if, vals in networks.items(): + rx += int(vals.get("rx_bytes", 0) or 0) + tx += int(vals.get("tx_bytes", 0) or 0) + except Exception: + pass + return rx, tx + + +def _aggregate_blkio_bytes(stats: dict): + """Sum blkio read/write bytes from docker stats JSON.""" + read = 0 + write = 0 + try: + entries = stats.get("blkio_stats", {}).get("io_service_bytes_recursive", []) or [] + for e in entries: + op = (e.get("op") or "").lower() + val = int(e.get("value", 0) or 0) + if op == "read": + read += val + elif op == "write": + write += val + except Exception: + pass + return read, write + + +class BaseCollector: + def collect(self) -> Dict[str, Any]: # pragma: no cover - interface + return {} + + def close(self) -> None: # pragma: no cover - optional + pass + + +class MemoryCollector(BaseCollector): + def collect(self) -> Dict[str, Any]: + mem = psutil.virtual_memory() + return {"sys_total": mem.total, "sys_used": mem.used, "sys_free": mem.free} + + +class CPUCollector(BaseCollector): + def __init__(self, percpu: bool = True, interval: Optional[float] = None) -> None: + self.percpu = percpu + self.interval = interval + + def collect(self) -> Dict[str, Any]: + utils = psutil.cpu_percent(percpu=self.percpu, interval=self.interval) + if self.percpu: + return {f"cpu_{i}_utilization": v for i, v in enumerate(utils)} + else: + return {"cpu_avg_utilization": utils} + + +class OpenFilesCollector(BaseCollector): + def __init__(self, use_lsof_fallback: bool = True) -> None: + 
self.use_lsof_fallback = use_lsof_fallback + + def collect(self) -> Dict[str, Any]: + try: + total_open_files = len(psutil.Process().net_connections()) + for proc in psutil.process_iter(["pid", "name"]): + try: + total_open_files += len(proc.open_files()) + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): + pass + + if self.use_lsof_fallback: + try: + result = subprocess.run(["lsof", "-n"], capture_output=True, text=True) + lsof_count = len(result.stdout.splitlines()) - 1 + total_open_files = max(total_open_files, lsof_count) + except (subprocess.SubprocessError, FileNotFoundError): + pass + + max_files = 0 + max_files_process = "None" + for proc in psutil.process_iter(["pid", "name"]): + try: + open_count = len(proc.open_files()) + if open_count > max_files: + max_files = open_count + max_files_process = f"{proc.name()}({proc.pid})" + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): + pass + + try: + with open("/proc/sys/fs/file-max", "r") as f: + fd_max = int(f.read().strip()) + with open("/proc/sys/fs/file-nr", "r") as f: + fd_used = int(f.read().split()[0]) + fd_percentage = (fd_used / fd_max) * 100 if fd_max > 0 else 0 + except (FileNotFoundError, ValueError, IndexError): + fd_max = 0 + fd_used = 0 + fd_percentage = 0 + + return { + "total_open_files": total_open_files, + "max_files_process": max_files_process, + "max_files_count": max_files, + "fd_used": fd_used, + "fd_max": fd_max, + "fd_usage_percent": fd_percentage, + } + except Exception as e: + print(f"Error getting open files count: {e}") + return { + "total_open_files": -1, + "max_files_process": f"Error: {str(e)}", + "max_files_count": -1, + "fd_used": -1, + "fd_max": -1, + "fd_usage_percent": -1, + } + + +# -------- Process tree/thread inspector (Python equivalent of thread_checker.sh) -------- +def get_process_tree_summary(root_pid: int, verbose: bool = False) -> Dict[str, Any]: + """Return a summary of a process tree rooted at root_pid. + + Provides per-process thread counts and command names, totals, and aggregation by command. + This mirrors the functionality of thread_checker.sh using psutil. 
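+
+    Example (illustrative):
+
+        >>> summary = get_process_tree_summary(os.getpid())  # doctest: +SKIP
+        >>> summary["totals"]["total_processes"] >= 1  # doctest: +SKIP
+        True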
+ """ + result: Dict[str, Any] = { + "root_pid": root_pid, + "processes": [], # list of {pid, ppid, name, threads} + "totals": {"total_processes": 0, "total_threads": 0}, + "aggregated_by_command": [], # list of {command, processes, total_threads} + "verbose": verbose, + } + try: + if root_pid <= 0: + return result + try: + root = psutil.Process(root_pid) + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): + # Fallback: scan process table, locate root by pid and build tree using PPID relationships + all_infos: Dict[int, Dict[str, Any]] = {} + try: + for p in psutil.process_iter(attrs=["pid", "ppid", "name", "num_threads"]): + info = p.info + all_infos[info.get("pid")] = { + "pid": info.get("pid"), + "ppid": info.get("ppid"), + "name": info.get("name") or "(unknown)", + "threads": int(info.get("num_threads") or 0), + } + except Exception: + pass + if root_pid not in all_infos: + return result + # Build children map + by_ppid: Dict[Optional[int], list] = {} + for it in all_infos.values(): + by_ppid.setdefault(it.get("ppid"), []).append(it) + # DFS from root_pid to collect subtree + stack = [root_pid] + per_pid = [] + total_threads = 0 + seen = set() + while stack: + cur = stack.pop() + if cur in seen: + continue + seen.add(cur) + ent = all_infos.get(cur) + if not ent: + continue + per_pid.append(ent) + total_threads += ent.get("threads", 0) + for child in by_ppid.get(cur, []): + cid = child.get("pid") + if cid is not None: + stack.append(cid) + result["processes"] = sorted(per_pid, key=lambda x: (x.get("ppid") or -1, x.get("pid") or -1)) + result["totals"] = {"total_processes": len(per_pid), "total_threads": total_threads} + # Aggregate by command + agg: Dict[str, Dict[str, int]] = {} + for it in per_pid: + cmd = it.get("name") or "(unknown)" + ent = agg.setdefault(cmd, {"processes": 0, "total_threads": 0}) + ent["processes"] += 1 + ent["total_threads"] += int(it.get("threads") or 0) + result["aggregated_by_command"] = [ + {"command": k, **v} for k, v in sorted(agg.items(), key=lambda kv: kv[1]["total_threads"], reverse=True) + ] + return result + + # Gather all processes in the tree (root + recursive children) + procs = [root] + try: + procs.extend(root.children(recursive=True)) + except Exception: + pass + + per_pid = [] + total_threads = 0 + for p in procs: + if p is None: + continue + pid = None + ppid = None + name = None + threads = 0 + try: + pid = p.pid + except Exception: + continue + try: + ppid = p.ppid() + except Exception: + ppid = None + try: + name = p.name() + except Exception: + name = "(access-denied)" + try: + threads = int(p.num_threads()) + except Exception: + # If threads cannot be read due to permissions, treat as 0 but still include the process + threads = 0 + info = {"pid": pid, "ppid": ppid, "name": name, "threads": threads} + per_pid.append(info) + total_threads += threads + + result["processes"] = sorted(per_pid, key=lambda x: x["pid"]) + result["totals"] = {"total_processes": len(per_pid), "total_threads": total_threads} + + # Aggregate by command + agg: Dict[str, Dict[str, int]] = {} + for it in per_pid: + cmd = it["name"] or "(unknown)" + ent = agg.setdefault(cmd, {"processes": 0, "total_threads": 0}) + ent["processes"] += 1 + ent["total_threads"] += it["threads"] + result["aggregated_by_command"] = [ + {"command": k, **v} for k, v in sorted(agg.items(), key=lambda kv: kv[1]["total_threads"], reverse=True) + ] + except Exception as e: + result["error"] = str(e) + return result + + +class DiskIOCollector(BaseCollector): + def 
collect(self) -> Dict[str, Any]: + try: + io_counters = psutil.disk_io_counters() + return { + "disk_read_bytes": io_counters.read_bytes, + "disk_write_bytes": io_counters.write_bytes, + "disk_read_count": io_counters.read_count, + "disk_write_count": io_counters.write_count, + "disk_busy_time": io_counters.busy_time if hasattr(io_counters, "busy_time") else 0, + } + except Exception as e: + print(f"Error getting disk I/O stats: {e}") + return { + "disk_read_bytes": -1, + "disk_write_bytes": -1, + "disk_read_count": -1, + "disk_write_count": -1, + "disk_busy_time": -1, + } + + +class NetworkCollector(BaseCollector): + def collect(self) -> Dict[str, Any]: + try: + net_io = psutil.net_io_counters() + return { + "net_bytes_sent": net_io.bytes_sent, + "net_bytes_recv": net_io.bytes_recv, + "net_packets_sent": net_io.packets_sent, + "net_packets_recv": net_io.packets_recv, + "net_errin": net_io.errin, + "net_errout": net_io.errout, + "net_dropin": net_io.dropin, + "net_dropout": net_io.dropout, + } + except Exception as e: + print(f"Error getting network stats: {e}") + return { + "net_bytes_sent": -1, + "net_bytes_recv": -1, + "net_packets_sent": -1, + "net_packets_recv": -1, + "net_errin": -1, + "net_errout": -1, + "net_dropin": -1, + "net_dropout": -1, + } + + +class GPUCollector(BaseCollector): + def __init__(self) -> None: + self._inited = False + self._available = False + + def _init(self): + if self._inited: + return + try: + pynvml.nvmlInit() + self._available = True + except Exception as e: + print(f"GPU monitoring not available: {e}") + self._available = False + finally: + self._inited = True + + def collect(self) -> Dict[str, Any]: + self._init() + gpu_stats: Dict[str, Any] = {} + if not self._available: + return gpu_stats + try: + device_count = pynvml.nvmlDeviceGetCount() + for i in range(device_count): + try: + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) + utilization = pynvml.nvmlDeviceGetUtilizationRates(handle) + gpu_stats[f"gpu_{i}_total"] = memory_info.total + gpu_stats[f"gpu_{i}_used"] = memory_info.used + gpu_stats[f"gpu_{i}_free"] = memory_info.free + gpu_stats[f"gpu_{i}_utilization"] = utilization.gpu + try: + temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU) + gpu_stats[f"gpu_{i}_temp"] = temp + except: # noqa: E722 + pass + except pynvml.NVMLError as e: + print(f"Error retrieving info for GPU {i}: {e}") + except Exception as e: + print(f"Error initializing GPU monitoring: {e}") + return gpu_stats + + def close(self) -> None: + if self._inited and self._available: + try: + pynvml.nvmlShutdown() + except: # noqa: E722 + pass + finally: + self._inited = False + self._available = False + + +class ProcessThreadCollector(BaseCollector): + def collect(self) -> Dict[str, Any]: + proc_count = 0 + thread_count = 0 + try: + for proc in psutil.process_iter(["pid"]): + proc_count += 1 + try: + thread_count += proc.num_threads() + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): + pass + except Exception as e: + print(f"Error counting processes/threads: {e}") + return {"system_process_count": proc_count, "system_thread_count": thread_count} + + +class DockerCollector(BaseCollector): + """Collect per-container stats and flatten into the row namespace. 
+
+    Key format (clear, consistent):
+        <prefix>_<container_name>_<metric>
+    Example: docker_nginx_cpu_percent, docker_postgres_mem_used_bytes
+    """
+
+    def __init__(
+        self,
+        client: Optional["docker.DockerClient"] = None,
+        key_prefix: str = "docker",
+        separator: str = "_",
+    ) -> None:
+        self.client = client
+        self.key_prefix = key_prefix
+        self.separator = separator
+        if self.client is None:
+            try:
+                self.client = docker.from_env()
+            except Exception as e:
+                print("Error connecting to Docker daemon:", e)
+                self.client = None
+
+    def collect(self) -> Dict[str, Any]:
+        out: Dict[str, Any] = {}
+        if self.client is None:
+            return out
+        try:
+            containers = self.client.containers.list()
+            for container in containers:
+                try:
+                    stats_raw = container.stats(stream=False)
+                    stats = (
+                        json.loads(stats_raw.decode("utf-8"))
+                        if isinstance(stats_raw, (bytes, bytearray))
+                        else stats_raw
+                    )
+
+                    cpu_percent = _docker_cpu_percent(stats)
+                    used_bytes, limit_bytes, mem_percent = _docker_memory_usage_limit_percent(
+                        stats.get("memory_stats", {})
+                    )
+                    mem_usage_gb = used_bytes / (1024**3)  # noqa: F841
+                    mem_limit_gb = limit_bytes / (1024**3) if limit_bytes else 0  # noqa: F841
+
+                    rx_bytes, tx_bytes = _aggregate_network_bytes(stats)
+                    blk_read, blk_write = _aggregate_blkio_bytes(stats)
+
+                    # Best-effort open files from container init PID
+                    try:
+                        inspect_data = container.attrs
+                        pid = inspect_data.get("State", {}).get("Pid", 0)
+                        if pid and pid > 0:
+                            proc = psutil.Process(pid)
+                            open_files_count = len(proc.open_files())
+                        else:
+                            open_files_count = -1
+                    except Exception:
+                        open_files_count = -1
+
+                    cname = container.name
+                    sep = self.separator
+                    pref = f"{self.key_prefix}{sep}{cname}" if self.key_prefix else cname
+                    out.update(
+                        {
+                            f"{pref}{sep}cpu_percent": cpu_percent,
+                            # memory (expose both raw bytes and derived percent/limit)
+                            f"{pref}{sep}mem_used_bytes": int(used_bytes),
+                            f"{pref}{sep}mem_limit_bytes": int(limit_bytes),
+                            f"{pref}{sep}mem_percent": mem_percent,
+                            # open files (best-effort)
+                            f"{pref}{sep}open_files": open_files_count,
+                            # cumulative counters for per-second derivation
+                            f"{pref}{sep}net_rx_bytes": rx_bytes,
+                            f"{pref}{sep}net_tx_bytes": tx_bytes,
+                            f"{pref}{sep}blkio_read_bytes": blk_read,
+                            f"{pref}{sep}blkio_write_bytes": blk_write,
+                        }
+                    )
+                except Exception as e:
+                    print(f"Error retrieving stats for container {container.name}: {e}")
+        except Exception as e:
+            print("Error listing Docker containers:", e)
+        return out
+
+
+def calculate_deltas(current, previous, delta_keys):
+    """Return per-second rates ("<key>_per_sec") for cumulative counters between two samples."""
+    deltas = {}
+    if previous:
+        for key in delta_keys:
+            if key in current and key in previous:
+                if isinstance(current[key], (int, float)) and isinstance(previous[key], (int, float)):
+                    time_diff = (current["timestamp"] - previous["timestamp"]).total_seconds()
+                    if time_diff > 0:
+                        delta_per_sec = (current[key] - previous[key]) / time_diff
+                        deltas[f"{key}_per_sec"] = delta_per_sec
+    return deltas
+
+
+class SystemTracer:
+    """Encapsulated system monitoring with configurable options.
+
+    Provides collection of system metrics, optional Docker and GPU stats,
+    delta computation for cumulative counters, and periodic Parquet writing.
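+
+    Example (illustrative; the output path is a placeholder):
+
+        >>> tracer = SystemTracer(sample_interval=1.0, output_file="trace.parquet")  # doctest: +SKIP
+        >>> tracer.run(duration=10.0, verbose=False)  # doctest: +SKIP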
+ """ + + def __init__( + self, + sample_interval: float = 5.0, + write_interval: float = 10.0, + output_file: str = "system_monitor.parquet", + enable_gpu: bool = True, + enable_docker: bool = True, + docker_client: Optional["docker.DockerClient"] = None, + collectors: Optional[List[BaseCollector]] = None, + use_utc: bool = False, + write_final: bool = True, + ) -> None: + self.sample_interval = sample_interval + self.write_interval = write_interval + self.output_file = output_file + self.enable_gpu = enable_gpu + self.enable_docker = enable_docker + self.data_buffer: List[Dict[str, Any]] = [] + self.previous_row: Optional[Dict[str, Any]] = None + self.delta_keys: List[str] = [ + "disk_read_bytes", + "disk_write_bytes", + "disk_read_count", + "disk_write_count", + "net_bytes_sent", + "net_bytes_recv", + "net_packets_sent", + "net_packets_recv", + ] + self.last_write_time = time.time() + self.docker_client = docker_client + self.use_utc = use_utc + self.write_final = write_final + if self.enable_docker and self.docker_client is None: + try: + self.docker_client = docker.from_env() + except Exception as e: + print(f"Docker client not available: {e}") + self.docker_client = None + + # Initialize per-metric collectors (allow override) + if collectors is not None: + self.collectors = collectors + else: + self.collectors: List[BaseCollector] = [ + MemoryCollector(), + CPUCollector(percpu=True, interval=None), + OpenFilesCollector(), + DiskIOCollector(), + NetworkCollector(), + ProcessThreadCollector(), + ] + self.gpu_collector: Optional[GPUCollector] = None + if self.enable_gpu: + self.gpu_collector = GPUCollector() + self.collectors.append(self.gpu_collector) + self.docker_collector: Optional[DockerCollector] = None + if self.enable_docker and self.docker_client is not None: + self.docker_collector = DockerCollector(client=self.docker_client) + self.collectors.append(self.docker_collector) + # Control flags/state + self._stop_event = threading.Event() + + def _shutdown_gpu(self) -> None: + # Back-compat: close GPU collector if present + if self.gpu_collector is not None: + try: + self.gpu_collector.close() + except Exception: + pass + + def write_parquet_to(self, df: pd.DataFrame, destination_path: str) -> None: + """Atomically write the dataframe to a specific parquet destination path. + + Mirrors write_parquet but targets the provided destination rather than self.output_file. 
+ """ + tmp_path = f"{destination_path}.tmp" + # Try pyarrow first + try: + import pyarrow as pa # type: ignore + import pyarrow.parquet as pq # type: ignore + + table = pa.Table.from_pandas(df) + pq.write_table(table, tmp_path) + os.replace(tmp_path, destination_path) + return + except ImportError: + pass + except Exception: + try: + if os.path.exists(tmp_path): + os.remove(tmp_path) + except Exception: + pass + raise + + # Fallback: fastparquet via pandas + try: + df.to_parquet(tmp_path, engine="fastparquet") + os.replace(tmp_path, destination_path) + finally: + try: + if os.path.exists(tmp_path): + os.remove(tmp_path) + except Exception: + pass + + def collect_once(self) -> Dict[str, Any]: + """Collect a single snapshot of system metrics, computing deltas if possible.""" + timestamp = pd.Timestamp.utcnow() if self.use_utc else pd.Timestamp.now() + row: Dict[str, Any] = {"timestamp": timestamp} + # Aggregate from all collectors + for collector in self.collectors: + try: + data = collector.collect() + if not data: + continue + row.update(data) + except Exception as e: + print(f"Collector {collector.__class__.__name__} failed: {e}") + + # If Docker collector present, add its cumulative keys to delta set + if self.docker_collector is not None: + # Match new naming: docker__ + suffixes = ( + "_net_rx_bytes", + "_net_tx_bytes", + "_blkio_read_bytes", + "_blkio_write_bytes", + ) + for k in list(row.keys()): + if any(k.endswith(sfx) for sfx in suffixes) and k not in self.delta_keys: + self.delta_keys.append(k) + + # Deltas + if self.previous_row: + deltas = calculate_deltas(row, self.previous_row, self.delta_keys) + row.update(deltas) + self.previous_row = row.copy() + return row + + def write_parquet(self, df: pd.DataFrame) -> None: + """Atomically write the current dataframe to the parquet output path. + + Prefers pyarrow; falls back to fastparquet if available. Writes to a temp + file and atomically replaces the target so readers never see partial data. + """ + tmp_path = f"{self.output_file}.tmp" + # Try pyarrow first + try: + import pyarrow as pa # type: ignore + import pyarrow.parquet as pq # type: ignore + + table = pa.Table.from_pandas(df) + pq.write_table(table, tmp_path) + os.replace(tmp_path, self.output_file) + return + except ImportError: + pass + except Exception: + # If pyarrow present but write failed, clean up and re-raise to try fallback + try: + if os.path.exists(tmp_path): + os.remove(tmp_path) + except Exception: + pass + raise + + # Fallback: fastparquet via pandas + try: + df.to_parquet(tmp_path, engine="fastparquet") + os.replace(tmp_path, self.output_file) + finally: + try: + if os.path.exists(tmp_path): + os.remove(tmp_path) + except Exception: + pass + + def run(self, duration: Optional[float] = None, verbose: bool = True) -> None: + """Run monitoring loop. If duration is set (seconds), stop after duration; else run until Ctrl+C.""" + start_time = time.time() + if verbose: + print( + f"Starting system monitoring. Data will be written to {self.output_file} every " + f"{self.write_interval} seconds." + ) + print("Press Ctrl+C to stop monitoring." 
if duration is None else f"Stopping after {duration} seconds...") + try: + while not self._stop_event.is_set(): + row = self.collect_once() + self.data_buffer.append(row) + + if verbose: + ts = row["timestamp"] + cpu_cols = [k for k in row.keys() if k.startswith("cpu_") and k.endswith("_utilization")] + cpu_vals = [row[k] for k in cpu_cols] + cpu_avg = sum(cpu_vals) / len(cpu_vals) if cpu_vals else 0.0 + mem_pct = (row["sys_used"] / row["sys_total"] * 100.0) if row.get("sys_total") else 0.0 + print( + f"\n[{ts}] CPU Avg: {cpu_avg:.1f}% | Memory: {mem_pct:.1f}% | " + f"Open Files: {row.get('total_open_files', 0)}" + ) + if "disk_read_bytes_per_sec" in row: + print( + f"Disk I/O: {row['disk_read_bytes_per_sec']/1024**2:.2f} MB/s read, " + f"{row['disk_write_bytes_per_sec']/1024**2:.2f} MB/s write" + ) + if "net_bytes_recv_per_sec" in row: + print( + f"Network: {row['net_bytes_recv_per_sec']/1024**2:.2f} MB/s down, " + f"{row['net_bytes_sent_per_sec']/1024**2:.2f} MB/s up" + ) + + # Periodic write (overwrite full buffered data) if enabled + now = time.time() + if ( + self.write_interval + and self.write_interval > 0 + and (now - self.last_write_time) >= self.write_interval + ): + df = pd.DataFrame(self.data_buffer) + try: + self.write_parquet(df) + if verbose: + print( + f"Total accumulated data ({len(self.data_buffer)} rows) " + f"written to {self.output_file} at {row['timestamp']}" + ) + self.last_write_time = now + except Exception as e: + print(f"Error writing periodic data: {e}") + + # Stop conditions + if duration is not None and (now - start_time) >= duration: + break + + time.sleep(self.sample_interval) + except KeyboardInterrupt: + if verbose: + print("\nStopping monitoring. Writing final data batch...") + finally: + # Final write if enabled + if self.write_final and self.data_buffer: + df = pd.DataFrame(self.data_buffer) + try: + self.write_parquet(df) + if verbose: + print(f"Final data written to {self.output_file}. Exiting.") + except Exception as e: + print(f"Error writing final data: {e}") + # Close any collectors that need cleanup + try: + self._shutdown_gpu() + except Exception: + pass + + def stop(self) -> None: + """Signal the run loop to stop.""" + self._stop_event.set() + + def reset(self) -> None: + """Clear accumulated data and deltas. Does not change output file.""" + self.data_buffer = [] + self.previous_row = None + self.last_write_time = time.time() + # Do not clear stop flag to allow caller to decide lifecycle + + def set_output_file(self, output_file: str) -> None: + """Update the output parquet path used by periodic writes.""" + self.output_file = output_file + + def snapshot(self, output_file: Optional[str] = None) -> str: + """Write the current buffered dataframe to the specified parquet path (or self.output_file). + + Returns the path written. + """ + base_path = output_file or self.output_file + if not base_path: + raise ValueError("No output_file specified for snapshot.") + # If destination exists, create a unique suffixed name: file.parquet -> file_0.parquet, ... 
+ root, ext = os.path.splitext(base_path) + if not ext: + ext = ".parquet" + root = base_path # original base without extension + base_path = base_path + ext + + path = base_path + if os.path.exists(path): + idx = 0 + while True: + candidate = f"{root}_{idx}{ext}" + if not os.path.exists(candidate): + path = candidate + break + idx += 1 + df = pd.DataFrame(self.data_buffer) + if not df.empty: + self.write_parquet_to(df, path) + else: + # Still write an empty table with schema + self.write_parquet_to(pd.DataFrame([]), path) + return path + + +# -------- Functional API -------- +def collect_system_snapshot(enable_gpu: bool = True, enable_docker: bool = True, docker_client=None) -> Dict[str, Any]: + tracer = SystemTracer( + sample_interval=0.0, + write_interval=0.0, + output_file="", + enable_gpu=enable_gpu, + enable_docker=enable_docker, + docker_client=docker_client, + use_utc=False, + ) + return tracer.collect_once() + + +def monitor_to_parquet( + output_file: str = "system_monitor.parquet", + sample_interval: float = 5.0, + write_interval: float = 10.0, + duration: Optional[float] = None, + enable_gpu: bool = True, + enable_docker: bool = True, + docker_client=None, + verbose: bool = True, + use_utc: bool = False, +) -> None: + tracer = SystemTracer( + sample_interval=sample_interval, + write_interval=write_interval, + output_file=output_file, + enable_gpu=enable_gpu, + enable_docker=enable_docker, + docker_client=docker_client, + use_utc=use_utc, + ) + tracer.run(duration=duration, verbose=verbose) + + +# -------- CLI utility -------- +def main(): + parser = argparse.ArgumentParser(description="System monitor/tracer CLI") + sub = parser.add_subparsers(dest="command") + + # run (default) + p_run = sub.add_parser("run", help="Run continuous monitoring and write Parquet") + p_run.add_argument("--output", default="system_monitor.parquet", help="Parquet output file path") + p_run.add_argument("--sample-interval", type=float, default=5.0, help="Sampling interval seconds") + p_run.add_argument("--write-interval", type=float, default=10.0, help="Write interval seconds") + p_run.add_argument("--duration", type=float, default=None, help="Optional duration to run (seconds)") + p_run.add_argument("--no-gpu", action="store_true", help="Disable GPU collection") + p_run.add_argument("--no-docker", action="store_true", help="Disable Docker collection") + p_run.add_argument("--quiet", action="store_true", help="Reduce console output") + p_run.add_argument("--utc", action="store_true", help="Record timestamps in UTC (default is local time)") + + # snapshot + p_snap = sub.add_parser("snapshot", help="Collect a single snapshot and print JSON") + p_snap.add_argument("--no-gpu", action="store_true", help="Disable GPU collection") + p_snap.add_argument("--no-docker", action="store_true", help="Disable Docker collection") + p_snap.add_argument("--utc", action="store_true", help="Use UTC timestamp for the snapshot") + + # proctree (process/thread inspection) + p_tree = sub.add_parser("proctree", help="Inspect a process tree and summarize threads") + p_tree.add_argument("pid", type=int, help="Root PID to inspect") + p_tree.add_argument("--verbose", action="store_true", help="Verbose per-PID output in JSON") + + args = parser.parse_args() + if not getattr(args, "command", None): + # No subcommand provided; default to 'run' so that subparser defaults are applied + args = parser.parse_args(["run"]) + cmd = args.command + + if cmd == "snapshot": + # One-off snapshot; use_utc affects only the timestamp on this row + 
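+        # Example output (illustrative): a single JSON object such as
+        # {"timestamp": "...", "sys_total": ..., "sys_used": ..., "cpu_0_utilization": ..., ...}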
tracer = SystemTracer(
+            sample_interval=0.0,
+            write_interval=0.0,
+            output_file="",
+            enable_gpu=not args.no_gpu,
+            enable_docker=not args.no_docker,
+            docker_client=None,
+            use_utc=bool(getattr(args, "utc", False)),
+        )
+        snap = tracer.collect_once()
+        print(json.dumps(snap, default=str))
+        return
+
+    if cmd == "proctree":
+        summary = get_process_tree_summary(args.pid, verbose=args.verbose)
+        print(json.dumps(summary, default=str))
+        return
+
+    # default: run
+    monitor_to_parquet(
+        output_file=args.output,
+        sample_interval=args.sample_interval,
+        write_interval=args.write_interval,
+        duration=args.duration,
+        enable_gpu=not args.no_gpu,
+        enable_docker=not args.no_docker,
+        verbose=not args.quiet,
+        use_utc=bool(getattr(args, "utc", False)),
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/support/system_monitor/tracer.py b/scripts/support/system_monitor/tracer.py
new file mode 100644
index 000000000..98e9483b9
--- /dev/null
+++ b/scripts/support/system_monitor/tracer.py
@@ -0,0 +1,25 @@
+"""Stable import surface for tracer utilities.
+
+Lightweight wrapper exposing tracer helpers (and the tracer CLI) under a short module path.
+
+Usage:
+    from system_monitor.tracer import (
+        get_process_tree_summary,
+    )
+"""
+
+# flake8: noqa
+
+from .system_tracer import (
+    get_process_tree_summary,
+)
+
+__all__ = ["get_process_tree_summary"]
+
+if __name__ == "__main__":
+    import sys
+
+    # Delegate to the full CLI in system_tracer; run via `python -m system_monitor.tracer`
+    # so the relative import above resolves.
+    from .system_tracer import main as tracer_main
+
+    sys.exit(tracer_main())
diff --git a/scripts/support/trace_summarizer.py b/scripts/support/trace_summarizer.py
new file mode 100644
index 000000000..22b588ffb
--- /dev/null
+++ b/scripts/support/trace_summarizer.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python3
+"""
+Trace Summarizer
+
+Parses NV-Ingest trace maps (trace::entry::<name>, trace::exit::<name>) and computes
+total time spent in each area. Aggregates across numbered suffixes like _0, _1.
+
+Usage:
+  - From file(s):
+      python scripts/support/trace_summarizer.py traces.json [more.json]
+  - From stdin:
+      cat traces.json | python scripts/support/trace_summarizer.py -
+
+Options (see --help for the full set of flags):
+  --units [ns|ms|s]  Output units (default: ms)
+  --json             Output JSON summary
+  --top N            Show top N entries only
+"""
+
+import sys
+import json
+import re
+import argparse
+import os
+import glob
+import math
+from collections import defaultdict
+from typing import Dict, Any, List, Tuple
+
+
+ENTRY_PREFIX = "trace::entry::"
+EXIT_PREFIX = "trace::exit::"
+SUFFIX_NUM_RE = re.compile(r"(.*)_(\d+)$")
+
+
+def _normalize_name(name: str) -> str:
+    """
+    Normalize a trace name by stripping a trailing _<n> numeric suffix to aggregate repeated items.
+    Example: "pdf_extractor::pdf_extraction::pdfium_pages_to_numpy_0" -> "...::pdfium_pages_to_numpy"
+    """
+    m = SUFFIX_NUM_RE.match(name)
+    return m.group(1) if m else name
+
+
+def _load_trace_map(path: str) -> Dict[str, Any]:
+    if path == "-":
+        data = sys.stdin.read()
+        return json.loads(data)
+    with open(path, "r") as f:
+        return json.load(f)
+
+
+def _iter_entries(trace_map: Dict[str, Any]) -> List[Tuple[str, int]]:
+    """
+    Return list of (name, entry_ts) for all entry keys.
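+
+    Example: {"trace::entry::stage": 5, "trace::exit::stage": 9} -> [("stage", 5)]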
+ """ + out = [] + for k, v in trace_map.items(): + if isinstance(k, str) and k.startswith(ENTRY_PREFIX): + name = k[len(ENTRY_PREFIX) :] + try: + ts = int(v) + except Exception: + # Attempt float -> int if provided as float + ts = int(float(v)) + out.append((name, ts)) + return out + + +def _get_exit_ts(trace_map: Dict[str, Any], name: str) -> int | None: + k = EXIT_PREFIX + name + if k not in trace_map: + return None + v = trace_map[k] + try: + return int(v) + except Exception: + return int(float(v)) + + +def _convert_units(ns: int, units: str) -> float: + if units == "ns": + return float(ns) + if units == "ms": + return ns / 1e6 + if units == "s": + return ns / 1e9 + raise ValueError("Unsupported units: " + units) + + +def summarize_durations(trace_maps: List[Dict[str, Any]], normalize_suffixes: bool = True) -> Dict[str, List[int]]: + """ + Compute list of durations (in ns) per normalized area name across all maps. + """ + durations: Dict[str, List[int]] = defaultdict(list) + for trace_map in trace_maps: + entries = _iter_entries(trace_map) + for name, ts_entry in entries: + ts_exit = _get_exit_ts(trace_map, name) + if ts_exit is None: + print(f"[warn] missing exit for '{name}'", file=sys.stderr) + continue + if ts_exit < ts_entry: + print(f"[warn] exit < entry for '{name}'", file=sys.stderr) + continue + norm = _normalize_name(name) if normalize_suffixes else name + durations[norm].append(ts_exit - ts_entry) + return dict(durations) + + +def summarize(trace_maps: List[Dict[str, Any]], normalize_suffixes: bool = True) -> Dict[str, int]: + """ + Compute total durations in nanoseconds per normalized area name across all maps. + """ + totals_ns: Dict[str, int] = defaultdict(int) + durations = summarize_durations(trace_maps, normalize_suffixes=normalize_suffixes) + for k, vals in durations.items(): + totals_ns[k] = sum(vals) + return dict(totals_ns) + + +def main(): + ap = argparse.ArgumentParser(description="Summarize NV-Ingest trace timings.") + ap.add_argument("inputs", nargs="+", help="JSON files or '-' for stdin") + ap.add_argument("--units", choices=["ns", "ms", "s"], default="ms", help="Output units (default: ms)") + ap.add_argument("--json", action="store_true", help="Output JSON instead of table") + ap.add_argument("--tree", action="store_true", help="Output hierarchical, indented tree view") + ap.add_argument( + "--no-aggregate-suffixes", + action="store_true", + help="Do not strip trailing numeric suffixes; keep entries like *_0, *_1 separate", + ) + ap.add_argument( + "--cumulative", + action="store_true", + help="Tree mode: show parent totals including the sum of all descendants", + ) + ap.add_argument("--top", type=int, default=0, help="Show top N entries") + ap.add_argument( + "--threshold", + type=float, + default=0.0, + help="Minimum fraction (0..1) of total aggregate time a stage must account for to be shown", + ) + ap.add_argument( + "--exclude-channel-in", + action="store_true", + help="Exclude entries whose names contain 'channel_in' or 'network_in' from listings", + ) + args = ap.parse_args() + + trace_maps = [] + # Aggregate primitive counts across inputs if present + primitive_total = 0 + primitive_by_type = defaultdict(int) + structured_by_subtype = defaultdict(int) + # Expand inputs: allow directories and glob patterns + expanded_inputs: List[str] = [] + for spec in args.inputs: + if spec == "-": + expanded_inputs.append(spec) + continue + if os.path.isdir(spec): + # Aggregate all *.traces.json files in the directory (non-recursive) + 
expanded_inputs.extend(sorted(glob.glob(os.path.join(spec, "*.traces.json")))) + continue + # Glob pattern (supports recursive with **) + matches = glob.glob(spec, recursive=True) + if matches: + expanded_inputs.extend(sorted(matches)) + else: + # Fallback: treat as a single file path + expanded_inputs.append(spec) + + for p in expanded_inputs: + try: + m = _load_trace_map(p) + trace_maps.append(m) + pc = m.get("primitive_counts") + if isinstance(pc, dict): + try: + primitive_total += int(pc.get("total", 0)) + except Exception: + pass + by_type = pc.get("by_type") or {} + if isinstance(by_type, dict): + for k, v in by_type.items(): + try: + primitive_by_type[k] += int(v) + except Exception: + continue + by_sub = pc.get("structured_by_subtype") or {} + if isinstance(by_sub, dict): + for k, v in by_sub.items(): + try: + structured_by_subtype[k] += int(v) + except Exception: + continue + except Exception as e: + print(f"[error] failed to load {p}: {e}", file=sys.stderr) + return 2 + + normalize_suffixes = not args.no_aggregate_suffixes + durations_by_name = summarize_durations(trace_maps, normalize_suffixes=normalize_suffixes) + + # Optionally exclude channel_in/network_in entries + if args.exclude_channel_in: + durations_by_name = { + k: v for k, v in durations_by_name.items() if ("channel_in" not in k and "network_in" not in k) + } + + # Compute stats per name + def _percentile(values: List[int], p: float) -> float: + if not values: + return 0.0 + vs = sorted(values) + n = len(vs) + # Nearest-rank method + rank = max(1, int(math.ceil(p * n))) + return float(vs[rank - 1]) + + stats_map: Dict[str, Dict[str, float]] = {} + for name, vals in durations_by_name.items(): + total = float(sum(vals)) + count = len(vals) + mean = (total / count) if count else 0.0 + p95 = _percentile(vals, 0.95) + p99 = _percentile(vals, 0.99) + stats_map[name] = { + "total_ns": total, + "count": count, + "mean_ns": mean, + "p95_ns": p95, + "p99_ns": p99, + } + + # Apply threshold filtering as a fraction of total aggregate time + total_ns_all = sum(meta["total_ns"] for meta in stats_map.values()) + if args.threshold and total_ns_all > 0: + stats_map = { + name: meta for name, meta in stats_map.items() if (meta["total_ns"] / total_ns_all) >= args.threshold + } + + # Sort by total time desc + items = sorted(stats_map.items(), key=lambda kv: kv[1]["total_ns"], reverse=True) + + if args.json: + out = { + name: { + "total": _convert_units(int(meta["total_ns"]), args.units), + "count": int(meta["count"]), + "mean": _convert_units(int(meta["mean_ns"]), args.units), + "p95": _convert_units(int(meta["p95_ns"]), args.units), + "p99": _convert_units(int(meta["p99_ns"]), args.units), + } + for name, meta in items + } + print(json.dumps(out, indent=2)) + return 0 + + if args.tree: + # Print primitive distribution before timing tree (non-JSON) + print("primitive distribution") + print("-" * 98) + print(f"{'total':<80} {primitive_total:>16}") + if primitive_by_type: + for k, v in sorted(primitive_by_type.items(), key=lambda kv: kv[1], reverse=True): + print(f"{k:<80} {v:>16}") + if structured_by_subtype: + print() + print("structured by subtype") + print("-" * 98) + for k, v in sorted(structured_by_subtype.items(), key=lambda kv: kv[1], reverse=True): + print(f"{k:<80} {v:>16}") + print() + + # Human-readable hierarchical tree + def build_tree(pairs: List[Tuple[str, int]]): + tree = {} + for full_name, total_ns in pairs: + parts = full_name.split("::") if full_name else [full_name] + cur = tree + for i, part in enumerate(parts): + if 
part not in cur: + cur[part] = {"__total_ns__": 0, "__children__": {}} + if i == len(parts) - 1: + cur[part]["__total_ns__"] += total_ns + cur = cur[part]["__children__"] + return tree + + def flatten_tree(node: dict, level: int = 0): + rows = [] + children = [(k, v) for k, v in node.items() if not k.startswith("__")] + children.sort(key=lambda kv: kv[1]["__total_ns__"], reverse=True) + for name, meta in children: + rows.append((level, name, meta["__total_ns__"])) + rows.extend(flatten_tree(meta["__children__"], level + 1)) + return rows + + # Apply --top to root-level only (tree mode) + root_totals = defaultdict(int) + for name, meta in items: + total_ns = int(meta["total_ns"]) + root = name.split("::")[0] + root_totals[root] += total_ns + sorted_roots = sorted(root_totals.items(), key=lambda kv: kv[1], reverse=True) + if args.top and args.top > 0: + keep_roots = set([r for r, _ in sorted_roots[: args.top]]) + filtered_items = [ + (name, int(meta["total_ns"])) for name, meta in items if name.split("::")[0] in keep_roots + ] + else: + filtered_items = [(name, int(meta["total_ns"])) for name, meta in items] + + tree = build_tree(filtered_items) + rows = flatten_tree(tree) + # Print as table with indentation for the area column + col1 = "area" + col2 = f"total ({args.units})" + col3 = "count" + col4 = f"mean ({args.units})" + col5 = f"p95 ({args.units})" + col6 = f"p99 ({args.units})" + print(f"{col1:<60} {col2:>12} {col3:>8} {col4:>12} {col5:>12} {col6:>12}") + print("-" * 120) + for level, name, total_ns in rows: + total_val = _convert_units(total_ns, args.units) + display = f"{' ' * level}{name}" + meta = stats_map.get(name) + if meta: + count = int(meta["count"]) + mean_val = _convert_units(int(meta["mean_ns"]), args.units) + p95_val = _convert_units(int(meta["p95_ns"]), args.units) + p99_val = _convert_units(int(meta["p99_ns"]), args.units) + print( + f"{display:<60} {total_val:>12.3f} {count:>8} {mean_val:>12.3f} {p95_val:>12.3f} {p99_val:>12.3f}" + ) + else: + print(f"{display:<60} {total_val:>12.3f} {'-':>8} {'-':>12} {'-':>12} {'-':>12}") + else: + # Print primitive distribution before flat table (non-JSON) + print("primitive distribution") + print("-" * 98) + print(f"{'total':<80} {primitive_total:>16}") + if primitive_by_type: + for k, v in sorted(primitive_by_type.items(), key=lambda kv: kv[1], reverse=True): + print(f"{k:<80} {v:>16}") + if structured_by_subtype: + print() + print("structured by subtype") + print("-" * 98) + for k, v in sorted(structured_by_subtype.items(), key=lambda kv: kv[1], reverse=True): + print(f"{k:<80} {v:>16}") + print() + # Flat table output with stats + flat_items = items[: args.top] if (args.top and args.top > 0) else items + col1 = "area" + col2 = f"total ({args.units})" + col3 = "count" + col4 = f"mean ({args.units})" + col5 = f"p95 ({args.units})" + col6 = f"p99 ({args.units})" + print(f"{col1:<60} {col2:>12} {col3:>8} {col4:>12} {col5:>12} {col6:>12}") + print("-" * 120) + for name, meta in flat_items: + total_val = _convert_units(int(meta["total_ns"]), args.units) + count = int(meta["count"]) + mean_val = _convert_units(int(meta["mean_ns"]), args.units) + p95_val = _convert_units(int(meta["p95_ns"]), args.units) + p99_val = _convert_units(int(meta["p99_ns"]), args.units) + print(f"{name:<60} {total_val:>12.3f} {count:>8} {mean_val:>12.3f} {p95_val:>12.3f} {p99_val:>12.3f}") + + return 0 + + +if __name__ == "__main__": + sys.exit(main())
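+
+
+# Example usage (illustrative; assumes trace maps dumped as *.traces.json):
+#   python scripts/support/trace_summarizer.py traces/ --tree --units ms
+#   python scripts/support/trace_summarizer.py run_0.traces.json run_1.traces.json --json --top 10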