1 change: 1 addition & 0 deletions scripts/__init__.py
@@ -0,0 +1 @@
"""Project-level scripts package."""
133 changes: 133 additions & 0 deletions scripts/tests/README.md
@@ -198,6 +198,72 @@ EXTRACT_IMAGES=true API_VERSION=v2 python run.py --case=e2e --dataset=bo767
- `artifacts_dir` (string): Test output directory (auto-generated if null)
- `collection_name` (string): Milvus collection name (auto-generated as `{test_name}_multimodal` if null; deterministic, with no timestamp)

#### Trace Capture Options
- `enable_traces` (boolean): When `true`, the `e2e` case requests parent-level trace payloads from the V2 API (see `docs/docs/extraction/v2-api-guide.md`) and emits per-stage summaries.
- `trace_output_dir` (string or null): Optional override for where raw trace JSON files are written. Defaults to `<artifact_dir>/traces` when null.

When traces are enabled:

1. Each processed document gets a raw payload written to `trace_output_dir/<index>_<source>.json` that contains the complete `"trace::"` dictionary plus a small stage summary. These files match the structure shown in `scripts/private_local/trace.json`.
2. `results.json` automatically gains a `trace_summary` block whenever `run.py` executes a traced case; no manual helper calls are needed.
3. The per-document section now includes the full timing context needed for wait-time analysis:
   - `submission_ts_s`: FastAPI submission timestamp (wall-clock seconds)
   - `ray_start_ts_s` / `ray_end_ts_s`: first stage entry and last stage exit inside Ray
   - `ray_wait_s`: latency between submission and Ray start (pre-Ray wait)
   - `in_ray_queue_s`: cumulative resident seconds spent in `*_channel_in` actors (Ray queue wait)
   - `total_wall_s`: elapsed Ray processing window (`ray_end - ray_start`)
   - `total_resident_s`: summed resident seconds across all compute stages

Example:

```json
"trace_summary": {
"documents": 20,
"output_dir": "/raid/.../scripts/tests/artifacts/bo20_20251119_183733_UTC/traces",
"stage_totals": {
"pdf_extractor": {
"documents": 20,
"total_resident_s": 32.18,
"avg_resident_s": 1.61,
"max_resident_s": 2.04,
"min_resident_s": 1.42
}
},
"document_totals": [
{
"document_index": 0,
"source_id": "doc001.pdf",
"submission_ts_s": 1732194383.812345,
"total_resident_s": 1.58,
"ray_start_ts_s": 1732194384.123456,
"ray_end_ts_s": 1732194399.901234,
"total_wall_s": 15.78,
"ray_wait_s": 0.311111,
"in_ray_queue_s": 0.042381
},
{
"document_index": 1,
"source_id": "doc002.pdf",
"submission_ts_s": 1732194384.034567,
"total_resident_s": 1.64,
"ray_start_ts_s": 1732194385.012345,
"ray_end_ts_s": 1732194400.456789,
"total_wall_s": 15.44,
"ray_wait_s": 0.977778,
"in_ray_queue_s": 0.063255
}
// ...
]
}
```
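
A quick way to sanity-check these fields is to recompute the derived values from the raw timestamps. A minimal sketch, assuming a `results.json` shaped like the excerpt above (the `0.01` s tolerance is arbitrary, loose enough for the rounding in the stored values):

```python
import json

# Recompute derived wait/wall fields from the raw timestamps and flag drift.
with open("results.json") as f:
    summary = json.load(f)["trace_summary"]

for doc in summary["document_totals"]:
    recomputed = {
        "ray_wait_s": doc["ray_start_ts_s"] - doc["submission_ts_s"],  # pre-Ray wait
        "total_wall_s": doc["ray_end_ts_s"] - doc["ray_start_ts_s"],   # Ray processing window
    }
    for name, value in recomputed.items():
        if abs(value - doc[name]) > 0.01:  # stored values are rounded
            print(f"{doc['source_id']}: {name} mismatch ({value:.3f}s vs {doc[name]:.3f}s)")
```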

**Enable via YAML or environment variables:**

- YAML: set `enable_traces: true` (and optionally `trace_output_dir`) in the `active` section.
- Environment: `ENABLE_TRACES=true TRACE_OUTPUT_DIR=/raid/jioffe/traces python run.py --case=e2e --dataset=bo20`

> ℹ️ Tracing is most valuable with V2 + PDF splitting enabled. Follow the guidance in `docs/docs/extraction/v2-api-guide.md` to ensure `api_version=v2` and `pdf_split_page_count` are configured.

### Valid Configuration Values

**text_depth**: `block`, `body`, `document`, `header`, `line`, `nearby_block`, `other`, `page`, `span`
@@ -740,6 +806,8 @@ All test cases receive a validated `TestConfig` object with typed fields, elimin
CASES = ["e2e", "e2e_with_llm_summary", "your_new_case"]
```

> ℹ️ Import hygiene: `run.py` now prepends the `cases/`, `tests/`, `scripts/`, and repo-root directories to `sys.path` before loading a case, so new modules can directly import shared helpers (`from scripts.interact import ...`, `from scripts.tests.utils import ...`) without mutating `sys.path` locally.
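
A minimal case module under this scheme needs only package-style imports and a `main` entry point. A sketch (the file name and body are hypothetical; the signature mirrors `cases/e2e.py`):

```python
# cases/your_new_case.py -- hypothetical skeleton of a new test case
from scripts.interact import kv_event_log


def main(config=None, log_path: str = "test_results") -> int:
    # config is the validated TestConfig; log_path is the artifact directory.
    kv_event_log("enable_traces", getattr(config, "enable_traces", False), log_path)
    return 0  # run.py records this as the case's return code
```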

### Extending Configuration

To add new configurable parameters:
@@ -820,6 +888,71 @@ EXTRACT_IMAGES=true python run.py --case=e2e --dataset=bo767

- **Configuration**: See `config.py` for complete field list and validation logic
- **Test utilities**: See `interact.py` for shared helper functions
## Profiling & Trace Capture Workflow

### Baseline vs RC Trace Runs

1. **Prep environment**
   ```bash
   source ~/setup_env.sh
   nv-restart # baseline (local build)
   # or
   nv-stop && nv-start-release # RC image
   ```
2. **Enable traces + V2 split settings (env vars or YAML)**
   ```bash
   export API_VERSION=v2
   export PDF_SPLIT_PAGE_COUNT=32 # use 16/64/etc. as needed
   export ENABLE_TRACES=true
   # Optional: export TRACE_OUTPUT_DIR=/raid/jioffe/traces/baseline
   ```
3. **Run datasets (repeat for bo20 + bo767, baseline + RC)**
   ```bash
   python scripts/tests/run.py --case=e2e --dataset=bo20
   python scripts/tests/run.py --case=e2e --dataset=bo767
   ```
4. **Collect artifacts**
   - `stdout.txt` → console logs
   - `results.json` → includes `trace_summary` with per-stage resident seconds
   - `traces/*.json` → raw payloads identical to `scripts/private_local/trace.json`
   - Use descriptive folder names (e.g., copy artifacts to `artifacts/baseline_bo20/`) to keep baseline and RC results side by side.
5. **Visualize stage + wall/wait timings**
   ```bash
   # Positional argument = results.json path; emits stage + wall PNGs
   python scripts/tests/tools/plot_stage_totals.py \
     scripts/tests/artifacts/<run>/results.json \
     --doc-top-n 25 \
     --doc-sort wait \
     --sort total \
     --keep-nested \
     --exclude-network
   ```
   - `--keep-nested` preserves nested entries such as chart/table OCR workloads
   - `--exclude-network` hides broker/Redis wait time so the chart focuses on Ray stages
   - `--doc-top-n` controls how many documents appear on the wall-time plot (set `0` for all)
   - `--doc-sort wait` sorts documents by Ray wait time instead of wall time (useful for large jobs)
   - `--skip-wall-plot` emits only the resident-time chart if you want the legacy behavior
   - Wall-time output now includes:
     - Pre-Ray wait (submission → Ray start)
     - In-Ray queue totals (sum of `*_channel_in` resident time)
     - Execution window (Ray start/stop timestamps plus wall vs. resident bars)
   - Percentile summaries of wait and queue time across the entire dataset are printed in the CLI output; a sketch of that computation follows this list.
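
The dataset-wide percentiles can be reproduced offline from the same `trace_summary` block; a minimal sketch (the percentile choices are illustrative, not necessarily the tool's exact output):

```python
import json
import statistics

# Recompute dataset-wide wait/queue percentiles from a results.json file.
with open("results.json") as f:
    docs = json.load(f)["trace_summary"]["document_totals"]

for field in ("ray_wait_s", "in_ray_queue_s"):
    cuts = statistics.quantiles((d[field] for d in docs), n=100)  # cut points p1..p99
    print(f"{field}: p50={cuts[49]:.3f}s p90={cuts[89]:.3f}s p99={cuts[98]:.3f}s")
```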

### Future Trace Visualization Case (roadmap)

With `trace_summary` + raw dumps in place, a follow-on `cases/e2e_profile.py` can (see the sketch after this list):
- Accept one or two artifact directories (baseline vs RC) as input.
- Load each run’s `trace_summary` and optional raw trace JSON to compute deltas.
- Emit per-stage bar/violin charts (resident vs wall clock) plus CSV summaries.
- Store outputs under `trace_profile/` inside each artifact directory.
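
One possible shape for the comparison step, sketched against the `stage_totals` structure shown earlier (the function and paths are placeholders, not a committed design):

```python
import json


def stage_deltas(baseline_path: str, rc_path: str) -> dict:
    """Per-stage resident-time deltas between two runs (illustrative only)."""

    def stage_totals(path: str) -> dict:
        with open(path) as f:
            return json.load(f)["trace_summary"]["stage_totals"]

    base, rc = stage_totals(baseline_path), stage_totals(rc_path)
    return {
        stage: {
            "baseline_s": base[stage]["total_resident_s"],
            "rc_s": rc[stage]["total_resident_s"],
            "delta_pct": 100.0 * (rc[stage]["total_resident_s"]
                                  / base[stage]["total_resident_s"] - 1.0),
        }
        for stage in base.keys() & rc.keys()  # only stages present in both runs
    }
```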

Open questions before implementing:
- Preferred plotting backend (`matplotlib`, `plotly`, or `altair`) and whether CI should emit PNG, HTML, or both.
- Artifact naming for comparisons (e.g., `trace_profile/baseline_vs_rc/bo20.png`).
- Regression thresholds and alerting expectations (what % delta constitutes a failure?).
- Should the visualization case automatically diff against the most recent baseline or just emit standalone assets?

Capturing these answers (plus a few representative trace samples) will let a future sprint land the visualization workflow quickly.
- **Docker setup**: See project root README for service management commands
- **API documentation**: See `docs/` for API version differences

1 change: 1 addition & 0 deletions scripts/tests/__init__.py
@@ -0,0 +1 @@
"""scripts.tests package initialization."""
48 changes: 43 additions & 5 deletions scripts/tests/cases/e2e.py
@@ -2,16 +2,20 @@
 import logging
 import os
 import shutil
-import sys
 import time
 
 from nv_ingest_client.client import Ingestor
 from nv_ingest_client.util.document_analysis import analyze_document_chunks
 from nv_ingest_client.util.milvus import nvingest_retrieval
 
-# Import from interact module (now properly structured)
-sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
-from interact import embed_info, kv_event_log, milvus_chunks, segment_results, pdf_page_count
+from scripts.interact import (
+    embed_info,
+    kv_event_log,
+    milvus_chunks,
+    pdf_page_count,
+    segment_results,
+)
+from scripts.tests.utils.trace_summary import summarize_traces
 
 # Future: Will integrate with modular ingest_documents.py when VDB upload is separated
 
@@ -77,6 +81,18 @@ def main(config=None, log_path: str = "test_results") -> int:
     enable_caption = config.enable_caption
     enable_split = config.enable_split
 
+    # Trace capture
+    enable_traces = getattr(config, "enable_traces", False)
+    trace_output_dir = getattr(config, "trace_output_dir", None)
+    trace_dir = None
+    if enable_traces:
+        trace_dir = (
+            os.path.abspath(os.path.expanduser(trace_output_dir))
+            if trace_output_dir
+            else os.path.join(log_path, "traces")
+        )
+        print(f"Trace capture enabled. Raw traces will be written to: {trace_dir}")
 
     # Text splitting configuration
     split_chunk_size = config.split_chunk_size
     split_chunk_overlap = config.split_chunk_overlap
@@ -179,7 +195,20 @@ def main(config=None, log_path: str = "test_results") -> int:
         .save_to_disk(output_directory=spill_dir)
     )
 
-    results, failures = ingestor.ingest(show_progress=True, return_failures=True, save_to_disk=True)
+    ingest_kwargs = {
+        "show_progress": True,
+        "return_failures": True,
+        "save_to_disk": True,
+    }
+    if enable_traces:
+        ingest_kwargs["return_traces"] = True
+
+    ingest_result = ingestor.ingest(**ingest_kwargs)
+    if enable_traces:
+        results, failures, traces_list = ingest_result
+    else:
+        results, failures = ingest_result
+        traces_list = []
     ingestion_time = time.time() - ingestion_start
     kv_event_log("result_count", len(results), log_path)
     kv_event_log("failure_count", len(failures), log_path)
@@ -236,6 +265,12 @@ def main(config=None, log_path: str = "test_results") -> int:
     retrieval_time = time.time() - querying_start
     kv_event_log("retrieval_time_s", retrieval_time, log_path)
 
+    trace_summary = None
+    if enable_traces:
+        trace_summary = summarize_traces(traces_list, results, trace_dir)
+        if trace_summary:
+            kv_event_log("trace_documents", trace_summary["documents"], log_path)
 
     # Summarize - Build comprehensive results dict
     dataset_name = os.path.basename(data_dir.rstrip("/")) if data_dir else "unknown"
     test_name = config.test_name or dataset_name
@@ -280,6 +315,9 @@ def main(config=None, log_path: str = "test_results") -> int:
     test_results["test_config"]["split_chunk_size"] = split_chunk_size
     test_results["test_config"]["split_chunk_overlap"] = split_chunk_overlap
 
+    if trace_summary:
+        test_results["trace_summary"] = trace_summary
 
     print(f"\n{test_name}_e2e summary:")
     print(json.dumps(test_results, indent=2))
 
6 changes: 6 additions & 0 deletions scripts/tests/config.py
@@ -56,6 +56,10 @@ class TestConfig:
     split_chunk_size: int = 1024
     split_chunk_overlap: int = 150
 
+    # Trace configuration
+    enable_traces: bool = False
+    trace_output_dir: Optional[str] = None
 
     # Storage configuration
     spill_dir: str = "/tmp/spill"
     artifacts_dir: Optional[str] = None
@@ -265,6 +269,8 @@ def parse_list(value: str) -> List[str]:
"ENABLE_SPLIT": ("enable_split", parse_bool),
"SPLIT_CHUNK_SIZE": ("split_chunk_size", parse_int),
"SPLIT_CHUNK_OVERLAP": ("split_chunk_overlap", parse_int),
"ENABLE_TRACES": ("enable_traces", parse_bool),
"TRACE_OUTPUT_DIR": ("trace_output_dir", str),
"SPILL_DIR": ("spill_dir", str),
"ARTIFACTS_DIR": ("artifacts_dir", str),
"COLLECTION_NAME": ("collection_name", str),
11 changes: 9 additions & 2 deletions scripts/tests/run.py
@@ -179,6 +179,8 @@ def run_datasets(
"infrastructure": "managed" if managed else "attach",
"api_version": config.api_version,
"pdf_split_page_count": config.pdf_split_page_count,
"enable_traces": getattr(config, "enable_traces", False),
"trace_output_dir": getattr(config, "trace_output_dir", None),
"return_code": rc,
}

@@ -269,8 +271,13 @@ def close(self):

     # Add cases directory to sys.path so modules can import from utils
     cases_dir = os.path.dirname(case_path)
-    if cases_dir not in sys.path:
-        sys.path.insert(0, cases_dir)
+    tests_dir = os.path.dirname(cases_dir)
+    scripts_dir = os.path.dirname(tests_dir)
+    repo_root = os.path.dirname(scripts_dir)
+
+    for path in (cases_dir, tests_dir, scripts_dir, repo_root):
+        if path and path not in sys.path:
+            sys.path.insert(0, path)
 
     # Load and execute the test case module
     spec = importlib.util.spec_from_file_location(case_name, case_path)
4 changes: 4 additions & 0 deletions scripts/tests/test_configs.yaml
@@ -38,6 +38,10 @@ active:
   split_chunk_size: 1024 # Chunk size for text splitting
   split_chunk_overlap: 150 # Overlap for text splitting
 
+  # Trace configuration
+  enable_traces: false
+  trace_output_dir: null # Null => artifacts/<run>/traces
 
   # Storage configuration
   spill_dir: /tmp/spill
   artifacts_dir: null # null = use default (scripts/tests/artifacts)