From 3d7d408a5a725f9844560822797f92ae33cf3638 Mon Sep 17 00:00:00 2001 From: Sai Krishnan Chandrasekar Date: Thu, 13 Nov 2025 09:59:16 -0800 Subject: [PATCH 1/6] Add Zarr reader for Crash --- .../crash/conf/config.yaml | 2 +- .../crash/conf/reader/zarr.yaml | 18 + .../crash/tests/test_zarr_reader.py | 340 ++++++++++++++++++ .../structural_mechanics/crash/zarr_reader.py | 226 ++++++++++++ 4 files changed, 585 insertions(+), 1 deletion(-) create mode 100644 examples/structural_mechanics/crash/conf/reader/zarr.yaml create mode 100644 examples/structural_mechanics/crash/tests/test_zarr_reader.py create mode 100644 examples/structural_mechanics/crash/zarr_reader.py diff --git a/examples/structural_mechanics/crash/conf/config.yaml b/examples/structural_mechanics/crash/conf/config.yaml index 3a3e288d66..7bba46e8c4 100644 --- a/examples/structural_mechanics/crash/conf/config.yaml +++ b/examples/structural_mechanics/crash/conf/config.yaml @@ -25,7 +25,7 @@ experiment_desc: "unified training recipe for crash models" run_desc: "unified training recipe for crash models" defaults: - - reader: vtp #d3plot + - reader: vtp # Options are: vtp, d3plot, zarr - datapipe: point_cloud # will be overridden by model configs - model: transolver_autoregressive_rollout_training - training: default diff --git a/examples/structural_mechanics/crash/conf/reader/zarr.yaml b/examples/structural_mechanics/crash/conf/reader/zarr.yaml new file mode 100644 index 0000000000..733730067f --- /dev/null +++ b/examples/structural_mechanics/crash/conf/reader/zarr.yaml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_target_: zarr_reader.Reader +_convert_: all \ No newline at end of file diff --git a/examples/structural_mechanics/crash/tests/test_zarr_reader.py b/examples/structural_mechanics/crash/tests/test_zarr_reader.py new file mode 100644 index 0000000000..d79f173e77 --- /dev/null +++ b/examples/structural_mechanics/crash/tests/test_zarr_reader.py @@ -0,0 +1,340 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
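"""Unit tests for the crash-example Zarr reader (`zarr_reader.py`)."""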
+ +import tempfile +from pathlib import Path + +import numpy as np +import pytest +import zarr + +# Import functions from zarr_reader +import sys + +sys.path.insert(0, str(Path(__file__).parent.parent)) +import zarr_reader + + +def create_mock_zarr_store( + store_path: Path, + num_timesteps: int = 3, + num_nodes: int = 4, + thickness_value: float = 1.0, +): + """ + Helper function to create a mock Zarr store with crash simulation data. + + Args: + store_path: Path where the Zarr store should be created + num_timesteps: Number of timesteps + num_nodes: Number of nodes + thickness_value: Constant thickness value for all nodes + + Returns: + Tuple of (mesh_pos, node_thickness, edges) arrays that were written + """ + store_path.mkdir(exist_ok=True) + + # Create mock data + mesh_pos = np.random.randn(num_timesteps, num_nodes, 3).astype(np.float32) + node_thickness = np.ones(num_nodes, dtype=np.float32) * thickness_value + edges = np.array([[0, 1], [1, 2], [2, 3], [3, 0]], dtype=np.int64) + + # Write to Zarr store + store = zarr.open(str(store_path), mode="w") + store.create_dataset("mesh_pos", data=mesh_pos, dtype=np.float32) + store.create_dataset("thickness", data=node_thickness, dtype=np.float32) + store.create_dataset("edges", data=edges, dtype=np.int64) + + return mesh_pos, node_thickness, edges + + +@pytest.fixture +def mock_zarr_store(): + """Create a temporary Zarr store with mock crash simulation data.""" + with tempfile.TemporaryDirectory() as temp_dir: + store_path = Path(temp_dir) / "Run001.zarr" + mesh_pos, node_thickness, edges = create_mock_zarr_store( + store_path, thickness_value=2.0 + ) + yield temp_dir, mesh_pos, node_thickness, edges + + +@pytest.fixture +def mock_zarr_directory(): + """Create a directory with multiple Zarr stores.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create multiple zarr stores + for i in range(3): + store_path = temp_path / f"Run{i:03d}.zarr" + create_mock_zarr_store(store_path) + + # Create a non-zarr directory (should be ignored) + (temp_path / "NotAZarr").mkdir() + + # Create a regular file (should be ignored) + (temp_path / "some_file.txt").touch() + + yield temp_dir + + +def test_find_zarr_stores(mock_zarr_directory): + """Test that find_zarr_stores correctly identifies Zarr directories.""" + zarr_stores = zarr_reader.find_zarr_stores(mock_zarr_directory) + + assert len(zarr_stores) == 3, f"Expected 3 zarr stores, got {len(zarr_stores)}" + assert all(path.endswith(".zarr") for path in zarr_stores) + assert all("Run" in Path(path).name for path in zarr_stores) + + +def test_find_zarr_stores_empty_directory(): + """Test find_zarr_stores with empty directory.""" + with tempfile.TemporaryDirectory() as temp_dir: + zarr_stores = zarr_reader.find_zarr_stores(temp_dir) + assert len(zarr_stores) == 0, ( + "Should return empty list for directory with no zarr stores" + ) + + +def test_find_zarr_stores_nonexistent_directory(): + """Test find_zarr_stores with nonexistent directory.""" + zarr_stores = zarr_reader.find_zarr_stores("/nonexistent/path") + assert len(zarr_stores) == 0, "Should return empty list for nonexistent directory" + + +def test_load_zarr_store(mock_zarr_store): + """Test loading data from a Zarr store.""" + temp_dir, expected_mesh_pos, expected_thickness, expected_edges = mock_zarr_store + store_path = Path(temp_dir) / "Run001.zarr" + + mesh_pos, edges, point_data_dict = zarr_reader.load_zarr_store(str(store_path)) + + # Check shapes + assert mesh_pos.shape == expected_mesh_pos.shape + assert 
edges.shape == expected_edges.shape + assert "thickness" in point_data_dict, "Should have thickness in point_data" + assert point_data_dict["thickness"].shape == expected_thickness.shape + + # Check data types + assert mesh_pos.dtype == np.float64, "mesh_pos should be float64" + assert point_data_dict["thickness"].dtype == np.float32, ( + "thickness should be float32" + ) + assert edges.dtype == np.int64, "edges should be int64" + + # Check values + np.testing.assert_array_almost_equal(mesh_pos, expected_mesh_pos) + np.testing.assert_array_almost_equal( + point_data_dict["thickness"], expected_thickness + ) + np.testing.assert_array_equal(edges, expected_edges) + + +def test_load_zarr_store_missing_fields(): + """Test that loading a Zarr store with missing required fields raises KeyError.""" + with tempfile.TemporaryDirectory() as temp_dir: + store_path = Path(temp_dir) / "incomplete.zarr" + store_path.mkdir() + + # Create store with only thickness (missing mesh_pos and edges) + store = zarr.open(str(store_path), mode="w") + store.create_dataset("thickness", data=np.ones(4, dtype=np.float32)) + + # Should raise KeyError for missing mesh_pos + with pytest.raises(KeyError, match="mesh_pos"): + zarr_reader.load_zarr_store(str(store_path)) + + # Test missing edges + store_path2 = Path(temp_dir) / "incomplete2.zarr" + store_path2.mkdir() + store2 = zarr.open(str(store_path2), mode="w") + store2.create_dataset( + "mesh_pos", data=np.random.randn(3, 4, 3).astype(np.float32) + ) + + # Should raise KeyError for missing edges + with pytest.raises(KeyError, match="edges"): + zarr_reader.load_zarr_store(str(store_path2)) + + +def test_load_zarr_store_multiple_point_data_fields(): + """Test that load_zarr_store dynamically reads all point data fields.""" + with tempfile.TemporaryDirectory() as temp_dir: + store_path = Path(temp_dir) / "multi_fields.zarr" + store_path.mkdir() + + # Create store with multiple point data fields + num_nodes = 10 + store = zarr.open(str(store_path), mode="w") + store.create_dataset( + "mesh_pos", data=np.random.randn(3, num_nodes, 3).astype(np.float32) + ) + store.create_dataset("edges", data=np.array([[0, 1]], dtype=np.int64)) + # Add multiple point data fields + store.create_dataset("thickness", data=np.ones(num_nodes, dtype=np.float32)) + store.create_dataset( + "stress", data=np.random.randn(num_nodes).astype(np.float32) + ) + store.create_dataset( + "temperature", data=np.random.randn(num_nodes).astype(np.float32) + ) + # This should be skipped (mesh connectivity, not point data) + store.create_dataset( + "mesh_connectivity_flat", data=np.array([0, 1, 2], dtype=np.int64) + ) + + mesh_pos, edges, point_data_dict = zarr_reader.load_zarr_store(str(store_path)) + + # Should have all three point data fields + assert "thickness" in point_data_dict + assert "stress" in point_data_dict + assert "temperature" in point_data_dict + # Should NOT include mesh connectivity + assert "mesh_connectivity_flat" not in point_data_dict + # Should NOT include mesh_pos or edges + assert "mesh_pos" not in point_data_dict + assert "edges" not in point_data_dict + + # Check that all point data fields have correct shape + for name, data in point_data_dict.items(): + assert data.shape == (num_nodes,), ( + f"{name} should have shape ({num_nodes},)" + ) + assert data.dtype == np.float32, f"{name} should be float32" + + +def test_process_zarr_data(mock_zarr_directory): + """Test processing multiple Zarr stores.""" + srcs, dsts, point_data = zarr_reader.process_zarr_data( + 
data_dir=mock_zarr_directory, + num_samples=2, + ) + + # Check we got 2 samples + assert len(srcs) == 2, f"Expected 2 samples, got {len(srcs)}" + assert len(dsts) == 2 + assert len(point_data) == 2 + + # Check each sample has correct structure + for i in range(2): + assert srcs[i].ndim == 1, "srcs should be 1D array" + assert dsts[i].ndim == 1, "dsts should be 1D array" + assert len(srcs[i]) == len(dsts[i]), "srcs and dsts should have same length" + + # Check point_data structure + assert "coords" in point_data[i], "point_data should have 'coords' key" + assert "thickness" in point_data[i], "point_data should have 'thickness' key" + + coords = point_data[i]["coords"] + thickness = point_data[i]["thickness"] + + assert coords.ndim == 3, "coords should be [T,N,3]" + assert coords.shape[-1] == 3, "coords last dimension should be 3" + assert thickness.ndim == 1, "thickness should be 1D" + assert len(thickness) == coords.shape[1], ( + "thickness length should match num_nodes" + ) + + +def test_process_zarr_data_no_stores(): + """Test that processing directory with no Zarr stores raises error.""" + with tempfile.TemporaryDirectory() as temp_dir: + with pytest.raises(ValueError, match="No .zarr stores found"): + zarr_reader.process_zarr_data( + data_dir=temp_dir, + num_samples=1, + ) + + +def test_process_zarr_data_validation(): + """Test that process_zarr_data validates data shapes.""" + with tempfile.TemporaryDirectory() as temp_dir: + store_path = Path(temp_dir) / "bad_store.zarr" + store_path.mkdir() + + # Create store with invalid mesh_pos shape (should be [T,N,3]) + store = zarr.open(str(store_path), mode="w") + store.create_dataset( + "mesh_pos", data=np.random.randn(3, 4, 2).astype(np.float32) + ) # Wrong last dim + store.create_dataset("thickness", data=np.ones(4, dtype=np.float32)) + store.create_dataset("edges", data=np.array([[0, 1]], dtype=np.int64)) + + with pytest.raises(ValueError, match="mesh_pos must be"): + zarr_reader.process_zarr_data( + data_dir=temp_dir, + num_samples=1, + ) + + +def test_process_zarr_data_edge_bounds(): + """Test that process_zarr_data validates edge indices are within bounds.""" + with tempfile.TemporaryDirectory() as temp_dir: + store_path = Path(temp_dir) / "bad_edges.zarr" + store_path.mkdir() + + num_nodes = 4 + store = zarr.open(str(store_path), mode="w") + store.create_dataset( + "mesh_pos", data=np.random.randn(3, num_nodes, 3).astype(np.float32) + ) + store.create_dataset("thickness", data=np.ones(num_nodes, dtype=np.float32)) + # Edge references node 10 which is out of bounds + store.create_dataset("edges", data=np.array([[0, 10]], dtype=np.int64)) + + with pytest.raises(ValueError, match="Edge indices out of bounds"): + zarr_reader.process_zarr_data( + data_dir=temp_dir, + num_samples=1, + ) + + +def test_reader_class(mock_zarr_directory): + """Test the Reader class callable interface.""" + reader = zarr_reader.Reader() + + srcs, dsts, point_data = reader( + data_dir=mock_zarr_directory, + num_samples=2, + split="train", + ) + + assert len(srcs) == 2 + assert len(dsts) == 2 + assert len(point_data) == 2 + + +def test_natural_sorting(mock_zarr_directory): + """Test that Zarr stores are sorted naturally (Run1, Run2, ..., Run10).""" + temp_path = Path(mock_zarr_directory) + + # Add more stores with different numbering + for i in [10, 5, 20]: + store_path = temp_path / f"Run{i}.zarr" + create_mock_zarr_store(store_path) + + zarr_stores = zarr_reader.find_zarr_stores(mock_zarr_directory) + store_names = [Path(p).name for p in zarr_stores] + + # 
Should be sorted: Run000, Run001, Run002, Run5, Run10, Run20 + assert store_names[0] == "Run000.zarr" + assert store_names[1] == "Run001.zarr" + assert store_names[2] == "Run002.zarr" + assert "Run5.zarr" in store_names + assert "Run10.zarr" in store_names + assert "Run20.zarr" in store_names diff --git a/examples/structural_mechanics/crash/zarr_reader.py b/examples/structural_mechanics/crash/zarr_reader.py new file mode 100644 index 0000000000..191494c2a5 --- /dev/null +++ b/examples/structural_mechanics/crash/zarr_reader.py @@ -0,0 +1,226 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import numpy as np +import zarr + + +def find_zarr_stores(base_data_dir: str) -> list[str]: + """ + Find all Zarr stores (directories ending with .zarr) in the base directory. + + Args: + base_data_dir: Path to directory containing Zarr stores. + + Returns: + List of Zarr store paths sorted naturally. + """ + if not os.path.isdir(base_data_dir): + return [] + + zarr_stores = [ + os.path.join(base_data_dir, f) + for f in os.listdir(base_data_dir) + if f.endswith(".zarr") and os.path.isdir(os.path.join(base_data_dir, f)) + ] + + def natural_key(name): + """Natural sort key to handle numeric sorting.""" + return [ + int(s) if s.isdigit() else s.lower() + for s in re.findall(r"\d+|\D+", os.path.basename(name)) + ] + + return sorted(zarr_stores, key=natural_key) + + +def load_zarr_store(zarr_path: str): + """ + Load mesh positions, edges, and all point data fields from a Zarr store. + + Args: + zarr_path: Path to the Zarr store directory. + + Returns: + mesh_pos: (timesteps, num_nodes, 3) temporal positions + edges: (num_edges, 2) edge connectivity + point_data_dict: Dictionary of all point data fields (e.g., thickness, etc.) + """ + store = zarr.open(zarr_path, mode="r") + + # Read mesh positions (temporal coordinates) + if "mesh_pos" not in store: + raise KeyError(f"'mesh_pos' not found in Zarr store {zarr_path}") + mesh_pos = np.array(store["mesh_pos"][:], dtype=np.float64) + + # Read edges + if "edges" not in store: + raise KeyError(f"'edges' not found in Zarr store {zarr_path}") + edges = np.array(store["edges"][:], dtype=np.int64) + + # Extract all other datasets as point data (excluding mesh-level data) + # Skip: mesh_pos, edges, mesh_connectivity_* (these are not per-node features) + point_data_dict = {} + for name in store.keys(): + if name in ("mesh_pos", "edges"): + continue + if name.startswith("mesh_connectivity_"): + continue + # Read as point data feature + point_data_dict[name] = np.array(store[name][:], dtype=np.float32) + + return mesh_pos, edges, point_data_dict + + +def process_zarr_data( + data_dir: str, + num_samples: int, + logger=None, +): + """ + Process Zarr crash simulation data from a given directory. + + Each .zarr store is treated as one sample. 
Reads mesh positions, edges, + and all available point data fields (e.g., thickness, etc.) from the Zarr stores. + + Args: + data_dir: Directory containing .zarr stores + num_samples: Maximum number of samples to process + logger: Optional logger for logging progress + + Returns: + srcs: List of source node indices for edges (one array per sample) + dsts: List of destination node indices for edges (one array per sample) + point_data_all: List of dicts with 'coords' and all point data fields + """ + zarr_stores = find_zarr_stores(data_dir) + + if not zarr_stores: + if logger: + logger.error(f"No .zarr stores found in: {data_dir}") + raise ValueError(f"No .zarr stores found in: {data_dir}") + + srcs, dsts = [], [] + point_data_all = [] + + processed_runs = 0 + for zarr_path in zarr_stores: + if processed_runs >= num_samples: + break + + if logger: + logger.info(f"Processing Zarr store: {os.path.basename(zarr_path)}") + + try: + mesh_pos, edges, point_data_dict = load_zarr_store(zarr_path) + + # Validate shapes + if mesh_pos.ndim != 3 or mesh_pos.shape[-1] != 3: + raise ValueError( + f"mesh_pos must be [T,N,3], got {mesh_pos.shape} in {zarr_path}" + ) + + if edges.ndim != 2 or edges.shape[-1] != 2: + raise ValueError( + f"edges must be [E,2], got {edges.shape} in {zarr_path}" + ) + + num_nodes = mesh_pos.shape[1] + + # Validate point data features + for name, data in point_data_dict.items(): + if data.ndim != 1: + raise ValueError( + f"Point data '{name}' must be [N], got {data.shape} in {zarr_path}" + ) + if len(data) != num_nodes: + raise ValueError( + f"Point data '{name}' length {len(data)} doesn't match " + f"number of nodes {num_nodes} in {zarr_path}" + ) + + # Validate edge indices are within bounds + if edges.size > 0: + if edges.min() < 0 or edges.max() >= num_nodes: + raise ValueError( + f"Edge indices out of bounds [0, {num_nodes - 1}] in {zarr_path}" + ) + + # Extract source and destination node indices from edges + src, dst = edges.T + srcs.append(src) + dsts.append(dst) + + # Create record with coordinates and all point data fields + record = {"coords": mesh_pos} + record.update(point_data_dict) # Add all point data features dynamically + point_data_all.append(record) + + processed_runs += 1 + + except Exception as e: + if logger: + logger.error(f"Error processing {zarr_path}: {e}") + raise + + if logger: + logger.info(f"Successfully processed {processed_runs} Zarr stores") + + return srcs, dsts, point_data_all + + +class Reader: + """ + Reader for Zarr crash simulation stores. + + This reader loads preprocessed crash simulation data from Zarr stores + created by the PhysicsNeMo Curator ETL pipeline. + """ + + def __init__(self): + """Initialize the Zarr reader.""" + pass + + def __call__( + self, + data_dir: str, + num_samples: int, + split: str | None = None, + logger=None, + **kwargs, + ): + """ + Load Zarr crash simulation data. 
+ + Args: + data_dir: Directory containing .zarr stores + num_samples: Number of samples to load + split: Data split ('train', 'validation', 'test') - not used for Zarr + logger: Optional logger + **kwargs: Additional arguments (ignored) + + Returns: + srcs: List of source node arrays for graph edges + dsts: List of destination node arrays for graph edges + point_data: List of dicts with 'coords' and all available point data fields + """ + return process_zarr_data( + data_dir=data_dir, + num_samples=num_samples, + logger=logger, + ) From 748c8f95a9143d91205f5d69c91a0a69a9b0436e Mon Sep 17 00:00:00 2001 From: Sai Krishnan Chandrasekar Date: Thu, 13 Nov 2025 10:10:26 -0800 Subject: [PATCH 2/6] Update README --- examples/structural_mechanics/crash/README.md | 121 +++++++++++++++--- 1 file changed, 104 insertions(+), 17 deletions(-) diff --git a/examples/structural_mechanics/crash/README.md b/examples/structural_mechanics/crash/README.md index 1475fa393f..6b8a5ebb3e 100644 --- a/examples/structural_mechanics/crash/README.md +++ b/examples/structural_mechanics/crash/README.md @@ -36,7 +36,7 @@ For an in-depth comparison between the Transolver and MeshGraphNet models and th ```yaml # conf/config.yaml defaults: - - reader: vtp # or d3plot, or your custom reader + - reader: vtp # vtp, zarr, d3plot, or your custom reader - datapipe: point_cloud # or graph - model: transolver_time_conditional # or an MGN variant - training: default @@ -47,7 +47,7 @@ defaults: 2) Point to your datasets and core training knobs. - `conf/training/default.yaml`: - - `raw_data_dir`: path to TRAIN runs (folder of run folders for d3plot, or folder of .vtp files for VTP) + - `raw_data_dir`: path to TRAIN runs (folder of run folders for d3plot, folder of .vtp files for VTP, or folder of .zarr stores for Zarr) - `num_time_steps`: number of frames to use per run - `num_training_samples`: how many runs to load @@ -77,6 +77,7 @@ features: [thickness] # or [] for no features; preserve order if adding more 4) Reader‑specific options (optional). - d3plot: `conf/reader/d3plot.yaml` → `wall_node_disp_threshold` +- VTP and Zarr readers have no additional options (they read pre-processed data) 5) Model config: ensure input dimensions match your features. @@ -127,26 +128,38 @@ This will install: [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator). Using `PhysicsNeMo-Curator`, crash simulation data from LS-DYNA can be processed into training-ready formats easily. -Currently, this can be used to preprocess d3plot files into VTP. +PhysicsNeMo-Curator can preprocess d3plot files into **VTP** (for visualization and smaller datasets) or **Zarr** (for large-scale ML training). ### Quick Start Install PhysicsNeMo-Curator following [these instructions](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#installation-and-usage). -Process your LS-DYNA data: +Process your LS-DYNA data to **VTP format**: ```bash export PYTHONPATH=$PYTHONPATH:examples && -physicsnemo-curator-etl \ - --config-dir=examples/config \ - --config-name=crash_etl \ - etl.source.input_dir=/data/crash_sims/ \ - etl.sink.output_dir=/data/crash_processed_vtp/ \ +physicsnemo-curator-etl \ + --config-dir=examples/structural_mechanics/crash/config \ + --config-name=crash_etl \ + serialization_format=vtp \ + etl.source.input_dir=/data/crash_sims/ \ + serialization_format.sink.output_dir=/data/crash_vtp/ \ etl.processing.num_processes=4 ``` -This will process all LS-DYNA runs in `/data/crash_sims/` and output VTP files to `/data/crash_processed_vtp/`. 
+Or process to **Zarr format** for large-scale training: + +```bash +export PYTHONPATH=$PYTHONPATH:examples && +physicsnemo-curator-etl \ + --config-dir=examples/structural_mechanics/crash/config \ + --config-name=crash_etl \ + serialization_format=zarr \ + etl.source.input_dir=/data/crash_sims/ \ + serialization_format.sink.output_dir=/data/crash_zarr/ \ + etl.processing.num_processes=4 +``` ### Input Data Structure @@ -165,7 +178,7 @@ crash_sims/ ### Output Formats -#### VTP Format (Recommended for this example) +#### VTP Format Produces single VTP file per run with all timesteps as displacement fields: @@ -179,10 +192,33 @@ crash_processed_vtp/ Each VTP contains: - Reference coordinates at t=0 - Displacement fields: `displacement_t0.000`, `displacement_t0.005`, etc. -- Node thickness values +- Node thickness and other point data features This format is directly compatible with the VTP reader in this example. +#### Zarr Format + +Produces one Zarr store per run with pre-computed graph structure: + +``` +crash_processed_zarr/ +├── Run100.zarr/ +│ ├── mesh_pos # (timesteps, nodes, 3) - temporal positions +│ ├── thickness # (nodes,) - node features +│ └── edges # (num_edges, 2) - pre-computed graph connectivity +├── Run101.zarr/ +└── ... +``` + +Each Zarr store contains: +- `mesh_pos`: Full temporal trajectory (no displacement reconstruction needed) +- `thickness`: Per-node features +- `edges`: Pre-computed edge connectivity (no edge rebuilding during training) + +**NOTE:** All heavy preprocessing (node filtering, edge building, thickness computation) is done once during curation using PhysicsNeMo-Curator. The reader simply loads pre-computed arrays. + +This format is directly compatible with the Zarr reader in this example. + ## Training Training is managed via Hydra configurations located in conf/. @@ -277,14 +313,15 @@ If you use the graph datapipe, the edge list is produced by walking the filtered ### Built‑in VTP reader (PolyData) -In addition to `d3plot`, a lightweight VTP reader is provided in `vtp_reader.py`. It treats each `.vtp` file in a directory as a separate run and expects point displacements to be stored as vector arrays in `poly.point_data` with names like `displacement_t0.000`, `displacement_t0.005`, … (a more permissive fallback of any `displacement_t*` is also supported). The reader: +A lightweight VTP reader is provided in `vtp_reader.py`. It treats each `.vtp` file in a directory as a separate run and expects point displacements to be stored as vector arrays in `poly.point_data` with names like `displacement_t0.000`, `displacement_t0.005`, … (a more permissive fallback of any `displacement_t*` is also supported). The reader: - loads the reference coordinates from `poly.points` - builds absolute positions per timestep as `[t0: coords, t>0: coords + displacement_t]` - extracts cell connectivity from the PolyData faces and converts it to unique edges -- returns `(srcs, dsts, point_data)` where `point_data` contains `'coords': [T, N, 3]` +- extracts all point data fields dynamically (e.g., thickness, modulus) +- returns `(srcs, dsts, point_data)` where `point_data` contains `'coords': [T, N, 3]` and all feature arrays -By default, the VTP reader does not attach additional features; it is compatible with `features: []`. If your `.vtp` files include additional per‑point arrays you would like to model (e.g., thickness or modulus), extend the reader to add those arrays to each run’s record using keys that match your `features` list. 
The datapipe will then concatenate them in the configured order.
+The VTP reader dynamically extracts all non-displacement point data fields from the VTP file and makes them available to the datapipe. If your `.vtp` files include additional per‑point arrays (e.g., thickness or modulus), simply add their names to the `features` list in your datapipe config.

Example Hydra configuration for the VTP reader:

```yaml
# conf/reader/vtp.yaml
_target_: vtp_reader.Reader
```

Select it in `conf/config.yaml`:

```yaml
@@ -304,12 +341,58 @@ defaults:
   - reader: vtp
```

-And set `features` to empty (or to the names you add in your extended reader) in `conf/datapipe/point_cloud.yaml` or `conf/datapipe/graph.yaml`:
+And configure features in `conf/datapipe/point_cloud.yaml` or `conf/datapipe/graph.yaml`:

```yaml
-features: [] # or [thickness, Y_modulus] if your reader provides them
+features: [thickness] # or [] for no features
```

+### Built‑in Zarr reader
+
+A Zarr reader is provided in `zarr_reader.py`. It reads pre-processed Zarr stores created by PhysicsNeMo-Curator, where all heavy computation (node filtering, edge building, thickness computation) has already been done during the ETL pipeline. The reader:
+
+- loads pre-computed temporal positions directly from `mesh_pos` (no displacement reconstruction)
+- loads pre-computed edges (no connectivity-to-edge conversion needed)
+- dynamically extracts all point data fields (thickness, etc.) from the Zarr store
+- returns `(srcs, dsts, point_data)` in the same format as the VTP reader
+
+Data layout expected by the Zarr reader:
+- `/*.zarr/` (each `.zarr` directory is treated as one run)
+- Each Zarr store must contain:
+  - `mesh_pos`: `[T, N, 3]` temporal positions
+  - `edges`: `[E, 2]` pre-computed edge connectivity
+  - Feature arrays (e.g., `thickness`): `[N]` or `[N, K]` per-node features
+
+Example Hydra configuration for the Zarr reader:
+
+```yaml
+# conf/reader/zarr.yaml
+_target_: zarr_reader.Reader
+```
+
+Select it in `conf/config.yaml`:
+
+```yaml
+defaults:
+  - reader: zarr # Options are: vtp, d3plot, zarr
+  - datapipe: point_cloud # will be overridden by model configs
+  - model: transolver_autoregressive_rollout_training
+  - training: default
+  - inference: default
+  - _self_
+```
+
+And configure features in `conf/datapipe/graph.yaml`:
+
+```yaml
+features: [thickness] # Must match fields stored in Zarr
+```
+
+**Recommended workflow:**
+1. Use PhysicsNeMo-Curator to preprocess d3plot → VTP or Zarr once
+2. Use the corresponding reader for all training and validation
+3. Optionally use the d3plot reader for quick prototyping on raw data
+
 ### Data layout expected by readers
 
 - d3plot reader (`d3plot_reader.py`):
@@ -320,6 +403,10 @@ features: [] # or [thickness, Y_modulus] if your reader provides them
 - `/*.vtp` (each `.vtp` is treated as one run)
 - Displacements stored as 3‑component arrays in point_data with names like `displacement_t0.000`, `displacement_t0.005`, ... (fallback accepts any `displacement_t*`).
 
+- Zarr reader (`zarr_reader.py`):
+  - `/*.zarr/` (each `.zarr` directory is treated as one run)
+  - Contains pre-computed `mesh_pos`, `edges`, and feature arrays
+
 ### Write your own reader
 
 To write your own reader, implement a Hydra‑instantiable function or class whose call returns a three‑tuple `(srcs, dsts, point_data)`. The first two entries are lists of integer arrays describing edges per run (they can be empty lists if you are not producing a graph), and `point_data` is a list of Python dicts with one dict per run. Each dict must contain `'coords'` as a `[T, N, 3]` array and one array per feature name listed in `conf/datapipe/*.yaml` under `features`, as in the minimal sketch below.
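To make the contract concrete, here is a minimal sketch of such a reader. It assumes a purely hypothetical layout of one `.npz` file per run holding `coords` and `thickness` arrays; the file name `my_reader.py`, the `.npz` layout, and the feature names are illustrative, not part of this example:

```python
# my_reader.py -- minimal sketch of a custom reader (illustrative names).
# Hypothetical layout: one .npz file per run containing 'coords' [T, N, 3]
# and 'thickness' [N]; adapt the keys to your own data.
import glob
import os

import numpy as np


class Reader:
    """Hydra-instantiable reader returning (srcs, dsts, point_data)."""

    def __call__(self, data_dir: str, num_samples: int, split: str | None = None, **kwargs):
        srcs, dsts, point_data = [], [], []
        for path in sorted(glob.glob(os.path.join(data_dir, "*.npz")))[:num_samples]:
            run = np.load(path)
            # Empty edge arrays: acceptable for the point-cloud datapipe (no graph).
            srcs.append(np.empty(0, dtype=np.int64))
            dsts.append(np.empty(0, dtype=np.int64))
            point_data.append(
                {
                    "coords": run["coords"].astype(np.float64),  # [T, N, 3]
                    "thickness": run["thickness"].astype(np.float32),  # [N]
                }
            )
        return srcs, dsts, point_data
```

A matching `conf/reader/my_reader.yaml` with `_target_: my_reader.Reader`, mirroring `conf/reader/zarr.yaml` above, makes it selectable from `conf/config.yaml`.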
Feature arrays can be `[N]` or `[N, K]` and should use the same node indexing as `'coords'`. For convenience, a simple class reader can accept the Hydra `split` argument (e.g., "train" or "test") and decide whether to save VTP frames, but this is optional. From e31bf6c21b88324985514b6e6527175bf3248733 Mon Sep 17 00:00:00 2001 From: Sai Krishnan Chandrasekar <157182662+saikrishnanc-nv@users.noreply.github.com> Date: Thu, 13 Nov 2025 10:17:15 -0800 Subject: [PATCH 3/6] Update validation logic of point data in Zarr reader Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- .../structural_mechanics/crash/zarr_reader.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/examples/structural_mechanics/crash/zarr_reader.py b/examples/structural_mechanics/crash/zarr_reader.py index 191494c2a5..c7cedceff0 100644 --- a/examples/structural_mechanics/crash/zarr_reader.py +++ b/examples/structural_mechanics/crash/zarr_reader.py @@ -144,9 +144,22 @@ def process_zarr_data( # Validate point data features for name, data in point_data_dict.items(): - if data.ndim != 1: + for name, data in point_data_dict.items(): + if data.ndim == 1: + if len(data) != num_nodes: + raise ValueError( + f"Point data '{name}' length {len(data)} doesn't match " + f"number of nodes {num_nodes} in {zarr_path}" + ) + elif data.ndim == 2: + if data.shape[0] != num_nodes: + raise ValueError( + f"Point data '{name}' shape {data.shape} doesn't match " + f"number of nodes {num_nodes} in {zarr_path}" + ) + else: raise ValueError( - f"Point data '{name}' must be [N], got {data.shape} in {zarr_path}" + f"Point data '{name}' must be [N] or [N,K], got shape {data.shape} in {zarr_path}" ) if len(data) != num_nodes: raise ValueError( From dede2483af130175288df046d3068560f0fdcc70 Mon Sep 17 00:00:00 2001 From: Sai Krishnan Chandrasekar <157182662+saikrishnanc-nv@users.noreply.github.com> Date: Thu, 13 Nov 2025 10:21:40 -0800 Subject: [PATCH 4/6] Update examples/structural_mechanics/crash/zarr_reader.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- examples/structural_mechanics/crash/zarr_reader.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/examples/structural_mechanics/crash/zarr_reader.py b/examples/structural_mechanics/crash/zarr_reader.py index c7cedceff0..ba4ecd993d 100644 --- a/examples/structural_mechanics/crash/zarr_reader.py +++ b/examples/structural_mechanics/crash/zarr_reader.py @@ -143,7 +143,7 @@ def process_zarr_data( num_nodes = mesh_pos.shape[1] # Validate point data features - for name, data in point_data_dict.items(): + # Validate point data features for name, data in point_data_dict.items(): if data.ndim == 1: if len(data) != num_nodes: @@ -161,11 +161,6 @@ def process_zarr_data( raise ValueError( f"Point data '{name}' must be [N] or [N,K], got shape {data.shape} in {zarr_path}" ) - if len(data) != num_nodes: - raise ValueError( - f"Point data '{name}' length {len(data)} doesn't match " - f"number of nodes {num_nodes} in {zarr_path}" - ) # Validate edge indices are within bounds if edges.size > 0: From ec2293f4c1033efc5c1a3cbb5e60ee0d7992f7af Mon Sep 17 00:00:00 2001 From: Sai Krishnan Chandrasekar Date: Thu, 13 Nov 2025 10:39:26 -0800 Subject: [PATCH 5/6] Add a test for 2D feature arrays --- .../crash/tests/test_zarr_reader.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/examples/structural_mechanics/crash/tests/test_zarr_reader.py 
b/examples/structural_mechanics/crash/tests/test_zarr_reader.py index d79f173e77..8b5d0fe745 100644 --- a/examples/structural_mechanics/crash/tests/test_zarr_reader.py +++ b/examples/structural_mechanics/crash/tests/test_zarr_reader.py @@ -218,6 +218,46 @@ def test_load_zarr_store_multiple_point_data_fields(): assert data.dtype == np.float32, f"{name} should be float32" +def test_load_zarr_store_2d_feature_arrays(): + """Test that load_zarr_store correctly handles 2D feature arrays [N, K].""" + with tempfile.TemporaryDirectory() as temp_dir: + store_path = Path(temp_dir) / "2d_features.zarr" + store_path.mkdir() + + # Create store with 2D feature array + num_nodes = 8 + feature_dim = 3 + store = zarr.open(str(store_path), mode="w") + store.create_dataset( + "mesh_pos", data=np.random.randn(3, num_nodes, 3).astype(np.float32) + ) + store.create_dataset("edges", data=np.array([[0, 1]], dtype=np.int64)) + # Add 1D feature (thickness) + store.create_dataset("thickness", data=np.ones(num_nodes, dtype=np.float32)) + # Add 2D feature array [N, K] (e.g., stress tensor components) + stress_tensor = np.random.randn(num_nodes, feature_dim).astype(np.float32) + store.create_dataset("stress_tensor", data=stress_tensor) + + mesh_pos, edges, point_data_dict = zarr_reader.load_zarr_store(str(store_path)) + + # Should have both 1D and 2D features + assert "thickness" in point_data_dict + assert "stress_tensor" in point_data_dict + + # Check 1D feature shape + assert point_data_dict["thickness"].shape == (num_nodes,) + assert point_data_dict["thickness"].ndim == 1 + + # Check 2D feature shape + assert point_data_dict["stress_tensor"].shape == (num_nodes, feature_dim) + assert point_data_dict["stress_tensor"].ndim == 2 + + # Verify values match + np.testing.assert_array_almost_equal( + point_data_dict["stress_tensor"], stress_tensor + ) + + def test_process_zarr_data(mock_zarr_directory): """Test processing multiple Zarr stores.""" srcs, dsts, point_data = zarr_reader.process_zarr_data( From c17619073e3ccb3580427b338a51376da3d7157b Mon Sep 17 00:00:00 2001 From: Sai Krishnan Chandrasekar <157182662+saikrishnanc-nv@users.noreply.github.com> Date: Thu, 13 Nov 2025 10:44:37 -0800 Subject: [PATCH 6/6] Update examples/structural_mechanics/crash/zarr_reader.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- examples/structural_mechanics/crash/zarr_reader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/structural_mechanics/crash/zarr_reader.py b/examples/structural_mechanics/crash/zarr_reader.py index ba4ecd993d..150ff957a1 100644 --- a/examples/structural_mechanics/crash/zarr_reader.py +++ b/examples/structural_mechanics/crash/zarr_reader.py @@ -142,7 +142,6 @@ def process_zarr_data( num_nodes = mesh_pos.shape[1] - # Validate point data features # Validate point data features for name, data in point_data_dict.items(): if data.ndim == 1:
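
As a quick end-to-end check of the reader added in this series, here is a minimal standalone sketch; the tiny store mirrors the test fixtures above, and the array sizes are arbitrary. Run it from `examples/structural_mechanics/crash/` so that `zarr_reader` is importable:

```python
# smoke_test.py -- minimal sketch exercising the Zarr Reader from this series.
# The store layout mirrors the test helper above; sizes are arbitrary.
import tempfile
from pathlib import Path

import numpy as np
import zarr

import zarr_reader

with tempfile.TemporaryDirectory() as tmp:
    # Build one tiny run: 3 timesteps, 4 nodes, 2 edges.
    store = zarr.open(str(Path(tmp) / "Run000.zarr"), mode="w")
    store.create_dataset("mesh_pos", data=np.random.randn(3, 4, 3).astype(np.float32))
    store.create_dataset("thickness", data=np.ones(4, dtype=np.float32))
    store.create_dataset("edges", data=np.array([[0, 1], [1, 2]], dtype=np.int64))

    srcs, dsts, point_data = zarr_reader.Reader()(data_dir=tmp, num_samples=1, split="train")
    print(point_data[0]["coords"].shape)  # (3, 4, 3)
    print(srcs[0], dsts[0])               # [0 1] [1 2]
```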