Add tensorrt_engine to Executable API (#608)

yizhuoz004 · pranavm-nvidia · web-flow · commit aa069059a866 · 2025-04-10T14:36:55.000-07:00
Signed-off-by: yizhuoz004 <yizhuoz@nvidia.com>
Co-authored-by: pranavm-nvidia <49246958+pranavm-nvidia@users.noreply.github.com>
diff --git a/tripy/CONTRIBUTING.md b/tripy/CONTRIBUTING.md
@@ -72,7 +72,7 @@ We've written developer guides to help you understand the codebase:
 
 <!-- TODO (pranavm): Update links here -->
 - Start with the
-    [architecture](https://nvidia.github.io/TensorRT-Incubator/post0_developer_guides/architecture.html)
+    [architecture](https://nvidia.github.io/TensorRT-Incubator/post0_developer_guides/00-architecture.html)
     documentation.
 
 
diff --git a/tripy/Dockerfile b/tripy/Dockerfile
@@ -32,3 +32,4 @@ RUN pip install build .[docs,dev,test,build] \
 
 # Export tripy into the PYTHONPATH so it doesn't need to be installed after making changes
 ENV PYTHONPATH=/tripy
+ENV LD_LIBRARY_PATH=/usr/local/lib/python3.9/site-packages/nvidia/cuda_runtime/lib:$LD_LIBRARY_PATH
diff --git a/tripy/nvtripy/backend/api/executable.py b/tripy/nvtripy/backend/api/executable.py
@@ -299,6 +299,44 @@ def add(a, b):
         """
         json_utils.save(self, path)
 
+    def serialized_tensorrt_engine(self) -> bytes:
+        """
+        Returns the serialized TensorRT engine from the executable.
+
+        Returns:
+            The serialized TensorRT engine as ``bytes``.
+
+        .. seealso:: Refer to the `TensorRT developer guide <https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/python-api-docs.html#deserializing-a-plan>`_
+            for details on how to work with serialized TensorRT engines.
+
+        .. code-block:: python
+            :linenos:
+            :caption: TensorRT engine
+
+            def add(a, b):
+                return a + b
+
+            # doc: no-print-locals compiled_add trt_engine
+            compiled_add = tp.compile(
+                add,
+                args=[
+                    tp.InputInfo(shape=((1, 2, 3),), dtype=tp.float32),
+                    tp.InputInfo(shape=((1, 2, 3),), dtype=tp.float32),
+                ],
+            )
+
+            trt_engine = compiled_add.serialized_tensorrt_engine()
+            assert isinstance(trt_engine, bytes)
+        """
+        data_segments = self._executable.get_data_segments()
+        if len(data_segments) != 1:
+            raise_error(
+                "Cannot get tensorrt engine from multiple clusters.",
+                [f"Found {len(data_segments)} clusters in the executable."],
+            )
+        trt_cluster = data_segments[0]  # tuple of (name, data)
+        return trt_cluster[1]
+
 
 @json_utils.Encoder.register(Executable)
 def encode_executable(executable):
diff --git a/tripy/pyproject.toml b/tripy/pyproject.toml
@@ -76,6 +76,7 @@ test = [
   "nvtripy[doc_test_common]",
   "pytest-notebook==0.10.0",
   "notebook==7.2.2",
+  "polygraphy==0.49.20",
 ]
 
 [tool.black]
diff --git a/tripy/tests/backend/api/test_executable.py b/tripy/tests/backend/api/test_executable.py
@@ -17,6 +17,7 @@
 import tempfile
 from typing import Tuple
 
+import numpy as np
 import nvtripy as tp
 import pytest
 from tests import helper
@@ -116,3 +117,15 @@ def test_file_io(self, single_return_executable):
             out1 = single_return_executable(inp, inp)
             out2 = loaded_executable(inp, inp)
             assert tp.equal(out1, out2)
+
+    def test_tensorrt_engine(self, single_return_executable):
+        from polygraphy.backend.trt import EngineFromBytes, TrtRunner
+
+        trt_engine = single_return_executable.serialized_tensorrt_engine()
+        load_engine = EngineFromBytes(trt_engine)
+        with TrtRunner(load_engine) as runner:
+            inp_data0 = np.random.rand(2, 2).astype(np.float32)
+            inp_data1 = np.random.rand(2, 2).astype(np.float32)
+            output = runner.infer(feed_dict={"arg0": inp_data0, "arg1": inp_data1})["result0"]
+            tripy_output = single_return_executable(tp.Tensor(inp_data0), tp.Tensor(inp_data1))
+            assert tp.equal(tripy_output, tp.Tensor(output))

Original file line number	Diff line number	Diff line change
`@@ -32,3 +32,4 @@ RUN pip install build .[docs,dev,test,build] \`
`32`	`32`
`33`	`33`	`# Export tripy into the PYTHONPATH so it doesn't need to be installed after making changes`
`34`	`34`	`ENV PYTHONPATH=/tripy`
	`35`	`+ENV LD_LIBRARY_PATH=/usr/local/lib/python3.9/site-packages/nvidia/cuda_runtime/lib:$LD_LIBRARY_PATH`
Original file line number	Diff line number	Diff line change
`@@ -76,6 +76,7 @@ test = [`
`76`	`76`	`"nvtripy[doc_test_common]",`
`77`	`77`	`"pytest-notebook==0.10.0",`
`78`	`78`	`"notebook==7.2.2",`
	`79`	`+ "polygraphy==0.49.20",`
`79`	`80`	`]`
`80`	`81`
`81`	`82`	`[tool.black]`