Skip to content

Commit aa06905

Browse files
Add tensorrt_engine to Executable API (#608)
Signed-off-by: yizhuoz004 <[email protected]> Co-authored-by: pranavm-nvidia <[email protected]>
1 parent ea50844 commit aa06905

File tree

5 files changed

+54
-1
lines changed

5 files changed

+54
-1
lines changed

tripy/CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ We've written developer guides to help you understand the codebase:
7272

7373
<!-- TODO (pranavm): Update links here -->
7474
- Start with the
75-
[architecture](https://nvidia.github.io/TensorRT-Incubator/post0_developer_guides/architecture.html)
75+
[architecture](https://nvidia.github.io/TensorRT-Incubator/post0_developer_guides/00-architecture.html)
7676
documentation.
7777

7878

tripy/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,4 @@ RUN pip install build .[docs,dev,test,build] \
3232

3333
# Export tripy into the PYTHONPATH so it doesn't need to be installed after making changes
3434
ENV PYTHONPATH=/tripy
35+
ENV LD_LIBRARY_PATH=/usr/local/lib/python3.9/site-packages/nvidia/cuda_runtime/lib:$LD_LIBRARY_PATH

tripy/nvtripy/backend/api/executable.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,44 @@ def add(a, b):
299299
"""
300300
json_utils.save(self, path)
301301

302+
def serialized_tensorrt_engine(self) -> bytes:
    """
    Returns the serialized TensorRT engine from the executable.

    The executable must contain exactly one data segment (cluster); any other
    count is reported as an error instead of returning an engine.

    Returns:
        The serialized TensorRT engine as ``bytes``.

    .. seealso:: Refer to the `TensorRT developer guide <https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/python-api-docs.html#deserializing-a-plan>`_
        for details on how to work with serialized TensorRT engines.

    .. code-block:: python
        :linenos:
        :caption: TensorRT engine

        def add(a, b):
            return a + b

        # doc: no-print-locals compiled_add trt_engine
        compiled_add = tp.compile(
            add,
            args=[
                tp.InputInfo(shape=((1, 2, 3),), dtype=tp.float32),
                tp.InputInfo(shape=((1, 2, 3),), dtype=tp.float32),
            ],
        )

        trt_engine = compiled_add.serialized_tensorrt_engine()
        assert isinstance(trt_engine, bytes)
    """
    data_segments = self._executable.get_data_segments()
    # A single engine can only be handed back when there is exactly one
    # segment; otherwise there is no unambiguous choice to return.
    if len(data_segments) != 1:
        raise_error(
            "Cannot get tensorrt engine from multiple clusters.",
            [f"Found {len(data_segments)} clusters in the executable."],
        )
    trt_cluster = data_segments[0]  # tuple of (name, data)
    return trt_cluster[1]
339+
302340

303341
@json_utils.Encoder.register(Executable)
304342
def encode_executable(executable):

tripy/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ test = [
7676
"nvtripy[doc_test_common]",
7777
"pytest-notebook==0.10.0",
7878
"notebook==7.2.2",
79+
"polygraphy==0.49.20",
7980
]
8081

8182
[tool.black]

tripy/tests/backend/api/test_executable.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import tempfile
1818
from typing import Tuple
1919

20+
import numpy as np
2021
import nvtripy as tp
2122
import pytest
2223
from tests import helper
@@ -116,3 +117,15 @@ def test_file_io(self, single_return_executable):
116117
out1 = single_return_executable(inp, inp)
117118
out2 = loaded_executable(inp, inp)
118119
assert tp.equal(out1, out2)
120+
121+
def test_tensorrt_engine(self, single_return_executable):
    """
    Checks that the serialized engine extracted from an executable can be run
    standalone through Polygraphy and agrees with the executable's own output.
    """
    from polygraphy.backend.trt import EngineFromBytes, TrtRunner

    engine_bytes = single_return_executable.serialized_tensorrt_engine()
    engine_loader = EngineFromBytes(engine_bytes)

    with TrtRunner(engine_loader) as trt_runner:
        # Draw the two random inputs in the same order as before.
        lhs = np.random.rand(2, 2).astype(np.float32)
        rhs = np.random.rand(2, 2).astype(np.float32)

        feed = {"arg0": lhs, "arg1": rhs}
        trt_output = trt_runner.infer(feed_dict=feed)["result0"]

        # Run the same inputs through the executable and compare results.
        tripy_output = single_return_executable(tp.Tensor(lhs), tp.Tensor(rhs))
        assert tp.equal(tripy_output, tp.Tensor(trt_output))

0 commit comments

Comments
 (0)