Skip to content

Commit b9173ce

Browse files
[TensorRT] Sets TRT layer metadata and nvtx profiling verbosity (#674)
For now, we query the environment variable `MTRT_TENSORRT_NVTX` to set the NVTX profiling verbosity. This is not ideal because it cannot support per-engine profiling verbosity. We will replace it with a runtime option for the TRT module. Co-authored-by: pranavm <[email protected]>
1 parent 6584c93 commit b9173ce

File tree

4 files changed

+32
-5
lines changed

4 files changed

+32
-5
lines changed

mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,23 @@
4242
using namespace mlirtrt;
4343
using namespace mlirtrt::runtime;
4444

45+
static constexpr std::string_view kNvtxVerbosityEnvVariable =
46+
"MTRT_TENSORRT_NVTX";
47+
48+
/// Helper method that gets nvtx verbosity from environment value
49+
static nvinfer1::ProfilingVerbosity getNvtxVerbosity() {
50+
const char *verbosity_str = std::getenv(kNvtxVerbosityEnvVariable.data());
51+
if (!verbosity_str)
52+
return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
53+
if (std::string_view(verbosity_str) == "NONE")
54+
return nvinfer1::ProfilingVerbosity::kNONE;
55+
if (std::string_view(verbosity_str) == "DETAILED")
56+
return nvinfer1::ProfilingVerbosity::kDETAILED;
57+
return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
58+
}
59+
60+
static const nvinfer1::ProfilingVerbosity gNvtxVerbosity = getNvtxVerbosity();
61+
4562
namespace {
4663
/// A simple logger that implements TensorRT's logging interface. Errors and
4764
/// warnings are reported through TensorRT's diagnostic system, everything else
@@ -611,6 +628,8 @@ static Status enqueueV3Wrapper(AllocTracker &tracker,
611628
return getStatusWithMsg(StatusCode::InternalError,
612629
"failed to set input-consumed event");
613630

631+
context->setNvtxVerbosity(gNvtxVerbosity);
632+
614633
if (!context->enqueueV3(stream))
615634
return getStatusWithMsg(StatusCode::InternalError,
616635
"failed to enqueue engine execution on stream");
@@ -650,6 +669,8 @@ static Status enqueueAllocV3Wrapper(AllocTracker &tracker,
650669
// Number of results are known in advance.
651670
int64_t nbResults = outputDesc.getNumberOfResults();
652671

672+
context->setNvtxVerbosity(gNvtxVerbosity);
673+
653674
if (!context->enqueueV3(stream))
654675
return getStatusWithMsg(StatusCode::InternalError,
655676
"failed to enqueue engine execution on stream");

mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ def TensorRT_Dialect : Dialect {
5151
static constexpr StringRef kTensorRTPerTensorDequantizationMarker = "tensorrt.pt_dq";
5252
static constexpr StringRef kTensorRTPerChannelDequantizationMarker = "tensorrt.pc_dq";
5353
static constexpr StringRef kTensorRTBlockDequantizationMarker = "tensorrt.block_dq";
54+
55+
/// TensorRT layer metadata marker.
56+
static constexpr StringRef kTensorRTLayerMetadataMarker = "metadata";
5457
}];
5558

5659
let dependentDialects = [

mlir-tensorrt/tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,10 @@ void NvInferNetworkEncoder::setMetadata(nvinfer1::ILayer *layer,
278278
Operation *sourceOp) {
279279
std::string name = createName(namesSet, sourceOp);
280280
layer->setName(name.c_str());
281+
if (auto metadataAttr = sourceOp->getAttrOfType<StringAttr>(
282+
TensorRTDialect::kTensorRTLayerMetadataMarker)) {
283+
layer->setMetadata(metadataAttr.getValue().str().c_str());
284+
}
281285
}
282286

283287
nvinfer1::ITensor *NvInferNetworkEncoder::lookup(Value v) const {

mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -522,11 +522,10 @@ tensorrt::buildFunction(mlir::FunctionOpInterface op,
522522
<< "failed to set timing cache";
523523
}
524524

525-
// If created, engines and their layer information are
526-
// with detailed description.
527-
if (!opts.saveTensorRTEnginesToDirectory.empty() ||
528-
!opts.saveTensorRTLayerInfoDirectory.empty())
529-
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
525+
// Enable kDETAILED verbosity unconditionally, then use
526+
// `IExecutionContext::setNvtxVerbosity` to change the verbosity at runtime
527+
// (lower verbosity performs better generally).
528+
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
530529

531530
setBuilderOptimizationLevel(config.get(), opts.tensorrtBuilderOptLevel,
532531
builderContext.getTensorRTVersion());

0 commit comments

Comments
 (0)