[TensorRT] Sets TRT layer metadata and nvtx profiling verbosity

pranavm-nvidia · yizhuoz004 · commit 45bb9beab5ef · 2025-07-17T14:17:44.000-07:00
For now we query an environment variable `MTRT_TENSORRT_NVTX` to
set the nvtx profiling verbosity. This is not ideal because it cannot
support per-engine profiling verbosity. We will change that with a
runtime option for the TRT module.
diff --git a/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp b/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp
@@ -42,6 +42,25 @@
 using namespace mlirtrt;
 using namespace mlirtrt::runtime;
 
+static const char *kNvtxVerbosityEnvVariable = "MTRT_TENSORRT_NVTX";
+
+/// Maps the `MTRT_TENSORRT_NVTX` environment variable to a verbosity level:
+/// "NONE" or "DETAILED"; unset/unrecognized values yield kLAYER_NAMES_ONLY.
+nvinfer1::ProfilingVerbosity getNvtxVerbosity() {
+  const char *verbosityStr = std::getenv(kNvtxVerbosityEnvVariable);
+  if (!verbosityStr)
+    return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
+  // A `switch` cannot dispatch on strings in C++, so compare explicitly.
+  const std::string_view verbosity(verbosityStr);
+  if (verbosity == "NONE")
+    return nvinfer1::ProfilingVerbosity::kNONE;
+  if (verbosity == "DETAILED")
+    return nvinfer1::ProfilingVerbosity::kDETAILED;
+  return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
+}
+
+static const nvinfer1::ProfilingVerbosity gNvtxVerbosity = getNvtxVerbosity();
+
 namespace {
 /// A simple logger that implements TensorRT's logging interface. Errors and
 /// warnings are reported through TensorRT's diagnostic system, everything else
@@ -611,6 +630,8 @@ static Status enqueueV3Wrapper(AllocTracker &tracker,
     return getStatusWithMsg(StatusCode::InternalError,
                             "failed to set input-consumed event");
 
+  context->setNvtxVerbosity(gNvtxVerbosity);
+
   if (!context->enqueueV3(stream))
     return getStatusWithMsg(StatusCode::InternalError,
                             "failed to enqueue engine execution on stream");
@@ -650,6 +671,8 @@ static Status enqueueAllocV3Wrapper(AllocTracker &tracker,
   // Number of results are known in advance.
   int64_t nbResults = outputDesc.getNumberOfResults();
 
+  context->setNvtxVerbosity(gNvtxVerbosity);
+
   if (!context->enqueueV3(stream))
     return getStatusWithMsg(StatusCode::InternalError,
                             "failed to enqueue engine execution on stream");
diff --git a/mlir-tensorrt/tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp b/mlir-tensorrt/tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp
@@ -278,6 +278,10 @@ void NvInferNetworkEncoder::setMetadata(nvinfer1::ILayer *layer,
                                         Operation *sourceOp) {
   std::string name = createName(namesSet, sourceOp);
   layer->setName(name.c_str());
+
+  if (auto metadataAttr = sourceOp->getAttrOfType<StringAttr>("metadata")) {
+    layer->setMetadata(metadataAttr.getValue().str().c_str());
+  }
 }
 
 nvinfer1::ITensor *NvInferNetworkEncoder::lookup(Value v) const {
diff --git a/mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp b/mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp
@@ -522,11 +522,10 @@ tensorrt::buildFunction(mlir::FunctionOpInterface op,
              << "failed to set timing cache";
   }
 
-  // If created, engines and their layer information are
-  // with detailed description.
-  if (!opts.saveTensorRTEnginesToDirectory.empty() ||
-      !opts.saveTensorRTLayerInfoDirectory.empty())
-    config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
+  // Enable kDETAILED verbosity unconditionally, then use
+  // `IExecutionContext::setNvtxVerbosity` to change the verbosity at runtime
+  // (lower verbosity performs better generally).
+  config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
 
   setBuilderOptimizationLevel(config.get(), opts.tensorrtBuilderOptLevel,
                               builderContext.getTensorRTVersion());