@@ -42,6 +42,23 @@
using namespace mlirtrt;
using namespace mlirtrt::runtime;

static constexpr std::string_view kNvtxVerbosityEnvVariable =
"MTRT_TENSORRT_NVTX";

/// Returns the NVTX profiling verbosity requested through the
/// `MTRT_TENSORRT_NVTX` environment variable.
static nvinfer1::ProfilingVerbosity getNvtxVerbosity() {
  const char *verbosityStr = std::getenv(kNvtxVerbosityEnvVariable.data());
  if (!verbosityStr)
    return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
  std::string_view verbosity(verbosityStr);
  if (verbosity == "NONE")
    return nvinfer1::ProfilingVerbosity::kNONE;
  if (verbosity == "DETAILED")
    return nvinfer1::ProfilingVerbosity::kDETAILED;
  return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
}
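
// The mapping implemented above is:
//   MTRT_TENSORRT_NVTX unset     -> kLAYER_NAMES_ONLY (default)
//   MTRT_TENSORRT_NVTX=NONE      -> kNONE
//   MTRT_TENSORRT_NVTX=DETAILED  -> kDETAILED
//   any other value              -> kLAYER_NAMES_ONLY
// Note that gNvtxVerbosity below is computed during static initialization, so
// the environment variable must be set before the program starts.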

static const nvinfer1::ProfilingVerbosity gNvtxVerbosity = getNvtxVerbosity();

namespace {
/// A simple logger that implements TensorRT's logging interface. Errors and
/// warnings are reported through TensorRT's diagnostic system, everything else
@@ -611,6 +628,8 @@ static Status enqueueV3Wrapper(AllocTracker &tracker,
return getStatusWithMsg(StatusCode::InternalError,
"failed to set input-consumed event");

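// Apply the NVTX verbosity requested via MTRT_TENSORRT_NVTX before enqueueing
// the engine execution.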
context->setNvtxVerbosity(gNvtxVerbosity);

if (!context->enqueueV3(stream))
return getStatusWithMsg(StatusCode::InternalError,
"failed to enqueue engine execution on stream");
@@ -650,6 +669,8 @@ static Status enqueueAllocV3Wrapper(AllocTracker &tracker,
// The number of results is known in advance.
int64_t nbResults = outputDesc.getNumberOfResults();

context->setNvtxVerbosity(gNvtxVerbosity);

if (!context->enqueueV3(stream))
return getStatusWithMsg(StatusCode::InternalError,
"failed to enqueue engine execution on stream");
@@ -51,6 +51,9 @@ def TensorRT_Dialect : Dialect {
static constexpr StringRef kTensorRTPerTensorDequantizationMarker = "tensorrt.pt_dq";
static constexpr StringRef kTensorRTPerChannelDequantizationMarker = "tensorrt.pc_dq";
static constexpr StringRef kTensorRTBlockDequantizationMarker = "tensorrt.block_dq";

/// TensorRT layer metadata marker.
static constexpr StringRef kTensorRTLayerMetadataMarker = "metadata";
}];

let dependentDialects = [
@@ -278,6 +278,10 @@ void NvInferNetworkEncoder::setMetadata(nvinfer1::ILayer *layer,
Operation *sourceOp) {
std::string name = createName(namesSet, sourceOp);
layer->setName(name.c_str());
if (auto metadataAttr = sourceOp->getAttrOfType<StringAttr>(
TensorRTDialect::kTensorRTLayerMetadataMarker)) {
layer->setMetadata(metadataAttr.getValue().str().c_str());
}
}

nvinfer1::ITensor *NvInferNetworkEncoder::lookup(Value v) const {
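
For reference, a minimal sketch of how a pass could attach this attribute so that the encoder above forwards it to nvinfer1::ILayer::setMetadata. The helper name and metadata string are hypothetical, the usual MLIR headers are assumed to be included, and `TensorRTDialect::kTensorRTLayerMetadataMarker` is the constant added in the dialect change above:

// Hypothetical helper (illustration only): tag an operation with layer
// metadata that NvInferNetworkEncoder::setMetadata will forward to TensorRT.
static void attachLayerMetadata(mlir::Operation *op, llvm::StringRef metadata) {
  op->setAttr(TensorRTDialect::kTensorRTLayerMetadataMarker,
              mlir::StringAttr::get(op->getContext(), metadata));
}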
mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp (9 changes: 4 additions & 5 deletions)
@@ -522,11 +522,10 @@ tensorrt::buildFunction(mlir::FunctionOpInterface op,
<< "failed to set timing cache";
}

// If created, engines and their layer information are
// with detailed description.
if (!opts.saveTensorRTEnginesToDirectory.empty() ||
!opts.saveTensorRTLayerInfoDirectory.empty())
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
// Enable kDETAILED profiling verbosity unconditionally, then use
// `IExecutionContext::setNvtxVerbosity` to change the verbosity at runtime
// (lower verbosity generally performs better).
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);

setBuilderOptimizationLevel(config.get(), opts.tensorrtBuilderOptLevel,
builderContext.getTensorRTVersion());
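
For illustration, a minimal sketch of the resulting runtime flow. It assumes an engine built with the kDETAILED setting above, a valid CUDA stream, I/O tensor addresses already bound on the context, and the getNvtxVerbosity helper from the runtime change; the function name is hypothetical and this is not code from the repository:

#include <memory>
#include <cuda_runtime_api.h>
#include "NvInfer.h"

// Illustration only: apply the runtime NVTX verbosity to a fresh execution
// context before enqueueing work. The engine is assumed to have been built
// with kDETAILED profiling verbosity, so the per-context NVTX detail can be
// reduced here without rebuilding the engine.
static bool enqueueWithNvtxVerbosity(nvinfer1::ICudaEngine &engine,
                                     cudaStream_t stream) {
  std::unique_ptr<nvinfer1::IExecutionContext> context(
      engine.createExecutionContext());
  if (!context)
    return false;
  context->setNvtxVerbosity(getNvtxVerbosity());
  // Input/output tensor addresses are assumed to have been set already.
  return context->enqueueV3(stream);
}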