@@ -42,6 +42,23 @@
using namespace mlirtrt;
using namespace mlirtrt::runtime;

static constexpr std::string_view kNvtxVerbosityEnvVariable =
"MTRT_TENSORRT_NVTX";

/// Returns the NVTX profiling verbosity requested through the
/// `MTRT_TENSORRT_NVTX` environment variable.
static nvinfer1::ProfilingVerbosity getNvtxVerbosity() {
  const char *verbosityStr = std::getenv(kNvtxVerbosityEnvVariable.data());
  if (!verbosityStr)
    return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
  std::string_view verbosity(verbosityStr);
  if (verbosity == "NONE")
    return nvinfer1::ProfilingVerbosity::kNONE;
  if (verbosity == "DETAILED")
    return nvinfer1::ProfilingVerbosity::kDETAILED;
  return nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
}
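
// The mapping implemented above is:
//   MTRT_TENSORRT_NVTX unset     -> kLAYER_NAMES_ONLY (default)
//   MTRT_TENSORRT_NVTX=NONE      -> kNONE
//   MTRT_TENSORRT_NVTX=DETAILED  -> kDETAILED
//   any other value              -> kLAYER_NAMES_ONLY
// Note that gNvtxVerbosity below is computed during static initialization, so
// the environment variable must be set before the program starts.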

static const nvinfer1::ProfilingVerbosity gNvtxVerbosity = getNvtxVerbosity();

namespace {
/// A simple logger that implements TensorRT's logging interface. Errors and
/// warnings are reported through TensorRT's diagnostic system, everything else
@@ -611,6 +628,8 @@ static Status enqueueV3Wrapper(AllocTracker &tracker,
return getStatusWithMsg(StatusCode::InternalError,
"failed to set input-consumed event");

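// Apply the NVTX verbosity requested via MTRT_TENSORRT_NVTX before enqueueing
// the engine execution.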
context->setNvtxVerbosity(gNvtxVerbosity);

if (!context->enqueueV3(stream))
return getStatusWithMsg(StatusCode::InternalError,
"failed to enqueue engine execution on stream");
@@ -650,6 +669,8 @@ static Status enqueueAllocV3Wrapper(AllocTracker &tracker,
// The number of results is known in advance.
int64_t nbResults = outputDesc.getNumberOfResults();

context->setNvtxVerbosity(gNvtxVerbosity);

if (!context->enqueueV3(stream))
return getStatusWithMsg(StatusCode::InternalError,
"failed to enqueue engine execution on stream");
@@ -51,6 +51,9 @@ def TensorRT_Dialect : Dialect {
static constexpr StringRef kTensorRTPerTensorDequantizationMarker = "tensorrt.pt_dq";
static constexpr StringRef kTensorRTPerChannelDequantizationMarker = "tensorrt.pc_dq";
static constexpr StringRef kTensorRTBlockDequantizationMarker = "tensorrt.block_dq";

/// TensorRT layer metadata marker.
static constexpr StringRef kTensorRTLayerMetadataMarker = "metadata";
}];

let dependentDialects = [
@@ -278,6 +278,10 @@ void NvInferNetworkEncoder::setMetadata(nvinfer1::ILayer *layer,
Operation *sourceOp) {
std::string name = createName(namesSet, sourceOp);
layer->setName(name.c_str());
if (auto metadataAttr = sourceOp->getAttrOfType<StringAttr>(
TensorRTDialect::kTensorRTLayerMetadataMarker)) {
layer->setMetadata(metadataAttr.getValue().str().c_str());
}
}

nvinfer1::ITensor *NvInferNetworkEncoder::lookup(Value v) const {
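
For reference, a minimal sketch of how a pass could attach this attribute so that the encoder above forwards it to nvinfer1::ILayer::setMetadata. The helper name and metadata string are hypothetical, the usual MLIR headers are assumed to be included, and `TensorRTDialect::kTensorRTLayerMetadataMarker` is the constant added in the dialect change above:

// Hypothetical helper (illustration only): tag an operation with layer
// metadata that NvInferNetworkEncoder::setMetadata will forward to TensorRT.
static void attachLayerMetadata(mlir::Operation *op, llvm::StringRef metadata) {
  op->setAttr(TensorRTDialect::kTensorRTLayerMetadataMarker,
              mlir::StringAttr::get(op->getContext(), metadata));
}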
mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp (9 changes: 4 additions & 5 deletions)
@@ -522,11 +522,10 @@ tensorrt::buildFunction(mlir::FunctionOpInterface op,
<< "failed to set timing cache";
}

// If created, engines and their layer information are
// with detailed description.
if (!opts.saveTensorRTEnginesToDirectory.empty() ||
!opts.saveTensorRTLayerInfoDirectory.empty())
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
// Enable kDETAILED profiling verbosity unconditionally, then use
// `IExecutionContext::setNvtxVerbosity` to change the verbosity at runtime
// (lower verbosity generally performs better).
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);

setBuilderOptimizationLevel(config.get(), opts.tensorrtBuilderOptLevel,
builderContext.getTensorRTVersion());
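
For illustration, a minimal sketch of the resulting runtime flow. It assumes an engine built with the kDETAILED setting above, a valid CUDA stream, I/O tensor addresses already bound on the context, and the getNvtxVerbosity helper from the runtime change; the function name is hypothetical and this is not code from the repository:

#include <memory>
#include <cuda_runtime_api.h>
#include "NvInfer.h"

// Illustration only: apply the runtime NVTX verbosity to a fresh execution
// context before enqueueing work. The engine is assumed to have been built
// with kDETAILED profiling verbosity, so the per-context NVTX detail can be
// reduced here without rebuilding the engine.
static bool enqueueWithNvtxVerbosity(nvinfer1::ICudaEngine &engine,
                                     cudaStream_t stream) {
  std::unique_ptr<nvinfer1::IExecutionContext> context(
      engine.createExecutionContext());
  if (!context)
    return false;
  context->setNvtxVerbosity(getNvtxVerbosity());
  // Input/output tensor addresses are assumed to have been set already.
  return context->enqueueV3(stream);
}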