NVIDIA
diff --git a/‎mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h‎
Lines changed: 9 additions & 0 deletions b/‎mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h‎
Lines changed: 4 additions & 2 deletions b/‎mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp‎
Lines changed: 26 additions & 1 deletion b/‎mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp‎
Lines changed: 26 additions & 1 deletion
diff --git a/‎mlir-tensorrt/compiler/lib/Compiler/TensorRTExtension/TensorRTExtension.cpp‎
Lines changed: 2 additions & 2 deletions b/‎mlir-tensorrt/compiler/lib/Compiler/TensorRTExtension/TensorRTExtension.cpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp‎
Lines changed: 40 additions & 2 deletions b/‎mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp‎
Lines changed: 40 additions & 2 deletions
diff --git a/‎mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/NetworkEncoder.h‎
Lines changed: 10 additions & 5 deletions b/‎mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/NetworkEncoder.h‎
Lines changed: 10 additions & 5 deletions
diff --git a/‎mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TranslateToTensorRT.h‎
Lines changed: 8 additions & 7 deletions b/‎mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TranslateToTensorRT.h‎
Lines changed: 8 additions & 7 deletions
@@ -60,6 +60,10 @@ typedef struct MTRT_StableHLOToExecutableOptions {
   void *ptr;
 } MTRT_StableHLOToExecutableOptions;
 
+/// Signature of a user-provided function that produces a metadata string for
+/// the operation `op`. The implementation emits its text by calling `append`
+/// with `appendCtx` as the second argument; text passed across multiple
+/// `append` calls is concatenated in call order. `userData` is the opaque
+/// pointer that was supplied when the callback was registered.
+typedef void (*MTRT_MetadataCallback)(MlirOperation op,
+                                      MlirStringCallback append,
+                                      void *appendCtx, void *userData);
+
 MLIR_CAPI_EXPORTED MTRT_Status mtrtStableHloToExecutableOptionsCreate(
     MTRT_CompilerClient client, MTRT_StableHLOToExecutableOptions *options,
     int32_t tensorRTBuilderOptLevel, bool tensorRTStronglyTyped);
@@ -77,6 +81,11 @@ MLIR_CAPI_EXPORTED MTRT_Status mtrtStableHloToExecutableOptionsSetDebugOptions(
     const char **debugTypes, size_t debugTypeSizes,
     const char *dumpIrTreeDir = nullptr, const char *dumpTensorRTDir = nullptr);
 
+/// Installs `callback` on `options` as the function used to produce a metadata
+/// string for an operation during TensorRT translation. `callback` and
+/// `userData` are stored by value and `userData` is handed back unchanged on
+/// every invocation, so whatever it points to must stay valid for as long as
+/// `options` may invoke the callback.
+MLIR_CAPI_EXPORTED MTRT_Status
+mtrtStableHloToExecutableOptionsSetTensorRTTranslationMetadataCallback(
+    MTRT_StableHLOToExecutableOptions options, MTRT_MetadataCallback callback,
+    void *userData);
+
 MLIR_CAPI_EXPORTED MTRT_Status mtrtStableHloToExecutableOptionsDestroy(
     MTRT_StableHLOToExecutableOptions options);
 
 
@@ -33,7 +33,6 @@
 
 #include "mlir-executor/Runtime/API/API.h"
 #include "mlir-executor/Support/Status.h"
-#include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
 #include "mlir-tensorrt/Compiler/Client.h"
 #include "mlir-tensorrt/Compiler/Extension.h"
 #include "mlir-tensorrt/Compiler/Options.h"
@@ -125,11 +124,14 @@ struct StableHLOToExecutableOptions : public mlir::OptionsContext {
   /// Whether to disallow host tensors in TensorRT clusters.
   bool disallowHostTensorsInTensorRTClusters = false;
 
-  /// Entrypiont function name.
+  /// Entrypoint function name.
   std::string entrypoint = "main";
 
   DebugOptions debugOptions;
 
+  /// Callback that returns a metadata string for the given operation. It is
+  /// handed to the TensorRT translation pass when the pass pipeline is
+  /// populated; the default yields an empty string.
+  std::function<std::string(mlir::Operation *)> layerMetadataCallback =
+      [](mlir::Operation *) { return ""; };
+
   /// Base class for extensions associated with StableHloToExecutableTask.
   class ExtensionBase : public TaskExtensionBase {
   public:
 
@@ -32,7 +32,6 @@
 #include "mlir-tensorrt/Dialect/Plan/IR/Plan.h"
 #include "mlir/CAPI/IR.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace mlirtrt;
 using namespace mlirtrt::compiler;
@@ -199,6 +198,32 @@ MTRT_Status mtrtStableHloToExecutableOptionsSetDebugOptions(
   return mtrtStatusGetOk();
 }
 
+/// Installs a user-provided C callback as the layer metadata callback of
+/// `options`.
+///
+/// The C callback receives the operation, an `append` function together with
+/// its context, and `userData`. Everything it appends is accumulated into the
+/// string returned to the translation code. Passing a null `callback` installs
+/// a callback that always yields an empty string.
+///
+/// `userData` is stored as-is and must remain valid for as long as the
+/// callback may be invoked. Always returns an OK status.
+MTRT_Status
+mtrtStableHloToExecutableOptionsSetTensorRTTranslationMetadataCallback(
+    MTRT_StableHLOToExecutableOptions options, MTRT_MetadataCallback callback,
+    void *userData) {
+  StableHLOToExecutableOptions *cppOpts = unwrap(options);
+
+  // A null function pointer cannot be invoked. Treat it as "no metadata" so
+  // that translation never calls through a null pointer.
+  if (!callback) {
+    cppOpts->layerMetadataCallback = [](Operation *) { return std::string(); };
+    return mtrtStatusGetOk();
+  }
+
+  // Construct the append callback which we will pass to the callback provided
+  // by the user. We do it this way to avoid needing a string construct in the C
+  // API. The lambda is captureless, so it converts to a plain function pointer.
+  MlirStringCallback appendFunc = [](MlirStringRef str, void *appendCtx) {
+    // An empty string ref may carry a null data pointer; there is nothing to
+    // append in that case.
+    if (!str.data || str.length == 0)
+      return;
+    // Append in place rather than materializing a temporary std::string.
+    static_cast<std::string *>(appendCtx)->append(str.data, str.length);
+  };
+
+  // Capture by value: this closure outlives the current call, so capturing
+  // `callback` by reference would leave it pointing to the wrong place at the
+  // time the closure is invoked.
+  cppOpts->layerMetadataCallback = [callback, userData,
+                                    appendFunc](Operation *op) {
+    std::string accum;
+    callback(wrap(op), appendFunc, &accum, userData);
+    return accum;
+  };
+
+  return mtrtStatusGetOk();
+}
+
 MTRT_Status mtrtStableHloToExecutableOptionsDestroy(
     MTRT_StableHLOToExecutableOptions options) {
   delete reinterpret_cast<StableHLOToExecutableOptions *>(options.ptr);
 
@@ -64,8 +64,8 @@ void StableHLOToExecutableTensorRTExtension::populatePasses(
     auto &trtPM = pm.nest<tensorrt::TensorRTModuleOp>();
     tensorrt::buildTensorRTModuleTransformationPipeline(
         trtPM, translationOptions.enableStronglyTyped);
-    trtPM.addPass(
-        tensorrt::createTranslateTensorRTPass(nullptr, translationOptions));
+    trtPM.addPass(tensorrt::createTranslateTensorRTPass(
+        nullptr, options.layerMetadataCallback, translationOptions));
     return;
   }
 
 
@@ -19,7 +19,9 @@
 #include "mlir/Bindings/Python/PybindAdaptors.h"
 #include "pybind11/pybind11.h"
 #include "llvm/Support/DynamicLibrary.h"
+#include <iostream>
 #include <pybind11/attr.h>
+#include <pybind11/functional.h>
 
 #ifdef MLIR_TRT_TARGET_TENSORRT
 #include "mlir-tensorrt-dialect/Utils/NvInferAdaptor.h"
@@ -66,6 +68,9 @@ class PyStableHLOToExecutableOptions
           mtrtStableHloToExecutableOptionsDestroy,
           mtrtPythonCapsuleToStableHLOToExecutableOptions,
           mtrtPythonStableHLOToExecutableOptionsToCapsule};
+
+  // We need this member so we can keep the Python callback alive long enough.
+  std::function<std::string(MlirOperation)> callback;
 };
 } // namespace
 
@@ -270,7 +275,40 @@ PYBIND11_MODULE(_api, m) {
           py::arg("enabled"),
           py::arg("debug_types") = std::vector<std::string>{},
           py::arg("dump_ir_tree_dir") = py::none(),
-          py::arg("dump_tensorrt_dir") = py::none());
+          py::arg("dump_tensorrt_dir") = py::none())
+
+#ifdef MLIR_TRT_TARGET_TENSORRT
+      // Registers a Python callable that maps an operation to a metadata
+      // string. The callable is stored on the options object (`self.callback`)
+      // and reached from the C callback through the `userData` pointer.
+      .def(
+          "set_tensorrt_translation_metadata_callback",
+          [](PyStableHLOToExecutableOptions &self,
+             std::function<std::string(MlirOperation)> pyCallback) {
+            // Since we're constructing a C callback, our closures must not
+            // capture. We can pass in the Python callback via the userData
+            // argument.
+            auto callback = [](MlirOperation op, MlirStringCallback append,
+                               void *appendCtx, void *userDataVoid) {
+              // Borrow the stored functor; copying it on every invocation
+              // would duplicate the wrapped Python callable each time.
+              const auto &pyCallback =
+                  *static_cast<std::function<std::string(MlirOperation)> *>(
+                      userDataVoid);
+
+              std::string result;
+              try {
+                result = pyCallback(op);
+              } catch (const std::exception &e) {
+                // Best effort: report the failure and continue with empty
+                // metadata instead of unwinding through the C API.
+                std::cerr << e.what() << '\n';
+              }
+
+              append(MlirStringRef{result.data(), result.size()}, appendCtx);
+            };
+
+            // Keep the functor alive on the options object; the C API only
+            // stores the raw pointer to it.
+            self.callback = std::move(pyCallback);
+            THROW_IF_MTRT_ERROR(
+                mtrtStableHloToExecutableOptionsSetTensorRTTranslationMetadataCallback(
+                    self, callback, static_cast<void *>(&self.callback)));
+          },
+          py::arg("callback"), py::keep_alive<1, 2>{})
+#endif
+      ;
 
   m.def(
       "compiler_stablehlo_to_executable",
@@ -308,4 +346,4 @@ PYBIND11_MODULE(_api, m) {
   bindTensorRTPluginAdaptorObjects(m);
 #endif
 #endif
-}
+}
@@ -74,11 +74,14 @@ static constexpr nvinfer1::Weights kNullWeights =
 
 class NvInferNetworkEncoder {
 public:
-  NvInferNetworkEncoder(nvinfer1::INetworkDefinition *network,
-                        nvinfer1::IOptimizationProfile *profile,
-                        TensorRTVersion version, bool usesStronglyTyped)
+  NvInferNetworkEncoder(
+      nvinfer1::INetworkDefinition *network,
+      nvinfer1::IOptimizationProfile *profile, TensorRTVersion version,
+      bool usesStronglyTyped,
+      std::function<std::string(Operation *)> metadataCallback)
       : network(network), profile(profile), version(std::move(version)),
-        usesStronglyTyped(usesStronglyTyped) {}
+        usesStronglyTyped(usesStronglyTyped),
+        layerMetadataCallback(std::move(metadataCallback)) {}
 
   /// Lookup the TRT ITensor* equivalent of a Value.
   nvinfer1::ITensor *lookup(Value v) const;
@@ -141,7 +144,7 @@ class NvInferNetworkEncoder {
 
   /// Set the name of the `trtLayer` to a unique string that contains the op
   /// name and location information from `sourceOp`.
-  void setName(nvinfer1::ILayer *layer, Operation *sourceOp);
+  void setMetadata(nvinfer1::ILayer *layer, Operation *sourceOp);
 
   // Check if network uses fp16 types.
   bool hasFp16Usage() const { return usesFp16; }
@@ -238,6 +241,8 @@ class NvInferNetworkEncoder {
   bool hasQDQOps{false};
 
   PluginManager pluginMgr;
+
+  std::function<std::string(Operation *)> layerMetadataCallback;
 };
 
 //===----------------------------------------------------------------------===//
 
@@ -22,7 +22,6 @@
 
 #ifdef MLIR_TRT_TARGET_TENSORRT
 #include "mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/NetworkEncoder.h"
-#include "mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.h"
 #include "mlir-tensorrt-dialect/Utils/Options.h"
 #include "mlir/Support/LogicalResult.h"
 #include "llvm/Support/raw_ostream.h"
@@ -208,17 +207,19 @@ class TensorRTSerializedTimingCache {
 /// `tensorrt.shape_profile` arguments have been populated for each argument
 /// that has unknown dimensions.
 /// TODO(cbate): add additional options here for builder configuration.
-FailureOr<TensorRTEngineResult>
-buildFunction(mlir::FunctionOpInterface op,
-              TensorRTBuilderContext &builderContext,
-              TensorRTSerializedTimingCache &serializedTimingCache,
-              const TensorRTTranslationOptions &options =
-                  TensorRTTranslationOptions::fromCLFlags());
+FailureOr<TensorRTEngineResult> buildFunction(
+    mlir::FunctionOpInterface op, TensorRTBuilderContext &builderContext,
+    TensorRTSerializedTimingCache &serializedTimingCache,
+    const TensorRTTranslationOptions &options =
+        TensorRTTranslationOptions::fromCLFlags(),
+    std::function<std::string(Operation *)> layerMetadataCallback =
+        [](Operation *op) { return ""; });
 
 /// Create an instance of a translate-to-tensorrt pass using an existing
 /// TensorRTBuilderContext.
 std::unique_ptr<mlir::Pass> createTranslateTensorRTPass(
     std::shared_ptr<tensorrt::TensorRTBuilderContext> context,
+    std::function<std::string(Operation *)> layerMetadataCallback,
     TensorRTTranslationOptions options =
         TensorRTTranslationOptions::fromCLFlags());