init commit

pranavm-nvidia · pranavm-nvidia · commit 75bd2b1a6821 · 2024-09-16T10:07:02.000-07:00
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h b/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h
@@ -77,6 +77,11 @@ MLIR_CAPI_EXPORTED MTRT_Status mtrtStableHloToExecutableOptionsSetDebugOptions(
     const char **debugTypes, size_t debugTypeSizes,
     const char *dumpIrTreeDir = nullptr, const char *dumpTensorRTDir = nullptr);
 
+MLIR_CAPI_EXPORTED MTRT_Status
+mtrtStableHloToExecutableOptionsSetTensorRTTranslationMetadataCallback(
+    MTRT_StableHLOToExecutableOptions options,
+    const char *(*callback)(MlirOperation));
+
 MLIR_CAPI_EXPORTED MTRT_Status mtrtStableHloToExecutableOptionsDestroy(
     MTRT_StableHLOToExecutableOptions options);
 
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
@@ -34,6 +34,7 @@
 #include "mlir-executor/Runtime/API/API.h"
 #include "mlir-executor/Support/Status.h"
 #include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
+#include "mlir-tensorrt-dialect/Utils/Types.h"
 #include "mlir-tensorrt/Compiler/Client.h"
 #include "mlir-tensorrt/Compiler/Extension.h"
 #include "mlir-tensorrt/Compiler/Options.h"
@@ -130,6 +131,9 @@ struct StableHLOToExecutableOptions : public mlir::OptionsContext {
 
   DebugOptions debugOptions;
 
+  // TODO: This placeholder returns "" for every op; pick a sane default:
+  MetadataCallbackT layerMetadataCallback = [](MlirOperation op) { return ""; };
+
   /// Base class for extensions associated with StableHloToExecutableTask.
   class ExtensionBase : public TaskExtensionBase {
   public:
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
@@ -199,6 +199,23 @@ MTRT_Status mtrtStableHloToExecutableOptionsSetDebugOptions(
   return mtrtStatusGetOk();
 }
 
+/// Installs `callback` as the layer metadata callback stored in `options`.
+///
+/// A null `callback` is not stored as-is, because that would leave an empty
+/// `std::function` that cannot be invoked. Instead a callback returning an
+/// empty string for every operation is installed, matching the default.
+MTRT_Status
+mtrtStableHloToExecutableOptionsSetTensorRTTranslationMetadataCallback(
+    MTRT_StableHLOToExecutableOptions options,
+    const char *(*callback)(MlirOperation)) {
+  StableHLOToExecutableOptions *cppOpts = unwrap(options);
+  if (callback)
+    cppOpts->layerMetadataCallback = callback;
+  else
+    cppOpts->layerMetadataCallback = [](MlirOperation) { return ""; };
+  return mtrtStatusGetOk();
+}
+
 MTRT_Status mtrtStableHloToExecutableOptionsDestroy(
     MTRT_StableHLOToExecutableOptions options) {
   delete reinterpret_cast<StableHLOToExecutableOptions *>(options.ptr);
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTExtension/TensorRTExtension.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTExtension/TensorRTExtension.cpp
@@ -64,8 +64,8 @@ void StableHLOToExecutableTensorRTExtension::populatePasses(
     auto &trtPM = pm.nest<tensorrt::TensorRTModuleOp>();
     tensorrt::buildTensorRTModuleTransformationPipeline(
         trtPM, translationOptions.enableStronglyTyped);
-    trtPM.addPass(
-        tensorrt::createTranslateTensorRTPass(nullptr, translationOptions));
+    trtPM.addPass(tensorrt::createTranslateTensorRTPass(
+        nullptr, translationOptions, options.layerMetadataCallback));
     return;
   }
 
diff --git a/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/NetworkEncoder.h b/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/NetworkEncoder.h
@@ -24,9 +24,11 @@
 #ifndef MLIR_TENSORRT_TARGET_TENSORRT_TENSORRTENCODINGOPINTERFACE_NETWORKENCODER
 #define MLIR_TENSORRT_TARGET_TENSORRT_TENSORRTENCODINGOPINTERFACE_NETWORKENCODER
 
+#include "mlir-c/IR.h"
 #include "mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.h"
 #include "mlir-tensorrt-dialect/Utils/NvInferAdaptor.h"
 #include "mlir-tensorrt-dialect/Utils/NvInferPluginUtils.h"
+#include "mlir-tensorrt-dialect/Utils/Types.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/StringSet.h"
 
@@ -76,9 +78,11 @@ class NvInferNetworkEncoder {
 public:
   NvInferNetworkEncoder(nvinfer1::INetworkDefinition *network,
                         nvinfer1::IOptimizationProfile *profile,
-                        TensorRTVersion version, bool usesStronglyTyped)
+                        TensorRTVersion version, bool usesStronglyTyped,
+                        mlirtrt::MetadataCallbackT metadataCallback)
       : network(network), profile(profile), version(std::move(version)),
-        usesStronglyTyped(usesStronglyTyped) {}
+        usesStronglyTyped(usesStronglyTyped),
+        layerMetadataCallback(std::move(metadataCallback)) {}
 
   /// Lookup the TRT ITensor* equivalent of a Value.
   nvinfer1::ITensor *lookup(Value v) const;
@@ -238,6 +242,10 @@ class NvInferNetworkEncoder {
   bool hasQDQOps{false};
 
   PluginManager pluginMgr;
+
+  // TODO: Decide where to invoke this; encodeOp does not give us access to
+  // the layers.
+  mlirtrt::MetadataCallbackT layerMetadataCallback;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TranslateToTensorRT.h b/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Target/TranslateToTensorRT.h
@@ -24,6 +24,7 @@
 #include "mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/NetworkEncoder.h"
 #include "mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.h"
 #include "mlir-tensorrt-dialect/Utils/Options.h"
+#include "mlir-tensorrt-dialect/Utils/Types.h"
 #include "mlir/Support/LogicalResult.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -208,19 +209,26 @@ class TensorRTSerializedTimingCache {
 /// `tensorrt.shape_profile` arguments have been populated for each argument
 /// that has unknown dimensions.
 /// TODO(cbate): add additional options here for builder configuration.
-FailureOr<TensorRTEngineResult>
-buildFunction(mlir::FunctionOpInterface op,
-              TensorRTBuilderContext &builderContext,
-              TensorRTSerializedTimingCache &serializedTimingCache,
-              const TensorRTTranslationOptions &options =
-                  TensorRTTranslationOptions::fromCLFlags());
+FailureOr<TensorRTEngineResult> buildFunction(
+    mlir::FunctionOpInterface op, TensorRTBuilderContext &builderContext,
+    TensorRTSerializedTimingCache &serializedTimingCache,
+    const TensorRTTranslationOptions &options =
+        TensorRTTranslationOptions::fromCLFlags(),
+    // TODO: This placeholder returns "" for every op; pick a sane default:
+    mlirtrt::MetadataCallbackT layerMetadataCallback = [](MlirOperation op) {
+      return "";
+    });
 
 /// Create an instance of a translate-to-tensorrt pass using an existing
 /// TensorRTBuilderContext.
 std::unique_ptr<mlir::Pass> createTranslateTensorRTPass(
     std::shared_ptr<tensorrt::TensorRTBuilderContext> context,
     TensorRTTranslationOptions options =
-        TensorRTTranslationOptions::fromCLFlags());
+        TensorRTTranslationOptions::fromCLFlags(),
+    // TODO: This placeholder returns "" for every op; pick a sane default:
+    mlirtrt::MetadataCallbackT layerMetadataCallback = [](MlirOperation op) {
+      return "";
+    });
 
 /// Register llvm::cl opts related to TensorRT translation. This should be
 /// called before having LLVM parse CL options.
diff --git a/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Utils/Types.h b/mlir-tensorrt/tensorrt/include/mlir-tensorrt-dialect/Utils/Types.h
@@ -0,0 +1,18 @@
+#ifndef MLIR_TENSORRT_UTILS_TYPES_H
+#define MLIR_TENSORRT_UTILS_TYPES_H
+
+// `MlirOperation` is a C API handle declared in "mlir-c/IR.h"; include it
+// directly so this header does not depend on the includer's include order.
+#include "mlir-c/IR.h"
+#include "mlir/IR/Operation.h"
+#include <functional>
+
+namespace mlirtrt {
+/// Callback invoked with the C API handle of an operation; returns a C
+/// string. NOTE(review): presumably the string is metadata attached to the
+/// TensorRT layer(s) created for the op -- lifetime/ownership of the returned
+/// pointer is not specified here; confirm before use.
+using MetadataCallbackT = std::function<const char *(MlirOperation)>;
+} // namespace mlirtrt
+
+#endif // MLIR_TENSORRT_UTILS_TYPES_H
diff --git a/mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp b/mlir-tensorrt/tensorrt/lib/Target/TranslateToTensorRT.cpp
@@ -336,7 +336,8 @@ FailureOr<TensorRTEngineResult>
 tensorrt::buildFunction(mlir::FunctionOpInterface op,
                         TensorRTBuilderContext &builderContext,
                         TensorRTSerializedTimingCache &serializedTimingCache,
-                        const TensorRTTranslationOptions &opts) {
+                        const TensorRTTranslationOptions &opts,
+                        mlirtrt::MetadataCallbackT layerMetadataCallback) {
   assert(builderContext.getBuilder() && "expected valid builder context");
   std::unique_ptr<nvinfer1::IBuilder> &builder = builderContext.getBuilder();
 
@@ -357,9 +358,9 @@ tensorrt::buildFunction(mlir::FunctionOpInterface op,
   nvinfer1::IOptimizationProfile *optimProfile =
       builder->createOptimizationProfile();
 
-  NvInferNetworkEncoder encoder(network.get(), optimProfile,
-                                builderContext.getTensorRTVersion(),
-                                opts.enableStronglyTyped);
+  NvInferNetworkEncoder encoder(
+      network.get(), optimProfile, builderContext.getTensorRTVersion(),
+      opts.enableStronglyTyped, layerMetadataCallback);
 
   // Currently we only support single-block functions with unique return
   // terminator ops.
@@ -673,9 +674,10 @@ class TranslateToTensorRTEnginePass
 
   explicit TranslateToTensorRTEnginePass(
       std::shared_ptr<TensorRTBuilderContext> builderContext,
-      TensorRTTranslationOptions options)
-      : builderContext(builderContext), translationOptions(std::move(options)) {
-  }
+      TensorRTTranslationOptions options,
+      mlirtrt::MetadataCallbackT metadataCallback)
+      : builderContext(builderContext), translationOptions(std::move(options)),
+        layerMetadataCallback(std::move(metadataCallback)) {}
 
   LogicalResult initialize(MLIRContext *context) final {
     if (!this->builderContext) {
@@ -742,8 +744,9 @@ class TranslateToTensorRTEnginePass
         continue;
       }
 
-      FailureOr<TensorRTEngineResult> engineResult = buildFunction(
-          func, *builderContext, *timingCache, translationOptions);
+      FailureOr<TensorRTEngineResult> engineResult =
+          buildFunction(func, *builderContext, *timingCache, translationOptions,
+                        layerMetadataCallback);
       if (failed(engineResult) || !engineResult->serializedEngine) {
         func.emitError() << "failed to translate function '" << func.getName()
                          << "' to a TensorRT engine";
@@ -820,11 +823,18 @@ class TranslateToTensorRTEnginePass
 
   /// Options affecting TensorRT translation.
   TensorRTTranslationOptions translationOptions;
+
+  /// Callback forwarded to `buildFunction` for each translated function.
+  mlirtrt::MetadataCallbackT layerMetadataCallback;
 };
 } // namespace
 
+/// Creates the translate-to-TensorRT pass. All three arguments are by-value
+/// sink parameters, so they are moved into the pass rather than copied again.
 std::unique_ptr<mlir::Pass> tensorrt::createTranslateTensorRTPass(
     std::shared_ptr<tensorrt::TensorRTBuilderContext> context,
-    TensorRTTranslationOptions options) {
-  return std::make_unique<TranslateToTensorRTEnginePass>(context, options);
+    TensorRTTranslationOptions options,
+    mlirtrt::MetadataCallbackT layerMetadataCallback) {
+  return std::make_unique<TranslateToTensorRTEnginePass>(
+      std::move(context), std::move(options), std::move(layerMetadataCallback));
 }