42 changes: 42 additions & 0 deletions .github/workflows/mlir-tensorrt-ci.yml
@@ -36,6 +36,48 @@ jobs:

sudo apt-get autoremove -y
sudo apt-get autoclean -y

# Value of `github.workspace` is /home/runner/work/{repo-name}/{repo-name},
# i.e. /home/runner/work/TensorRT-Incubator/TensorRT-Incubator in our case.
# After this action, the repo is cloned into the above path.
- uses: actions/checkout@v4
with:
fetch-depth: 5

- name: Validate commit message
if: ${{ github.event_name == 'pull_request' }}
env:
PR_HEAD_COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
run: |
cat > commit_message_checker.py <<EOF
#!/usr/bin/python3
import re
import sys
import subprocess

git_cmd = f"git show -s --format=%B {sys.argv[1]}"
try:
    commit_message_cmd = subprocess.run(git_cmd.split(' '), capture_output=True, text=True, check=True)
    commit_message = commit_message_cmd.stdout.strip()
except subprocess.CalledProcessError as e:
    print(f"Failed to get PR HEAD commit message with error: {e.stderr.strip()}")
    sys.exit(1)

match = re.search(r"^(\[bot\].+|NFC: .+|(.+\n\n+.+\n+.+))$", commit_message, re.DOTALL)
if match:
    print("Commit message is in canonical form :)")
    sys.exit(0)
print("Commit message is not in the canonical form!")
print(commit_message)
print("")
print("Expected format is, ")
print("<title>")
print("<body>")
print("NOTE: Body should start on new line. '2 spaces + enter' for new line!")
print("NOTE: Body should be at least two lines.")
sys.exit(1)
EOF

python3 commit_message_checker.py ${PR_HEAD_COMMIT_SHA}

# Run initial format check
- name: Run python format and clang check
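For reference, a minimal stand-alone sketch of how the pattern above classifies messages; the sample commit texts are invented for illustration and are not taken from the repository's history.

import re

# Same pattern as in commit_message_checker.py above.
PATTERN = r"^(\[bot\].+|NFC: .+|(.+\n\n+.+\n+.+))$"

def is_canonical(message: str) -> bool:
    # re.DOTALL lets '.' span newlines, so the title/body groups can
    # match a multi-line message as a whole.
    return bool(re.search(PATTERN, message, re.DOTALL))

# Title, blank line, then a body of at least two lines -> accepted.
good = (
    "Forward host tensor attributes when outlining\n"
    "\n"
    "The outlined function now carries the value-bounds and\n"
    "memory-space attributes of its shape-tensor inputs."
)

# Single-line message without the NFC:/[bot] prefix -> rejected.
bad = "Fix a typo"

print(is_canonical(good))  # True
print(is_canonical(bad))   # False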
@@ -280,14 +280,15 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
mlir::tensorrt::TensorRTDialect::getShapeProfileArgAttrName();
StringRef tensorrtDimensionNamesAttrName =
mlir::tensorrt::TensorRTDialect::getDimensionNamesArgAttrName();
StringRef tensorrtValueBoundsAttrName =
mlir::tensorrt::TensorRTDialect::getShapeTensorValueBoundsArgAttrName();
StringRef hostTensorAttrName = mlir::getHostTensorArgAttrName();
StringRef memorySpaceAttrName =
plan::PlanDialect::getMemorySpaceConstraintAttrName();

SmallVector<Attribute> profileAttrsPerInput;
SmallVector<Attribute> dimensionNamesAttrsPerInput;
for (Value v : inputs) {
auto rtt = dyn_cast<RankedTensorType>(v.getType());
if (!rtt || rtt.hasStaticShape()) {
profileAttrsPerInput.push_back(Attribute{});
dimensionNamesAttrsPerInput.push_back(Attribute{});
if (!rtt) {
continue;
}

@@ -299,30 +300,42 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
}

int64_t argIndex = blockArg.getArgNumber();
profileAttrsPerInput.push_back(
parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
argIndex, tensorrtShapeBoundsAttrName));

dimensionNamesAttrsPerInput.push_back(
parentFunc.getArgAttrOfType<DictionaryAttr>(
argIndex, tensorrtDimensionNamesAttrName));

if (!profileAttrsPerInput.back()) {
return emitError(blockArg.getLoc())
<< "Profile attribute (" << tensorrtShapeBoundsAttrName
<< ") of argument " << argIndex << " is not set";
// Get shape profile and dimension name attributes of the input
if (rtt.hasStaticShape()) {
// A static-shaped argument can only have a value-bounds attr (shape input)
auto valueBoundAttr =
parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
argIndex, tensorrtValueBoundsAttrName);
if (valueBoundAttr) {
func->setArgAttr(argIndex, tensorrtValueBoundsAttrName, valueBoundAttr);
}
// Get memory space attribute of the input
auto memorySpaceAttr =
parentFunc.getArgAttr(argIndex, memorySpaceAttrName);
if (memorySpaceAttr) {
func->setArgAttr(argIndex, memorySpaceAttrName, memorySpaceAttr);
// Add the tensorrt.host_tensor attr; it is needed by NetworkEncoder for now
func->setArgAttr(argIndex, hostTensorAttrName, rewriter.getUnitAttr());
}
} else {
auto shapeBoundAttr =
parentFunc.getArgAttrOfType<tensorrt::ShapeProfileAttr>(
argIndex, tensorrtShapeBoundsAttrName);
if (!shapeBoundAttr) {
return emitError(blockArg.getLoc())
<< "Profile attribute (" << tensorrtShapeBoundsAttrName
<< ") of argument " << argIndex << " is not set";
}
func->setArgAttr(argIndex, tensorrtShapeBoundsAttrName, shapeBoundAttr);
auto dimensionNameAttr = parentFunc.getArgAttrOfType<DictionaryAttr>(
argIndex, tensorrtDimensionNamesAttrName);
if (dimensionNameAttr) {
func->setArgAttr(argIndex, tensorrtDimensionNamesAttrName,
dimensionNameAttr);
}
}
}

for (unsigned idx = 0; idx < func->getNumArguments(); idx++) {
if (profileAttrsPerInput[idx])
func->setArgAttr(idx, tensorrtShapeBoundsAttrName,
profileAttrsPerInput[idx]);
if (dimensionNamesAttrsPerInput[idx])
func->setArgAttr(idx, tensorrtDimensionNamesAttrName,
dimensionNamesAttrsPerInput[idx]);
}

rewriter.setInsertionPoint(inlineGroupOp);
auto callOp = rewriter.create<tensorrt::CallAllocOp>(
inlineGroupOp.getLoc(), inlineGroupOp.getResultTypes(), inputs,
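As a rough illustration of what the change above produces, the outlined callee for the @trt_host_input test below might carry argument attributes along these lines (the function name and the exact form of the signature are assumptions, not output of the pass):

// Hypothetical outlined signature: the dynamic-shaped input keeps its
// tensorrt.shape_profile / tensorrt.dimension_names attrs, while the
// static-shaped shape-tensor input gets tensorrt.value_bounds,
// plan.memory_space, and the unit tensorrt.host_tensor attr.
func.func private @trt_outlined_sketch(
    %arg0: tensor<?x4xf32> {tensorrt.dimension_names = {},
                            tensorrt.shape_profile = #tensorrt.shape_profile<min = [2, 4], opt = [4, 4], max = [6, 4]>},
    %arg1: tensor<i32> {plan.memory_space = #plan.memory_space<host>,
                        tensorrt.host_tensor,
                        tensorrt.value_bounds = #tensorrt.shape_profile<min = [1], opt = [2], max = [3]>}) -> tensor<?x?xf32>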
@@ -65,3 +65,27 @@ func.func @trt_dim_names(
%0 = tensorrt.identity %arg0 : tensor<?x?xf32> to tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: @trt_host_input
// CHECK-SAME: tensorrt.engine
func.func @trt_host_input(%arg0: tensor<?x4xf32> {tensorrt.dimension_names = {}, tensorrt.shape_profile = #tensorrt.shape_profile<min = [2, 4], opt = [4, 4], max = [6, 4]>}, %arg1: tensor<i32> {plan.memory_space = #plan.memory_space<host>, tensorrt.value_bounds = #tensorrt.shape_profile<min = [1], opt = [2], max = [3]>}) -> tensor<?x?xf32> {
%0 = tensorrt.element_wise <kSUM>(%arg0, %arg0 : tensor<?x4xf32>, tensor<?x4xf32>) -> tensor<?x4xf32>
%1 = tensorrt.shape %0 : tensor<?x4xf32> -> tensor<2xi32>
%2 = tensorrt.slice %1[0][1][1] : tensor<2xi32> to tensor<1xi32>
%3 = tensorrt.collapse_rank %2 : tensor<1xi32> to tensor<i32>
%cst_i32 = tensorrt.constant dense<1> : tensor<i32>
%4 = tensorrt.element_wise <kPROD>(%3, %cst_i32 : tensor<i32>, tensor<i32>) -> tensor<i32>
%5 = tensorrt.slice %1[1][1][1] : tensor<2xi32> to tensor<1xi32>
%6 = tensorrt.collapse_rank %5 : tensor<1xi32> to tensor<i32>
%7 = tensorrt.element_wise <kPROD>(%4, %6 : tensor<i32>, tensor<i32>) -> tensor<i32>
%cst_i32_0 = tensorrt.constant dense<1> : tensor<i32>
%8 = tensorrt.element_wise <kPROD>(%arg1, %cst_i32_0 : tensor<i32>, tensor<i32>) -> tensor<i32>
%9 = tensorrt.element_wise <kFLOOR_DIV>(%7, %8 : tensor<i32>, tensor<i32>) -> tensor<i32>
%cst_i32_1 = tensorrt.constant dense<1> : tensor<1xi32>
%10 = tensorrt.reshape %9 shape(%cst_i32_1: tensor<1xi32>) : tensor<i32> to tensor<?xi32>
%cst_i32_2 = tensorrt.constant dense<1> : tensor<1xi32>
%11 = tensorrt.reshape %arg1 shape(%cst_i32_2: tensor<1xi32>) : tensor<i32> to tensor<?xi32>
%12 = tensorrt.concatenation {axis = 0 : i32} ins(%10, %11 : tensor<?xi32>, tensor<?xi32>) -> tensor<2xi32>
%13 = tensorrt.reshape %0 shape(%12: tensor<2xi32>) : tensor<?x4xf32> to tensor<?x?xf32>
return %13 : tensor<?x?xf32>
}