[DirectX] Scalarize `extractelement` and `insertelement` with dynamic indices #141676

Icohedron · 2025-05-27T21:47:09Z

Fixes #141136

Implement visitExtractElementInst and visitInsertElementInst in DXILDataScalarizerVisitor to scalarize extractelement and insertelement instructions whose index operand is not a ConstantInt. The vector is converted to an array, and the dynamically indexed element is then loaded from (extractelement) or stored to (insertelement) that array.
Rename the replaceVectorWithArray helper function to equivalentArrayTypeFromVector, relocate the function toward the top of the file, and remove the unused Ctx parameter

llvmbot · 2025-05-27T21:47:43Z

@llvm/pr-subscribers-backend-directx

Author: Deric C. (Icohedron)

Changes

Fixes #141136

Implement visitExtractElementInst in DXILDataScalarizerVisitor to scalarize extractelement instructions whose index operand is not a ConstantInt by converting the vector to an array and then loading from the array
Rename the replaceVectorWithArray helper function to equivalentArrayTypeFromVector, relocate the function toward the top of the file, and remove the unused Ctx parameter

Full diff: https://github.com/llvm/llvm-project/pull/141676.diff

2 Files Affected:

(modified) llvm/lib/Target/DirectX/DXILDataScalarization.cpp (+48-18)
(added) llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll (+38)

diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 06708cec00cec..7bd0539c6bfe0 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -27,6 +27,19 @@ static const int MaxVecSize = 4;
 
 using namespace llvm;
 
+// Recursively creates an array-like version of a given vector type.
+static Type *equivalentArrayTypeFromVector(Type *T) {
+  if (auto *VecTy = dyn_cast<VectorType>(T))
+    return ArrayType::get(VecTy->getElementType(),
+                          dyn_cast<FixedVectorType>(VecTy)->getNumElements());
+  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
+    Type *NewElementType = equivalentArrayTypeFromVector(ArrayTy->getElementType());
+    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
+  }
+  // If it's not a vector or array, return the original type.
+  return T;
+}
+
 class DXILDataScalarizationLegacy : public ModulePass {
 
 public:
@@ -55,7 +68,7 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
   bool visitCastInst(CastInst &CI) { return false; }
   bool visitBitCastInst(BitCastInst &BCI) { return false; }
   bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
-  bool visitExtractElementInst(ExtractElementInst &EEI) { return false; }
+  bool visitExtractElementInst(ExtractElementInst &EEI);
   bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
   bool visitPHINode(PHINode &PHI) { return false; }
   bool visitLoadInst(LoadInst &LI);
@@ -90,20 +103,6 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
   return nullptr; // Not found
 }
 
-// Recursively creates an array version of the given vector type.
-static Type *replaceVectorWithArray(Type *T, LLVMContext &Ctx) {
-  if (auto *VecTy = dyn_cast<VectorType>(T))
-    return ArrayType::get(VecTy->getElementType(),
-                          dyn_cast<FixedVectorType>(VecTy)->getNumElements());
-  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
-    Type *NewElementType =
-        replaceVectorWithArray(ArrayTy->getElementType(), Ctx);
-    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
-  }
-  // If it's not a vector or array, return the original type.
-  return T;
-}
-
 static bool isArrayOfVectors(Type *T) {
   if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
     return isa<VectorType>(ArrType->getElementType());
@@ -116,8 +115,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
 
   ArrayType *ArrType = cast<ArrayType>(AI.getAllocatedType());
   IRBuilder<> Builder(&AI);
-  LLVMContext &Ctx = AI.getContext();
-  Type *NewType = replaceVectorWithArray(ArrType, Ctx);
+  Type *NewType = equivalentArrayTypeFromVector(ArrType);
   AllocaInst *ArrAlloca =
       Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
   ArrAlloca->setAlignment(AI.getAlign());
@@ -173,6 +171,38 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
   return false;
 }
 
+bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
+  // If the index is a constant then we don't need to scalarize it
+  Value *Index = EEI.getIndexOperand();
+  Type *IndexTy = Index->getType();
+  if (isa<ConstantInt>(Index))
+    return false;
+
+  IRBuilder<> Builder(&EEI);
+  VectorType *VecTy = EEI.getVectorOperandType();
+  assert(VecTy->getElementCount().isFixed() &&
+         "Vector operand of ExtractElement must have a fixed size");
+  
+  Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
+  Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
+
+  for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+    Value *EE = Builder.CreateExtractElement(EEI.getVectorOperand(), I);
+    Value *GEP = Builder.CreateInBoundsGEP(
+        ArrTy, ArrAlloca,
+        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
+    Builder.CreateStore(EE, GEP);
+  }
+
+  Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
+                                         {ConstantInt::get(IndexTy, 0), Index});
+  Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP);
+
+  EEI.replaceAllUsesWith(Load);
+  EEI.eraseFromParent();
+  return true;
+}
+
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
 
   unsigned NumOperands = GEPI.getNumOperands();
@@ -257,7 +287,7 @@ static bool findAndReplaceVectors(Module &M) {
   for (GlobalVariable &G : M.globals()) {
     Type *OrigType = G.getValueType();
 
-    Type *NewType = replaceVectorWithArray(OrigType, Ctx);
+    Type *NewType = equivalentArrayTypeFromVector(OrigType);
     if (OrigType != NewType) {
       // Create a new global variable with the updated type
       // Note: Initializer is set via transformInitializer
diff --git a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
new file mode 100644
index 0000000000000..74e9202b540c1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+define float @extract_float_vec_dynamic(<4 x float> %0, i32 %1) {
+; CHECK-LABEL: define float @extract_float_vec_dynamic(
+; CHECK-SAME: <4 x float> [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = alloca [4 x float], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 0
+; CHECK-NEXT:    store float [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 1
+; CHECK-NEXT:    store float [[TMP6]], ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 2
+; CHECK-NEXT:    store float [[TMP8]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 3
+; CHECK-NEXT:    store float [[TMP10]], ptr [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
+; CHECK-NEXT:    ret float [[TMP13]]
+;
+  %e = extractelement <4 x float> %0, i32 %1
+  ret float %e
+}
+
+; An extractelement with a constant index should not be converted to array form
+define i16 @extract_i16_vec_constant(<4 x i16> %0) {
+; CHECK-LABEL: define i16 @extract_i16_vec_constant(
+; CHECK-SAME: <4 x i16> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
+; CHECK-NEXT:    ret i16 [[E]]
+;
+  %e = extractelement <4 x i16> %0, i32 1
+  ret i16 %e
+}
+

github-actions · 2025-05-27T21:49:26Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

farzonl · 2025-05-27T22:45:51Z

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

@@ -27,6 +27,20 @@ static const int MaxVecSize = 4;

 using namespace llvm;

+// Recursively creates an array-like version of a given vector type.
+static Type *equivalentArrayTypeFromVector(Type *T) {


This is fine; I would have just made a function declaration at the top so the implementation could live anywhere.

inbelic

Implementation looks great to me. Just want a check on the commented cases

llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll

Icohedron · 2025-06-05T19:52:16Z

This legalization will also need to place allocas in the entry block of the function.

inbelic · 2025-06-06T22:42:03Z

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

+  Type *ArrTy = ArrAlloca->getAllocatedType();
+  Value *GEPForStore =
+      Builder.CreateInBoundsGEP(ArrTy, ArrAlloca, {Builder.getInt32(0), Index},
+                                IEI.getName() + ".dynindex");


nit: This code seems to be used in both cases; if it makes sense, could it be moved inside createArrayFromVector and returned instead of the alloca?

Makes sense, but also feels kind of weird to not return the Alloca, given the name of the function. I would need to think of a new name. The createArrayFromVector function would also need a new arg for the index.

inbelic · 2025-06-06T22:46:00Z

llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll

@@ -1,25 +1,76 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

+; Allocas should be placed in the entry block.
+; Allocas should also be reused across multiple insertelement and extractelement instructions for the same vector
+define void @alloca_placement_and_reuse(<3 x i32> %v1, <3 x i32> %v2, i32 %a, i32 %i, i32 %j) {


nit: it might be easier to read if this was split out into two tests to test the independent things here

inbelic · 2025-06-06T22:53:06Z

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

@@ -79,6 +79,16 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
  friend bool findAndReplaceVectors(llvm::Module &M);

 private:
+  typedef std::pair<AllocaInst *, SmallVector<Value *, 4>> AllocaAndGEPs;


This seems fine given the number of elements is currently capped at 4. But I think we should be very cautious if we consider any larger numbers, and that might very well be the case soon. Edit: 4 being the max size of a vector.

I think it is probably fine to just re-create the GEPs when we need them and let another optimization pass remove the redundant ones, instead of retaining them here.

By the time HLSL has vectors larger than 4, DXIL will also have native support for dynamic vector indexing, so this data structure would not be used.

Icohedron · 2025-06-07T00:07:51Z

It just came to me that the implementation still needs more work. Two issues:

1. Two insertelement instructions on the same vector are currently handled incorrectly: they affect each other's results, which shouldn't be the case.
2. If extractelement or insertelement is used on a vector that is not a function argument, the stores to the alloca are invalid because they reference a vector that is not yet defined.

Scalarize extractelement with dynamic index

d3f1a51

llvmbot added the backend:DirectX label May 27, 2025

Icohedron commented May 27, 2025

View reviewed changes

llvm/lib/Target/DirectX/DXILDataScalarization.cpp Outdated Show resolved Hide resolved

Apply clang-format

cda7e88

farzonl reviewed May 27, 2025

View reviewed changes

llvm/lib/Target/DirectX/DXILDataScalarization.cpp Outdated Show resolved Hide resolved

Remove unnecessary assert

be5d425

farzonl reviewed May 27, 2025

View reviewed changes

llvm/lib/Target/DirectX/DXILDataScalarization.cpp Show resolved Hide resolved

farzonl reviewed May 27, 2025

View reviewed changes

llvm/lib/Target/DirectX/DXILDataScalarization.cpp Outdated Show resolved Hide resolved

farzonl reviewed May 27, 2025

View reviewed changes

Icohedron added 3 commits May 27, 2025 23:38

Scalarize dynamically-indexed insertelement

15cf98c

Subroutine creating an array from a vector

4c28344

Refactor visitExtractElementInst and visitInsertElementInst

989d82e

Icohedron changed the title ~~[DirectX] Scalarize extractelement with dynamic index~~ [DirectX] Scalarize extractelement and insertelement with dynamic indices May 28, 2025

Complete scalarization of insertelement with dynamic index

bfa93ea

inbelic reviewed Jun 5, 2025

View reviewed changes

llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll Outdated Show resolved Hide resolved

farzonl approved these changes Jun 5, 2025

View reviewed changes

Reuse allocas and place allocas in entry block

f78b059

Icohedron requested a review from farzonl June 6, 2025 20:31

inbelic reviewed Jun 6, 2025

View reviewed changes

Icohedron marked this pull request as draft June 7, 2025 00:09

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[DirectX] Scalarize `extractelement` and `insertelement` with dynamic indices #141676

[DirectX] Scalarize `extractelement` and `insertelement` with dynamic indices #141676

Icohedron commented May 27, 2025 •

edited

Loading

Uh oh!

llvmbot commented May 27, 2025

Uh oh!

github-actions bot commented May 27, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

farzonl May 27, 2025

Uh oh!

inbelic left a comment

Uh oh!

Uh oh!

Icohedron commented Jun 5, 2025

Uh oh!

inbelic Jun 6, 2025

Uh oh!

Icohedron Jun 6, 2025

Uh oh!

inbelic Jun 6, 2025

Uh oh!

inbelic Jun 6, 2025 •

edited

Loading

Uh oh!

Icohedron Jun 6, 2025 •

edited

Loading

Uh oh!

Icohedron commented Jun 7, 2025

Uh oh!

Uh oh!

[DirectX] Scalarize extractelement and insertelement with dynamic indices #141676

Are you sure you want to change the base?

[DirectX] Scalarize extractelement and insertelement with dynamic indices #141676

Conversation

Icohedron commented May 27, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 27, 2025

Uh oh!

github-actions bot commented May 27, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

farzonl May 27, 2025

Choose a reason for hiding this comment

Uh oh!

inbelic left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Icohedron commented Jun 5, 2025

Uh oh!

inbelic Jun 6, 2025

Choose a reason for hiding this comment

Uh oh!

Icohedron Jun 6, 2025

Choose a reason for hiding this comment

Uh oh!

inbelic Jun 6, 2025

Choose a reason for hiding this comment

Uh oh!

inbelic Jun 6, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Icohedron Jun 6, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Icohedron commented Jun 7, 2025

Uh oh!

Uh oh!

[DirectX] Scalarize `extractelement` and `insertelement` with dynamic indices #141676

[DirectX] Scalarize `extractelement` and `insertelement` with dynamic indices #141676

Icohedron commented May 27, 2025 •

edited

Loading

github-actions bot commented May 27, 2025 •

edited

Loading

inbelic Jun 6, 2025 •

edited

Loading

Icohedron Jun 6, 2025 •

edited

Loading