-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[DirectX] Scalarize `extractelement` and `insertelement` with dynamic indices
#141676
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-directx Author: Deric C. (Icohedron) Changes: Fixes #141136
Full diff: https://github.com/llvm/llvm-project/pull/141676.diff 2 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 06708cec00cec..7bd0539c6bfe0 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -27,6 +27,19 @@ static const int MaxVecSize = 4;
using namespace llvm;
+// Recursively creates an array-like version of a given vector type.
+static Type *equivalentArrayTypeFromVector(Type *T) {
+ if (auto *VecTy = dyn_cast<VectorType>(T))
+ return ArrayType::get(VecTy->getElementType(),
+ dyn_cast<FixedVectorType>(VecTy)->getNumElements());
+ if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
+ Type *NewElementType = equivalentArrayTypeFromVector(ArrayTy->getElementType());
+ return ArrayType::get(NewElementType, ArrayTy->getNumElements());
+ }
+ // If it's not a vector or array, return the original type.
+ return T;
+}
+
class DXILDataScalarizationLegacy : public ModulePass {
public:
@@ -55,7 +68,7 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
bool visitCastInst(CastInst &CI) { return false; }
bool visitBitCastInst(BitCastInst &BCI) { return false; }
bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
- bool visitExtractElementInst(ExtractElementInst &EEI) { return false; }
+ bool visitExtractElementInst(ExtractElementInst &EEI);
bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
bool visitPHINode(PHINode &PHI) { return false; }
bool visitLoadInst(LoadInst &LI);
@@ -90,20 +103,6 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
return nullptr; // Not found
}
-// Recursively creates an array version of the given vector type.
-static Type *replaceVectorWithArray(Type *T, LLVMContext &Ctx) {
- if (auto *VecTy = dyn_cast<VectorType>(T))
- return ArrayType::get(VecTy->getElementType(),
- dyn_cast<FixedVectorType>(VecTy)->getNumElements());
- if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
- Type *NewElementType =
- replaceVectorWithArray(ArrayTy->getElementType(), Ctx);
- return ArrayType::get(NewElementType, ArrayTy->getNumElements());
- }
- // If it's not a vector or array, return the original type.
- return T;
-}
-
static bool isArrayOfVectors(Type *T) {
if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
return isa<VectorType>(ArrType->getElementType());
@@ -116,8 +115,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
ArrayType *ArrType = cast<ArrayType>(AI.getAllocatedType());
IRBuilder<> Builder(&AI);
- LLVMContext &Ctx = AI.getContext();
- Type *NewType = replaceVectorWithArray(ArrType, Ctx);
+ Type *NewType = equivalentArrayTypeFromVector(ArrType);
AllocaInst *ArrAlloca =
Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
ArrAlloca->setAlignment(AI.getAlign());
@@ -173,6 +171,38 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
return false;
}
+bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
+ // If the index is a constant then we don't need to scalarize it
+ Value *Index = EEI.getIndexOperand();
+ Type *IndexTy = Index->getType();
+ if (isa<ConstantInt>(Index))
+ return false;
+
+ IRBuilder<> Builder(&EEI);
+ VectorType *VecTy = EEI.getVectorOperandType();
+ assert(VecTy->getElementCount().isFixed() &&
+ "Vector operand of ExtractElement must have a fixed size");
+
+ Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
+ Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
+
+ for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+ Value *EE = Builder.CreateExtractElement(EEI.getVectorOperand(), I);
+ Value *GEP = Builder.CreateInBoundsGEP(
+ ArrTy, ArrAlloca,
+ {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
+ Builder.CreateStore(EE, GEP);
+ }
+
+ Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
+ {ConstantInt::get(IndexTy, 0), Index});
+ Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP);
+
+ EEI.replaceAllUsesWith(Load);
+ EEI.eraseFromParent();
+ return true;
+}
+
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
unsigned NumOperands = GEPI.getNumOperands();
@@ -257,7 +287,7 @@ static bool findAndReplaceVectors(Module &M) {
for (GlobalVariable &G : M.globals()) {
Type *OrigType = G.getValueType();
- Type *NewType = replaceVectorWithArray(OrigType, Ctx);
+ Type *NewType = equivalentArrayTypeFromVector(OrigType);
if (OrigType != NewType) {
// Create a new global variable with the updated type
// Note: Initializer is set via transformInitializer
diff --git a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
new file mode 100644
index 0000000000000..74e9202b540c1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+define float @extract_float_vec_dynamic(<4 x float> %0, i32 %1) {
+; CHECK-LABEL: define float @extract_float_vec_dynamic(
+; CHECK-SAME: <4 x float> [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = alloca [4 x float], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 0
+; CHECK-NEXT: store float [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 1
+; CHECK-NEXT: store float [[TMP6]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 2
+; CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 3
+; CHECK-NEXT: store float [[TMP10]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
+; CHECK-NEXT: ret float [[TMP13]]
+;
+ %e = extractelement <4 x float> %0, i32 %1
+ ret float %e
+}
+
+; An extractelement with a constant index should not be converted to array form
+define i16 @extract_i16_vec_constant(<4 x i16> %0) {
+; CHECK-LABEL: define i16 @extract_i16_vec_constant(
+; CHECK-SAME: <4 x i16> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
+; CHECK-NEXT: ret i16 [[E]]
+;
+ %e = extractelement <4 x i16> %0, i32 1
+ ret i16 %e
+}
+
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@@ -27,6 +27,20 @@ static const int MaxVecSize = 4; | |||
|
|||
using namespace llvm; | |||
|
|||
// Recursively creates an array-like version of a given vector type. | |||
static Type *equivalentArrayTypeFromVector(Type *T) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is fine. I would have just made a function declaration at the top so the implementation could live anywhere.
Title changed: `extractelement` with dynamic index → `extractelement` and `insertelement` with dynamic indices
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implementation looks great to me. Just want a check on the commented cases
This legalization will also need to place allocas in the entry block of the function. |
Type *ArrTy = ArrAlloca->getAllocatedType(); | ||
Value *GEPForStore = | ||
Builder.CreateInBoundsGEP(ArrTy, ArrAlloca, {Builder.getInt32(0), Index}, | ||
IEI.getName() + ".dynindex"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: This code seems to be used in both cases; if it makes sense, it could be moved inside `createArrayFromVector` and returned instead of the alloca?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense, but also feels kind of weird to not return the Alloca, given the name of the function. I would need to think of a new name. The createArrayFromVector function would also need a new arg for the index.
@@ -1,25 +1,76 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s | |||
|
|||
; Allocas should be placed in the entry block. | |||
; Allocas should also be reused across multiple insertelement and extractelement instructions for the same vector | |||
define void @alloca_placement_and_reuse(<3 x i32> %v1, <3 x i32> %v2, i32 %a, i32 %i, i32 %j) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: it might be easier to read if this was split out into two tests to test the independent things here
@@ -79,6 +79,16 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> { | |||
friend bool findAndReplaceVectors(llvm::Module &M); | |||
|
|||
private: | |||
typedef std::pair<AllocaInst *, SmallVector<Value *, 4>> AllocaAndGEPs; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems fine given the number of elements is currently capped at 4. But I think we should be very cautious if we consider any larger numbers, and that might very well be the case soon. Edit: 4 being the max size of a vector.
I think it is probably fine to just re-create the GEPs when we need them and let another optimization pass remove all of those instead of retaining them here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By the time HLSL has vectors larger than 4 is also when DXIL has native support for dynamic vector indexing, thus this data structure would not be used.
It just came to me that the implementation still needs more work. Two issues:
|
Fixes #141136
- Implement `visitExtractElementInst` and `visitInsertElementInst` in `DXILDataScalarizerVisitor` to scalarize `extractelement` and `insertelement` instructions whose index operand is not a `ConstantInt` by converting the vector to an array and then loading from the array
- Rename the `replaceVectorWithArray` helper function to `equivalentArrayTypeFromVector`, relocate the function toward the top of the file, and remove the unused `Ctx` parameter