-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[HLSL] Add WaveGetLaneCount() intrinsic to FE #143127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This commit adds code to lower WaveGetLaneCount() into the SPV or DXIL intrinsic. The backends will then need to lower the intrinsic into proper SPIR-V/DXIL. Related to llvm#99159
@llvm/pr-subscribers-hlsl @llvm/pr-subscribers-backend-directx Author: Nathan Gauër (Keenuts) ChangesThis commit adds code to lower WaveGetLaneCount() into the SPV or DXIL intrinsic. The backends will then need to lower the intrinsic into proper SPIR-V/DXIL. Related to #99159 Full diff: https://github.com/llvm/llvm-project/pull/143127.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index b15cde05410ab..68cd3d790e78a 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4909,6 +4909,12 @@ def HLSLWaveReadLaneAt : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLWaveGetLaneCount : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_wave_get_lane_count"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "unsigned int()";
+}
+
def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_clamp"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 10dd9fd04eb9e..abebc201808b0 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -691,6 +691,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return EmitRuntimeCall(
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
}
+ case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
+ Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
+ return EmitRuntimeCall(
+ Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+ }
case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
// Due to the use of variadic arguments we must explicitly retreive them and
// create our function type.
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index f7ca2077a576b..bb2b82fa1f5aa 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -107,6 +107,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveCountBits, wave_active_countbits)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(WaveGetLaneCount, wave_get_lane_count)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 3835eb78cec50..74aabbdaad266 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -2350,6 +2350,10 @@ _HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_is_first_lane)
__attribute__((convergent)) bool WaveIsFirstLane();
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_get_lane_count)
+__attribute__((convergent)) bool WaveGetLaneCount();
+
//===----------------------------------------------------------------------===//
// WaveReadLaneAt builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl
new file mode 100644
index 0000000000000..8072f6d4ea206
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+
+[numthreads(1, 1, 1)]
+void main() {
+ uint a, b;
+// CHECK-SPIRV: %[[#entry_tok:]] = call token @llvm.experimental.convergence.entry()
+
+// CHECK-SPIRV: %[[#loop_tok:]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#entry_tok]]) ]
+ while (a) {
+
+// CHECK-DXIL: %[[#]] = call i32 @llvm.dx.wave.get.lane.count()
+// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count()
+// CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#loop_tok]]) ]
+ a = WaveGetLaneCount();
+ }
+
+// CHECK-DXIL: %[[#]] = call i32 @llvm.dx.wave.get.lane.count()
+// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count()
+// CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#entry_tok]]) ]
+ b = WaveGetLaneCount();
+}
+
+// CHECK-DXIL: i32 @llvm.dx.wave.get.lane.count() #[[#attr:]]
+// CHECK-SPIRV: i32 @llvm.spv.wave.get.lane.count() #[[#attr:]]
+
+// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
+
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 0f73084f88c5e..68b31a899003b 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -148,6 +148,8 @@ def int_dx_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType
def int_dx_wave_reduce_usum : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_get_lane_count
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent]>;
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 8d984d6ce58df..e1c4a7aaf5a2f 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -102,6 +102,8 @@ let TargetPrefix = "spv" in {
def int_spv_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+ def int_spv_wave_get_lane_count
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent]>;
def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
|
@llvm/pr-subscribers-backend-x86 Author: Nathan Gauër (Keenuts) ChangesThis commit adds code to lower WaveGetLaneCount() into the SPV or DXIL intrinsic. The backends will then need to lower the intrinsic into proper SPIR-V/DXIL. Related to #99159 Full diff: https://github.com/llvm/llvm-project/pull/143127.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index b15cde05410ab..68cd3d790e78a 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4909,6 +4909,12 @@ def HLSLWaveReadLaneAt : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLWaveGetLaneCount : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_wave_get_lane_count"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "unsigned int()";
+}
+
def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_clamp"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 10dd9fd04eb9e..abebc201808b0 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -691,6 +691,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return EmitRuntimeCall(
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
}
+ case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
+ Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
+ return EmitRuntimeCall(
+ Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+ }
case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
// Due to the use of variadic arguments we must explicitly retreive them and
// create our function type.
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index f7ca2077a576b..bb2b82fa1f5aa 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -107,6 +107,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveCountBits, wave_active_countbits)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(WaveGetLaneCount, wave_get_lane_count)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 3835eb78cec50..74aabbdaad266 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -2350,6 +2350,10 @@ _HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_is_first_lane)
__attribute__((convergent)) bool WaveIsFirstLane();
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_get_lane_count)
+__attribute__((convergent)) bool WaveGetLaneCount();
+
//===----------------------------------------------------------------------===//
// WaveReadLaneAt builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl
new file mode 100644
index 0000000000000..8072f6d4ea206
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+
+[numthreads(1, 1, 1)]
+void main() {
+ uint a, b;
+// CHECK-SPIRV: %[[#entry_tok:]] = call token @llvm.experimental.convergence.entry()
+
+// CHECK-SPIRV: %[[#loop_tok:]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#entry_tok]]) ]
+ while (a) {
+
+// CHECK-DXIL: %[[#]] = call i32 @llvm.dx.wave.get.lane.count()
+// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count()
+// CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#loop_tok]]) ]
+ a = WaveGetLaneCount();
+ }
+
+// CHECK-DXIL: %[[#]] = call i32 @llvm.dx.wave.get.lane.count()
+// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count()
+// CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#entry_tok]]) ]
+ b = WaveGetLaneCount();
+}
+
+// CHECK-DXIL: i32 @llvm.dx.wave.get.lane.count() #[[#attr:]]
+// CHECK-SPIRV: i32 @llvm.spv.wave.get.lane.count() #[[#attr:]]
+
+// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
+
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 0f73084f88c5e..68b31a899003b 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -148,6 +148,8 @@ def int_dx_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType
def int_dx_wave_reduce_usum : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_get_lane_count
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent]>;
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 8d984d6ce58df..e1c4a7aaf5a2f 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -102,6 +102,8 @@ let TargetPrefix = "spv" in {
def int_spv_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+ def int_spv_wave_get_lane_count
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent]>;
def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
|
@@ -691,6 +691,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, | |||
return EmitRuntimeCall( | |||
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); | |||
} | |||
case Builtin::BI__builtin_hlsl_wave_get_lane_count: { | |||
Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic(); | |||
return EmitRuntimeCall( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is correct, just want to clarify the reason behind wave intrinsics being EmitRuntimeCall
instead of Builder.CreateIntrinsic(
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reason why other wave intrinsics are using this instead of Builder.CreateIntrinsics are convergence intrinsics: correctly adding the token to the call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/16982 Here is the relevant piece of the build log for the reference
|
This commit adds code to lower WaveGetLaneCount() into the SPV or DXIL intrinsic. The backends will then need to lower the intrinsic into proper SPIR-V/DXIL. Related to llvm#99159
This commit adds code to lower WaveGetLaneCount() into the SPV or DXIL intrinsic. The backends will then need to lower the intrinsic into proper SPIR-V/DXIL.
Related to #99159