Skip to content

Commit cd1b822

Browse files
vsemenov368 authored and igcbot committed
Add support for bindless images in VC
Enable bindless image support for LSC typed 2d intrinsics in VC.
1 parent f29af52 commit cd1b822

File tree

18 files changed

+534
-53
lines changed

18 files changed

+534
-53
lines changed

IGC/Options/include/igc/Options/CommonInternalOptions.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,10 @@ let Flags = InternalHelper.CommonInternalFlags in {
1717
defm use_bindless_buffers : CommonFlag<"use-bindless-buffers">,
1818
HelpText<"Use bindless mode for buffers">;
1919

20+
// -cl-intel-use-bindless-images
21+
defm use_bindless_images : CommonFlag<"use-bindless-images">,
22+
HelpText<"Use bindless mode for images">;
23+
2024
defm emit_zebin_visa_sections : CommonFlag<"emit-zebin-visa-sections">,
2125
HelpText<"Add vISA asm as sections in ZeBin">;
2226

IGC/Options/include/igc/Options/IGCInternalOptions.td

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -84,9 +84,6 @@ defm force_emu_sp_int32divrem : CommonFlag<"force-emu-sp-int32divrem">;
8484
// -cl-intel-force-disable-4GB-buffer
8585
defm force_disable_4GB_buffer : CommonFlag<"force-disable-4GB-buffer">;
8686

87-
// -cl-intel-use-bindless-images
88-
defm use_bindless_images : CommonFlag<"use-bindless-images">;
89-
9087
// -cl-intel-use-bindless-mode
9188
defm use_bindless_mode : CommonFlag<"use-bindless-mode">;
9289

IGC/Options/include/igc/Options/VCApiOptions.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,9 @@ def vc_use_plain_2d_images : PlainFlag<"vc-use-plain-2d-images">,
7676
def vc_use_bindless_buffers : PlainFlag<"vc-use-bindless-buffers">,
7777
HelpText<"Enable bindless buffer access">;
7878

79+
def vc_use_bindless_images : PlainFlag<"vc-use-bindless-images">,
80+
HelpText<"Enable bindless image access">;
81+
7982
def vc_disable_non_overlapping_region_opt :
8083
PlainFlag<"vc-disable-non-overlapping-region-opt">,
8184
HelpText<"Disable non-overlapping region optimization">;

IGC/VectorCompiler/include/vc/Driver/Driver.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,7 @@ struct CompileOptions {
130130
std::string StatsFile;
131131
std::string LLVMOptions;
132132
bool UseBindlessBuffers = false;
133+
bool UseBindlessImages = false;
133134
bool EmitZebinVisaSections = false;
134135
bool HasL1ReadOnlyCache = false;
135136
bool HasLocalMemFenceSupress = false;

IGC/VectorCompiler/include/vc/GenXOpts/GenXOpts.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ Pass *createCMImpParamPass(bool);
3232
//
3333
// CMKernelArgOffset - Determine offset of each CM kernel argument
3434
//
35-
Pass *createCMKernelArgOffsetPass(unsigned GrfByteSize);
35+
Pass *createCMKernelArgOffsetPass(unsigned GrfByteSize, bool UseBindlessImages);
3636

3737
//===----------------------------------------------------------------------===//
3838
//

IGC/VectorCompiler/include/vc/InternalIntrinsics/Intrinsic_definitions.py

Lines changed: 109 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1022,6 +1022,49 @@
10221022
],
10231023
"attributes" : "WriteMem", },
10241024

1025+
## ``llvm.vc.internal.lsc.*2d.typed.bss.*`` : LSC typed 2d block bindless intrinsics
1026+
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1027+
## * arg0: vNi8, Cache controls, where N is the number of supported cache levels [MBC]
1028+
## * arg1: i32, Surface BSS
1029+
## * arg2: i32, Block height [MBC]
1030+
## * arg3: i32, Block width (in elements) [MBC]
1031+
## * arg4: i32, Memory block X position (in bytes)
1032+
## * arg5: i32, Memory block Y position
1033+
## * arg6: data to write (store only)
1034+
##
1035+
## * Return value: the value read or void
1036+
##
1037+
"lsc_load_2d_tgm_bss" : { "result" : "anyvector",
1038+
"arguments" : [
1039+
"anyvector", # cache controls
1040+
"int", # i32 BSS
1041+
"int", # block height
1042+
"int", # block width
1043+
"int", # X offset
1044+
"int" # Y offset
1045+
],
1046+
"target" : [
1047+
"hasLSCMessages",
1048+
"hasLSCTypedMessages",
1049+
],
1050+
"attributes" : "ReadMem", },
1051+
"lsc_store_2d_tgm_bss" : { "result" : "void",
1052+
"arguments" : [
1053+
"anyvector", # cache controls
1054+
"int", # i32 BSS
1055+
"int", # block height
1056+
"int", # block width
1057+
"int", # X offset
1058+
"int", # Y offset
1059+
"anyvector"
1060+
],
1061+
"target" : [
1062+
"hasLSCMessages",
1063+
"hasLSCTypedMessages",
1064+
],
1065+
"attributes" : "WriteMem", },
1066+
1067+
10251068

10261069
## ``llvm.vc.internal.lsc.*.quad.tgm`` : Typed LSC load BTI intrinsic
10271070
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1089,6 +1132,72 @@
10891132
],
10901133
"attributes": "SideEffects", },
10911134

1135+
## ``llvm.vc.internal.lsc.*.quad.tgm.bss`` : Typed LSC load bindless intrinsic
1136+
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1137+
## * arg0: vNi1, Predicate (overloaded)
1138+
## * arg1: vNi8, Cache controls, where N is the number of supported cache levels [MBC]
1139+
## * arg2: i8, Channel mask [MBC]
1140+
## * arg3: i32, Surface BSS
1141+
## * arg4: vNi32, U pixel indices (overloaded)
1142+
## * arg5: vNi32, V pixel indices
1143+
## * arg6: vNi32, R pixel indices
1144+
## * arg7: vNi32, LOD pixel indices
1145+
## * arg8: vector to take values for masked simd lanes from (load)
1146+
## vector to take values to write (store)
1147+
##
1148+
## * Return value: the value read from memory (load) or void (store, prefetch)
1149+
##
1150+
"lsc_load_quad_tgm_bss": { "result": "anyvector",
1151+
"arguments": [
1152+
"anyint", # vNxi1, predicate
1153+
"anyvector", # cache controls
1154+
"char", # channel mask
1155+
"int", # i32 BSS
1156+
"anyint", # vNi32 U pixel index
1157+
3, # vNi32 V pixel index
1158+
3, # vNi32 R pixel index
1159+
3, # vNi32 LOD pixel index
1160+
0, # passthru value
1161+
],
1162+
"target" : [
1163+
"hasLSCMessages",
1164+
"hasLSCTypedMessages",
1165+
],
1166+
"attributes": "ReadMem", },
1167+
"lsc_store_quad_tgm_bss": { "result": "void",
1168+
"arguments": [
1169+
"anyint", # vNxi1, predicate
1170+
"anyvector", # cache controls
1171+
"char", # channel mask
1172+
"int", # i32 BSS
1173+
"anyint", # vNi32 U pixel index
1174+
2, # vNi32 V pixel index
1175+
2, # vNi32 R pixel index
1176+
2, # vNi32 LOD pixel index
1177+
"anyvector", # data to write
1178+
],
1179+
"target" : [
1180+
"hasLSCMessages",
1181+
"hasLSCTypedMessages",
1182+
],
1183+
"attributes": "WriteMem", },
1184+
"lsc_prefetch_quad_tgm_bss": { "result": "void",
1185+
"arguments": [
1186+
"anyint", # vNxi1, predicate
1187+
"anyvector", # cache controls
1188+
"char", # channel mask
1189+
"int", # i32 BSS
1190+
"anyint", # vNi32 U pixel index
1191+
2, # vNi32 V pixel index
1192+
2, # vNi32 R pixel index
1193+
2, # vNi32 LOD pixel index
1194+
],
1195+
"target" : [
1196+
"hasLSCMessages",
1197+
"hasLSCTypedMessages",
1198+
],
1199+
"attributes": "SideEffects", },
1200+
10921201

10931202
### ----------------------------
10941203
### Low-level sampler intrinsics

IGC/VectorCompiler/include/vc/Support/BackendConfig.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,9 @@ struct GenXBackendOptions {
127127
// Use bindless mode for buffers.
128128
bool UseBindlessBuffers = false;
129129

130+
// Use bindless mode for images.
131+
bool UseBindlessImages = false;
132+
130133
// Output binary format
131134
vc::BinaryKind Binary = vc::BinaryKind::OpenCL;
132135

@@ -352,6 +355,8 @@ class GenXBackendConfig : public ImmutablePass {
352355

353356
bool useBindlessBuffers() const { return Options.UseBindlessBuffers; }
354357

358+
bool useBindlessImages() const { return Options.UseBindlessImages; }
359+
355360
bool emitZebinVisaSections() const { return Options.EmitZebinVisaSections; }
356361

357362
bool saveStackCallLinkage() const { return Options.SaveStackCallLinkage; }

IGC/VectorCompiler/lib/Driver/Driver.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,7 @@ static GenXBackendOptions createBackendOptions(const vc::CompileOptions &Opts) {
230230
// disabled because mixed bindless and bindful addressing is not supported
231231
// by NEO (kernel debug leverages BTI #0)
232232
BackendOpts.DebuggabilityEmitDebuggableKernels =
233-
Opts.EmitDebuggableKernels && !Opts.UseBindlessBuffers;
233+
Opts.EmitDebuggableKernels && !Opts.UseBindlessBuffers && !Opts.UseBindlessImages;
234234
BackendOpts.DebuggabilityForLegacyPath =
235235
(Opts.Binary != vc::BinaryKind::CM) && Opts.EmitDebuggableKernels;
236236
BackendOpts.DebuggabilityZeBinCompatibleDWARF =
@@ -261,6 +261,7 @@ static GenXBackendOptions createBackendOptions(const vc::CompileOptions &Opts) {
261261
BackendOpts.GRFSize = IGCLLVM::makeOptional(Opts.GRFSize).value();
262262
BackendOpts.AutoLargeGRF = Opts.EnableAutoLargeGRF;
263263
BackendOpts.UseBindlessBuffers = Opts.UseBindlessBuffers;
264+
BackendOpts.UseBindlessImages = Opts.UseBindlessImages;
264265
if (Opts.SaveStackCallLinkage)
265266
BackendOpts.SaveStackCallLinkage = true;
266267
BackendOpts.UsePlain2DImages = Opts.UsePlain2DImages;
@@ -733,6 +734,8 @@ static Error fillApiOptions(const opt::ArgList &ApiOptions,
733734
Opts.UsePlain2DImages = true;
734735
if (ApiOptions.hasArg(OPT_vc_use_bindless_buffers))
735736
Opts.UseBindlessBuffers = true;
737+
if (ApiOptions.hasArg(OPT_vc_use_bindless_images))
738+
Opts.UseBindlessImages = true;
736739
if (ApiOptions.hasArg(OPT_vc_enable_preemption))
737740
Opts.EnablePreemption = true;
738741
if (ApiOptions.hasArg(OPT_library_compilation_common))
@@ -822,6 +825,8 @@ static Error fillInternalOptions(const opt::ArgList &InternalOptions,
822825
Opts.StatsFile = InternalOptions.getLastArgValue(OPT_stats_file).str();
823826
if (InternalOptions.hasArg(OPT_use_bindless_buffers_common))
824827
Opts.UseBindlessBuffers = true;
828+
if (InternalOptions.hasArg(OPT_use_bindless_images_common))
829+
Opts.UseBindlessImages = true;
825830
if (InternalOptions.hasArg(OPT_emit_zebin_visa_sections_common))
826831
Opts.EmitZebinVisaSections = true;
827832
if (InternalOptions.hasArg(OPT_fdisable_debuggable_kernels))

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3799,26 +3799,28 @@ void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
37993799
auto CreateLscTypedLoadQuad =
38003800
[&](VISA_PredOpnd *Pred, VISA_Exec_Size ExecSize,
38013801
VISA_EMask_Ctrl ExecMask, LSC_CACHE_OPTS CacheOpts,
3802-
LSC_DATA_CHMASK ChMask, LSC_ADDR_TYPE AddrType, VISA_VectorOpnd *Surface, VISA_RawOpnd *Dst,
3803-
VISA_RawOpnd *AddrsU, VISA_RawOpnd *AddrsV, VISA_RawOpnd *AddrsR,
3804-
VISA_RawOpnd *AddrsLOD) {
3802+
LSC_DATA_CHMASK ChMask, LSC_ADDR_TYPE AddrType,
3803+
VISA_VectorOpnd *Surface, VISA_RawOpnd *Dst, VISA_RawOpnd *AddrsU,
3804+
VISA_RawOpnd *AddrsV, VISA_RawOpnd *AddrsR, VISA_RawOpnd *AddrsLOD) {
38053805
LLVM_DEBUG(dbgs() << "CreateLscTypedLoadQuad: " << *CI << "\n");
3806-
IGC_ASSERT(AddrType == LSC_ADDR_TYPE_BTI);
3806+
IGC_ASSERT(AddrType == LSC_ADDR_TYPE_BTI ||
3807+
AddrType == LSC_ADDR_TYPE_BSS);
38073808
LSC_DATA_SHAPE Shape = {LSC_DATA_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE};
38083809
Shape.chmask = ChMask;
38093810
CISA_CALL(Kernel->AppendVISALscTypedLoad(
38103811
LSC_OP::LSC_LOAD_QUAD, Pred, ExecSize, ExecMask, CacheOpts,
3811-
AddrType, LSC_ADDR_SIZE_32b, Shape, Surface, 0, Dst,
3812-
AddrsU, 0, AddrsV, 0, AddrsR, 0, AddrsLOD));
3812+
AddrType, LSC_ADDR_SIZE_32b, Shape, Surface, 0, Dst, AddrsU, 0,
3813+
AddrsV, 0, AddrsR, 0, AddrsLOD));
38133814
};
38143815
auto CreateLscTypedStoreQuad =
38153816
[&](VISA_PredOpnd *Pred, VISA_Exec_Size ExecSize,
38163817
VISA_EMask_Ctrl ExecMask, LSC_CACHE_OPTS CacheOpts,
3817-
LSC_DATA_CHMASK ChMask, LSC_ADDR_TYPE AddrType, VISA_VectorOpnd *Surface,
3818-
VISA_RawOpnd *AddrsU, VISA_RawOpnd *AddrsV, VISA_RawOpnd *AddrsR,
3819-
VISA_RawOpnd *AddrsLOD, VISA_RawOpnd *Data) {
3818+
LSC_DATA_CHMASK ChMask, LSC_ADDR_TYPE AddrType,
3819+
VISA_VectorOpnd *Surface, VISA_RawOpnd *AddrsU, VISA_RawOpnd *AddrsV,
3820+
VISA_RawOpnd *AddrsR, VISA_RawOpnd *AddrsLOD, VISA_RawOpnd *Data) {
38203821
LLVM_DEBUG(dbgs() << "CreateLscTypedStoreQuad: " << *CI << "\n");
3821-
IGC_ASSERT(AddrType == LSC_ADDR_TYPE_BTI);
3822+
IGC_ASSERT(AddrType == LSC_ADDR_TYPE_BTI ||
3823+
AddrType == LSC_ADDR_TYPE_BSS);
38223824
LSC_DATA_SHAPE Shape = {LSC_DATA_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE};
38233825
Shape.chmask = ChMask;
38243826
CISA_CALL(Kernel->AppendVISALscTypedStore(

0 commit comments

Comments
 (0)