Skip to content

Commit 6f65b49

Browse files
mshelego and igcbot
authored and committed
Move float control management to a separate pass
Functions with the CMFloatControl attribute have to save the float control state on entry and restore it before returning. Doing so requires a register, so the cr0 reads/writes must be created before register allocation.
1 parent de3b29e commit 6f65b49

File tree

12 files changed

+306
-147
lines changed

12 files changed

+306
-147
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#=========================== begin_copyright_notice ============================
22
#
3-
# Copyright (C) 2017-2023 Intel Corporation
3+
# Copyright (C) 2017-2024 Intel Corporation
44
#
55
# SPDX-License-Identifier: MIT
66
#
@@ -43,6 +43,7 @@ set(CODEGEN_SOURCES
4343
GenXEmulate.cpp
4444
GenXExtractVectorizer.cpp
4545
GenXFixInvalidFuncName.cpp
46+
GenXFloatControl.cpp
4647
GenXLegalizeGVLoadUses.cpp
4748
GenXGASCastAnalyzer.cpp
4849
GenXGASDynamicResolution.cpp

IGC/VectorCompiler/lib/GenXCodeGen/GenX.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2023 Intel Corporation
3+
Copyright (C) 2017-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -159,6 +159,7 @@ ModulePass *createGenXSLMResolution();
159159
FunctionPass *createGenXLscAddrCalcFoldingPass();
160160
ModulePass *createGenXDetectPointerArgPass();
161161
FunctionPass *createGenXLCECalculationPass();
162+
FunctionPass *createGenXFloatControlPass();
162163

163164
namespace genx {
164165

IGC/VectorCompiler/lib/GenXCodeGen/GenXBaling.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,18 @@ bool GenXBaling::isSafeToMove(Instruction *Op, Instruction *From, Instruction *T
328328
if (!genx::isSafeToSink_CheckAVLoadKill(Op, To, this))
329329
return false;
330330

331+
// Do not move cr0 reads
332+
if (GenXIntrinsic::isRdRegion(Op)) {
333+
auto *ReadPredef = dyn_cast<Instruction>(
334+
Op->getOperand(GenXIntrinsic::GenXRegion::OldValueOperandNum));
335+
if (ReadPredef && GenXIntrinsic::isReadPredefReg(ReadPredef)) {
336+
uint32_t RegId =
337+
cast<ConstantInt>(ReadPredef->getOperand(0))->getZExtValue();
338+
if (RegId == PreDefined_Vars::PREDEFINED_CR0)
339+
return false;
340+
}
341+
}
342+
331343
if (DisableMemOrderCheck || !Op->mayReadOrWriteMemory())
332344
return true;
333345

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 0 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -375,14 +375,6 @@ class GenXKernelBuilder final {
375375
Function *KernFunc = nullptr;
376376
PreDefined_Surface StackSurf = PreDefined_Surface::PREDEFINED_SURFACE_INVALID;
377377

378-
// The default float control from kernel attribute. Each subroutine may
379-
// overrride this control mask, but it should revert back to the default float
380-
// control mask before exiting from the subroutine.
381-
uint32_t FloatControlKernel = 0;
382-
uint32_t FloatControlMask = 0;
383-
384-
// The hardware-initialization value for the float control register.
385-
static constexpr uint32_t FloatControlDefault = 0x0;
386378

387379
// normally false, set to true if there is any SIMD CF in the func or this is
388380
// (indirectly) called inside any SIMD CF.
@@ -417,7 +409,6 @@ class GenXKernelBuilder final {
417409
bool buildInstruction(Instruction *Inst);
418410
bool buildMainInst(Instruction *Inst, genx::BaleInfo BI, unsigned Mod,
419411
const DstOpndDesc &DstDesc);
420-
void buildControlRegUpdate(unsigned Mask, bool Clear);
421412
void buildJoin(CallInst *Join, BranchInst *Branch);
422413
bool buildBranch(BranchInst *Branch);
423414
void buildIndirectBr(IndirectBrInst *Br);
@@ -1112,17 +1103,6 @@ bool GenXKernelBuilder::run() {
11121103
GrfByteSize = Subtarget->getGRFByteSize();
11131104
StackSurf = Subtarget->stackSurface();
11141105

1115-
using namespace visa;
1116-
FloatControlMask = CRBits::DoublePrecisionDenorm |
1117-
CRBits::SinglePrecisionDenorm |
1118-
CRBits::HalfPrecisionDenorm | CRBits::RoundingBitMask;
1119-
FloatControlKernel = CRBits::RTNE;
1120-
1121-
// If the subtarget supports systolic denorm control, retain denormals for the
1122-
// systolic.
1123-
if (Subtarget->hasSystolicDenormControl())
1124-
FloatControlKernel |= CRBits::SystolicDenorm;
1125-
11261106
StackCallExecSize =
11271107
getExecSizeFromValue(BackendConfig->getInteropSubgroupSize());
11281108

@@ -1314,33 +1294,6 @@ void GenXKernelBuilder::buildInstructions() {
13141294

13151295
beginFunctionLight(Func);
13161296

1317-
// If a float control is specified, emit code to make that happen.
1318-
// Float control contains rounding mode and denorm behaviour. Relevant bits
1319-
// are already set as defined for VISA control reg in header definition on
1320-
// enums.
1321-
uint32_t FloatControl = FloatControlKernel;
1322-
1323-
if (Func->hasFnAttribute(genx::FunctionMD::CMFloatControl)) {
1324-
Func->getFnAttribute(genx::FunctionMD::CMFloatControl)
1325-
.getValueAsString()
1326-
.getAsInteger(0, FloatControl);
1327-
1328-
// Set rounding mode to required state if that isn't zero
1329-
FloatControl &= FloatControlMask;
1330-
FloatControl |= FloatControlKernel & ~FloatControlMask;
1331-
if (FloatControl != (FloatControlKernel & FloatControlMask) &&
1332-
vc::isKernel(Func)) {
1333-
FloatControlKernel &= ~FloatControlMask;
1334-
FloatControlKernel |= FloatControl;
1335-
}
1336-
}
1337-
1338-
if ((vc::isKernel(Func) && FloatControlKernel != 0) ||
1339-
FloatControl != (FloatControlKernel & FloatControlMask)) {
1340-
buildControlRegUpdate(FloatControlMask, true);
1341-
buildControlRegUpdate(FloatControl, false);
1342-
}
1343-
13441297
// Only output a label for the initial basic block if it is used from
13451298
// somewhere else.
13461299
bool NeedsLabel = !Func->front().use_empty();
@@ -4061,39 +4014,6 @@ void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
40614014
#include "GenXIntrinsicsBuildMap.inc"
40624015
}
40634016

4064-
/**************************************************************************************************
4065-
* buildControlRegUpdate : generate an instruction to apply a mask to
4066-
* the control register (V14).
4067-
*
4068-
* Enter: Mask = the mask to apply
4069-
* Clear = false if bits set in Mask should be set in V14,
4070-
* true if bits set in Mask should be cleared in V14.
4071-
*/
4072-
void GenXKernelBuilder::buildControlRegUpdate(unsigned Mask, bool Clear) {
4073-
ISA_Opcode Opcode;
4074-
// write opcode
4075-
if (Clear) {
4076-
Opcode = ISA_AND;
4077-
Mask = ~Mask;
4078-
} else
4079-
Opcode = ISA_OR;
4080-
4081-
Region Single = Region(1, 4);
4082-
4083-
VISA_GenVar *Decl = nullptr;
4084-
CISA_CALL(Kernel->GetPredefinedVar(Decl, PREDEFINED_CR0));
4085-
VISA_VectorOpnd *dst =
4086-
createRegionOperand(&Single, Decl, DONTCARESIGNED, 0, true);
4087-
VISA_VectorOpnd *src0 =
4088-
createRegionOperand(&Single, Decl, DONTCARESIGNED, 0, false);
4089-
4090-
VISA_VectorOpnd *src1 = nullptr;
4091-
CISA_CALL(Kernel->CreateVISAImmediate(src1, &Mask, ISA_TYPE_UD));
4092-
4093-
appendVISALogicOrShiftInst(Opcode, nullptr, false, vISA_EMASK_M1, EXEC_SIZE_1,
4094-
dst, src0, src1);
4095-
}
4096-
40974017
/***********************************************************************
40984018
* buildBranch : build a conditional or unconditional branch
40994019
*
@@ -5493,17 +5413,6 @@ void GenXKernelBuilder::buildCall(CallInst *CI, const DstOpndDesc &DstDesc) {
54935413
}
54945414

54955415
void GenXKernelBuilder::buildRet(ReturnInst *RI) {
5496-
uint32_t FloatControl = 0;
5497-
auto F = RI->getFunction();
5498-
F->getFnAttribute(genx::FunctionMD::CMFloatControl)
5499-
.getValueAsString()
5500-
.getAsInteger(0, FloatControl);
5501-
FloatControl &= FloatControlMask;
5502-
if (FloatControl != (FloatControlKernel & FloatControlMask)) {
5503-
buildControlRegUpdate(FloatControlMask, true);
5504-
if (FloatControlKernel & FloatControlMask)
5505-
buildControlRegUpdate(FloatControlKernel, false);
5506-
}
55075416
if (vc::requiresStackCall(Func)) {
55085417
appendVISACFFunctionRetInst(nullptr, vISA_EMASK_M1, StackCallExecSize);
55095418
} else {

IGC/VectorCompiler/lib/GenXCodeGen/GenXFloatControl.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#include "GenX.h"
10+
#include "GenXTargetMachine.h"
11+
#include "GenXUtil.h"
12+
#include "GenXVisa.h"
13+
14+
#include "llvm/CodeGen/TargetPassConfig.h"
15+
#include "llvm/InitializePasses.h"
16+
17+
#define DEBUG_TYPE "GENX_FLOAT_CONTROL"
18+
19+
namespace llvm {
20+
21+
// Function pass that inserts reads and writes of the predefined cr0 register
// into the IR: a float-control update at function entry and, for functions
// that are not group heads, a restore of the saved value before each return
// (see runOnFunction).
class GenXFloatControl : public FunctionPass {
22+
// Bit mask of the cr0 bits this pass is allowed to change. It is recomputed
// at the start of every runOnFunction call.
uint32_t Mask = 0;
23+
24+
// Reads the CMFloatControl function attribute of F into *Val; returns false
// when F does not carry the attribute.
bool getFloatControl(Function &F, uint32_t *Val);
25+
// Emits the clear-then-set update of cr0 before InsertBefore and returns the
// value that was read from cr0 before the update.
Value *buildCr0Update(uint32_t Value, Instruction *InsertBefore);
26+
// Emits a write of V into cr0 before InsertBefore.
void buildCr0Write(Value *V, Instruction *InsertBefore);
27+
28+
public:
29+
static char ID;
30+
31+
explicit GenXFloatControl() : FunctionPass(ID) {}
32+
33+
bool runOnFunction(Function &F) override;
34+
// The subtarget is obtained through TargetPassConfig in runOnFunction, hence
// the required analysis.
// NOTE(review): the pass inserts instructions and returns true from
// runOnFunction, yet declares setPreservesAll() - confirm this is intended.
void getAnalysisUsage(AnalysisUsage &AU) const override {
35+
AU.addRequired<TargetPassConfig>();
36+
AU.setPreservesAll();
37+
}
38+
StringRef getPassName() const override { return "GenXFloatControl"; }
39+
};
40+
41+
void initializeGenXFloatControlPass(PassRegistry &);
42+
43+
} // namespace llvm
44+
45+
using namespace llvm;
46+
using namespace genx;
47+
using namespace visa;
48+
49+
char GenXFloatControl::ID = 0;
50+
51+
INITIALIZE_PASS_BEGIN(GenXFloatControl, "GenXFloatControl", "GenXFloatControl",
52+
false, false)
53+
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
54+
INITIALIZE_PASS_END(GenXFloatControl, "GenXFloatControl", "GenXFloatControl",
55+
false, false)
56+
57+
// Factory for the GenXFloatControl pass: initializes the pass in the global
// PassRegistry and returns a newly allocated instance.
// NOTE(review): the returned pointer is a raw `new`; presumably the pass
// manager it is added to takes ownership - confirm against the caller.
FunctionPass *llvm::createGenXFloatControlPass() {
58+
initializeGenXFloatControlPass(*PassRegistry::getPassRegistry());
59+
return new GenXFloatControl();
60+
}
61+
62+
// Inserts the float-control (cr0) handling for F.
//
// The float control value is taken from the CMFloatControl attribute when F
// has one, otherwise the default (CRBits::RTNE) is used. An update of cr0 is
// emitted before the first non-PHI instruction of the entry block; for
// functions that are not group heads the value read from cr0 before that
// update is written back before every return instruction.
//
// Returns true when instructions were inserted, false when F was left
// untouched.
bool GenXFloatControl::runOnFunction(Function &F) {
63+
// By default allow to specify with the attribute only
64+
// rounding and denorm modes
65+
Mask = CRBits::RoundingBitMask | CRBits::DoublePrecisionDenorm |
66+
CRBits::SinglePrecisionDenorm | CRBits::HalfPrecisionDenorm;
67+
// Default float control:
68+
// rounding mode = nearest even
69+
// denormals = flush
70+
uint32_t FloatControl = CRBits::RTNE;
71+
const auto *Subtarget = &getAnalysis<TargetPassConfig>()
72+
.getTM<GenXTargetMachine>()
73+
.getGenXSubtarget();
74+
// Nothing to do for a function that has no attribute and is not a head
// function. When the attribute is present, FloatControl now holds its value.
if (!getFloatControl(F, &FloatControl) && !fg::isHead(F))
75+
return false;
76+
if (Subtarget->hasSystolicDenormControl()) {
77+
// Always retain denormals in systolic operations
78+
Mask |= CRBits::SystolicDenorm;
79+
FloatControl |= CRBits::SystolicDenorm;
80+
}
81+
// On kernel entry cr0 is set to zero, so in case of zero float control
82+
// we don't need to update it
83+
// NOTE(review): this compares the unmasked value, while buildCr0Update only
// applies `Value & Mask`. An attribute with bits set only outside Mask still
// reaches the update below - confirm whether `FloatControl & Mask` was meant.
if (FloatControl == 0 && fg::isGroupHead(F))
84+
return false;
85+
// Kernels, stackcalls and subroutines with attribute set float control on
86+
// entry - provided by the attribute or the default one
87+
// OldV is the cr0 value read before the update; it is the value restored on
// the return paths below.
auto *OldV = buildCr0Update(FloatControl, F.getEntryBlock().getFirstNonPHI());
88+
// Group heads do not restore cr0 on return.
if (fg::isGroupHead(F))
89+
return true;
90+
// Stackcalls and subroutines with attribute must save caller's float
91+
// control on entry and restore it before return
92+
for (auto &BB : F)
93+
if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
94+
buildCr0Write(OldV, RI);
95+
return true;
96+
}
97+
98+
// Fetches the float control requested for F through the CMFloatControl
// function attribute.
//
// F   - function whose attributes are inspected.
// Val - in/out: receives the attribute's string value parsed as an integer.
//       It is not touched when the attribute is absent.
//
// Returns false when F has no CMFloatControl attribute, true otherwise.
bool GenXFloatControl::getFloatControl(Function &F, uint32_t *Val) {
99+
if (!F.hasFnAttribute(FunctionMD::CMFloatControl))
100+
return false;
101+
// NOTE(review): the result of getAsInteger is ignored, so a malformed
// attribute string is not reported and the function still returns true.
// Presumably *Val keeps the caller's default in that case - confirm against
// StringRef::getAsInteger.
F.getFnAttribute(FunctionMD::CMFloatControl)
102+
.getValueAsString()
103+
.getAsInteger(0, *Val);
104+
return true;
105+
}
106+
107+
// Emits, before InsertBefore, a two-step update of the predefined cr0
// register:
//   1. read cr0, AND the read region with ~Mask, write the result back;
//   2. read cr0 again, OR the read region with (Value & Mask), write it back.
//
// Value        - requested float control bits; only the bits inside Mask are
//                applied. Note that this parameter shadows the `Value` type
//                name inside the body.
// InsertBefore - insertion point; its debug location is reused for the
//                created region instructions.
//
// Returns the region read from cr0 in step 1, i.e. the value observed before
// any modification.
Value *GenXFloatControl::buildCr0Update(uint32_t Value, Instruction *InsertBefore) {
108+
IRBuilder<> B(InsertBefore);
109+
auto &DL = InsertBefore->getDebugLoc();
110+
auto *M = InsertBefore->getModule();
111+
auto *Ty = B.getInt32Ty();
112+
// cr0 is accessed through the predef-reg intrinsics as a vector of 4 x i32.
auto *VTy = IGCLLVM::FixedVectorType::get(Ty, 4);
113+
auto *Id = B.getInt32(PreDefined_Vars::PREDEFINED_CR0);
114+
// Region built from the i32 type; presumably it selects a single i32 element
// of the 4 x i32 register value - confirm against the Region constructor.
Region R(Ty);
115+
auto *ReadPredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_read_predef_reg, {VTy, VTy});
116+
auto *WritePredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_write_predef_reg, {VTy, VTy});
117+
// Step 1: clear the bits covered by Mask.
auto *AndReadPredef = B.CreateCall(ReadPredefDecl, {Id, UndefValue::get(VTy)});
118+
auto *AndRdRegion = R.createRdRegion(AndReadPredef, "", InsertBefore, DL, true);
119+
auto *And = B.CreateAnd(AndRdRegion, ~Mask);
120+
auto *AndWrRegion = R.createWrRegion(AndReadPredef, And, "", InsertBefore, DL);
121+
B.CreateCall(WritePredefDecl, {Id, AndWrRegion});
122+
// Step 2: set the requested bits, restricted to Mask.
auto *OrReadPredef = B.CreateCall(ReadPredefDecl, {Id, UndefValue::get(VTy)});
123+
auto *OrRdRegion = R.createRdRegion(OrReadPredef, "", InsertBefore, DL, true);
124+
auto *Or = B.CreateOr(OrRdRegion, Value & Mask);
125+
auto *OrWrRegion = R.createWrRegion(OrReadPredef, Or, "", InsertBefore, DL);
126+
B.CreateCall(WritePredefDecl, {Id, OrWrRegion});
127+
// The value read before the AND is the pre-update cr0 content.
return AndRdRegion;
128+
}
129+
130+
// Emits, before InsertBefore, a write of V into the predefined cr0 register:
// cr0 is read, V is inserted into the read value with a wrregion, and the
// result is written back with genx_write_predef_reg.
//
// V            - value to store; runOnFunction passes the value returned by
//                buildCr0Update.
// InsertBefore - insertion point; its debug location is reused for the
//                created wrregion.
void GenXFloatControl::buildCr0Write(Value* V, Instruction *InsertBefore) {
131+
IRBuilder<> B(InsertBefore);
132+
auto &DL = InsertBefore->getDebugLoc();
133+
auto *M = InsertBefore->getModule();
134+
auto *Ty = B.getInt32Ty();
135+
// Same 4 x i32 view of cr0 and i32-based region as used in buildCr0Update.
auto *VTy = IGCLLVM::FixedVectorType::get(Ty, 4);
136+
auto *Id = B.getInt32(PreDefined_Vars::PREDEFINED_CR0);
137+
Region R(Ty);
138+
auto *ReadPredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_read_predef_reg, {VTy, VTy});
139+
auto *WritePredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_write_predef_reg, {VTy, VTy});
140+
// Read the current register value so the wrregion has an old value to
// insert V into.
auto *ReadPredef = B.CreateCall(ReadPredefDecl, {Id, UndefValue::get(VTy)});
141+
auto *WrRegion = R.createWrRegion(ReadPredef, V, "", InsertBefore, DL);
142+
B.CreateCall(WritePredefDecl, {Id, WrRegion});
143+
}
144+

IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ void initializeGenXPasses(PassRegistry &registry) {
211211
initializeGenXLscAddrCalcFoldingPass(registry);
212212
initializeGenXDetectPointerArgPass(registry);
213213
initializeGenXLCECalculationPass(registry);
214+
initializeGenXFloatControlPass(registry);
214215
// WRITE HERE MORE PASSES IF IT'S NEEDED;
215216
}
216217

@@ -756,6 +757,7 @@ bool GenXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
756757
/// eliminates unreachable internal globals.
757758
///
758759
vc::addPass(PM, createGlobalDCEPass());
760+
vc::addPass(PM, createGenXFloatControlPass());
759761
/// .. include:: GenXModule.h
760762
vc::addPass(PM, createGenXModulePass());
761763
/// .. include:: GenXLiveness.h

IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ void initializeGenXSLMResolutionPass(PassRegistry &);
251251
void initializeGenXLscAddrCalcFoldingPass(PassRegistry &);
252252
void initializeGenXDetectPointerArgPass(PassRegistry &);
253253
void initializeGenXLCECalculationPass(PassRegistry &);
254+
void initializeGenXFloatControlPass(PassRegistry &);
254255
} // End llvm namespace
255256

256257
#endif

0 commit comments

Comments
 (0)