|
| 1 | +/*========================== begin_copyright_notice ============================ |
| 2 | +
|
| 3 | +Copyright (C) 2024 Intel Corporation |
| 4 | +
|
| 5 | +SPDX-License-Identifier: MIT |
| 6 | +
|
| 7 | +============================= end_copyright_notice ===========================*/ |
| 8 | + |
| 9 | +#include "GenX.h" |
| 10 | +#include "GenXTargetMachine.h" |
| 11 | +#include "GenXUtil.h" |
| 12 | +#include "GenXVisa.h" |
| 13 | + |
| 14 | +#include "llvm/CodeGen/TargetPassConfig.h" |
| 15 | +#include "llvm/InitializePasses.h" |
| 16 | + |
| 17 | +#define DEBUG_TYPE "GENX_FLOAT_CONTROL" |
| 18 | + |
| 19 | +namespace llvm { |
| 20 | + |
| 21 | +class GenXFloatControl : public FunctionPass { |
| 22 | + uint32_t Mask = 0; |
| 23 | + |
| 24 | + bool getFloatControl(Function &F, uint32_t *Val); |
| 25 | + Value *buildCr0Update(uint32_t Value, Instruction *InsertBefore); |
| 26 | + void buildCr0Write(Value *V, Instruction *InsertBefore); |
| 27 | + |
| 28 | +public: |
| 29 | + static char ID; |
| 30 | + |
| 31 | + explicit GenXFloatControl() : FunctionPass(ID) {} |
| 32 | + |
| 33 | + bool runOnFunction(Function &F) override; |
| 34 | + void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 35 | + AU.addRequired<TargetPassConfig>(); |
| 36 | + AU.setPreservesAll(); |
| 37 | + } |
| 38 | + StringRef getPassName() const override { return "GenXFloatControl"; } |
| 39 | +}; |
| 40 | + |
| 41 | +void initializeGenXFloatControlPass(PassRegistry &); |
| 42 | + |
| 43 | +} // namespace llvm |
| 44 | + |
| 45 | +using namespace llvm; |
| 46 | +using namespace genx; |
| 47 | +using namespace visa; |
| 48 | + |
| 49 | +char GenXFloatControl::ID = 0; |
| 50 | + |
| 51 | +INITIALIZE_PASS_BEGIN(GenXFloatControl, "GenXFloatControl", "GenXFloatControl", |
| 52 | + false, false) |
| 53 | +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
| 54 | +INITIALIZE_PASS_END(GenXFloatControl, "GenXFloatControl", "GenXFloatControl", |
| 55 | + false, false) |
| 56 | + |
| 57 | +FunctionPass *llvm::createGenXFloatControlPass() { |
| 58 | + initializeGenXFloatControlPass(*PassRegistry::getPassRegistry()); |
| 59 | + return new GenXFloatControl(); |
| 60 | +} |
| 61 | + |
| 62 | +bool GenXFloatControl::runOnFunction(Function &F) { |
| 63 | + // By default allow to specify with the attribute only |
| 64 | + // rounding and denorm modes |
| 65 | + Mask = CRBits::RoundingBitMask | CRBits::DoublePrecisionDenorm | |
| 66 | + CRBits::SinglePrecisionDenorm | CRBits::HalfPrecisionDenorm; |
| 67 | + // Default float control: |
| 68 | + // rounding mode = nearest even |
| 69 | + // denormals = flush |
| 70 | + uint32_t FloatControl = CRBits::RTNE; |
| 71 | + const auto *Subtarget = &getAnalysis<TargetPassConfig>() |
| 72 | + .getTM<GenXTargetMachine>() |
| 73 | + .getGenXSubtarget(); |
| 74 | + if (!getFloatControl(F, &FloatControl) && !fg::isHead(F)) |
| 75 | + return false; |
| 76 | + if (Subtarget->hasSystolicDenormControl()) { |
| 77 | + // Always retain denormals in systolic operations |
| 78 | + Mask |= CRBits::SystolicDenorm; |
| 79 | + FloatControl |= CRBits::SystolicDenorm; |
| 80 | + } |
| 81 | + // On kernel entry cr0 is set to zero, so in case of zero float control |
| 82 | + // we don't need to update it |
| 83 | + if (FloatControl == 0 && fg::isGroupHead(F)) |
| 84 | + return false; |
| 85 | + // Kernels, stackcalls and subroutines with attribute set float control on |
| 86 | + // entry - provided by the attribute or the default one |
| 87 | + auto *OldV = buildCr0Update(FloatControl, F.getEntryBlock().getFirstNonPHI()); |
| 88 | + if (fg::isGroupHead(F)) |
| 89 | + return true; |
| 90 | + // Stackcalls and subroutines with attribute must save caller's float |
| 91 | + // control on entry and restore it before return |
| 92 | + for (auto &BB : F) |
| 93 | + if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) |
| 94 | + buildCr0Write(OldV, RI); |
| 95 | + return true; |
| 96 | +} |
| 97 | + |
| 98 | +bool GenXFloatControl::getFloatControl(Function &F, uint32_t *Val) { |
| 99 | + if (!F.hasFnAttribute(FunctionMD::CMFloatControl)) |
| 100 | + return false; |
| 101 | + F.getFnAttribute(FunctionMD::CMFloatControl) |
| 102 | + .getValueAsString() |
| 103 | + .getAsInteger(0, *Val); |
| 104 | + return true; |
| 105 | +} |
| 106 | + |
| 107 | +Value *GenXFloatControl::buildCr0Update(uint32_t Value, Instruction *InsertBefore) { |
| 108 | + IRBuilder<> B(InsertBefore); |
| 109 | + auto &DL = InsertBefore->getDebugLoc(); |
| 110 | + auto *M = InsertBefore->getModule(); |
| 111 | + auto *Ty = B.getInt32Ty(); |
| 112 | + auto *VTy = IGCLLVM::FixedVectorType::get(Ty, 4); |
| 113 | + auto *Id = B.getInt32(PreDefined_Vars::PREDEFINED_CR0); |
| 114 | + Region R(Ty); |
| 115 | + auto *ReadPredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_read_predef_reg, {VTy, VTy}); |
| 116 | + auto *WritePredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_write_predef_reg, {VTy, VTy}); |
| 117 | + auto *AndReadPredef = B.CreateCall(ReadPredefDecl, {Id, UndefValue::get(VTy)}); |
| 118 | + auto *AndRdRegion = R.createRdRegion(AndReadPredef, "", InsertBefore, DL, true); |
| 119 | + auto *And = B.CreateAnd(AndRdRegion, ~Mask); |
| 120 | + auto *AndWrRegion = R.createWrRegion(AndReadPredef, And, "", InsertBefore, DL); |
| 121 | + B.CreateCall(WritePredefDecl, {Id, AndWrRegion}); |
| 122 | + auto *OrReadPredef = B.CreateCall(ReadPredefDecl, {Id, UndefValue::get(VTy)}); |
| 123 | + auto *OrRdRegion = R.createRdRegion(OrReadPredef, "", InsertBefore, DL, true); |
| 124 | + auto *Or = B.CreateOr(OrRdRegion, Value & Mask); |
| 125 | + auto *OrWrRegion = R.createWrRegion(OrReadPredef, Or, "", InsertBefore, DL); |
| 126 | + B.CreateCall(WritePredefDecl, {Id, OrWrRegion}); |
| 127 | + return AndRdRegion; |
| 128 | +} |
| 129 | + |
| 130 | +void GenXFloatControl::buildCr0Write(Value* V, Instruction *InsertBefore) { |
| 131 | + IRBuilder<> B(InsertBefore); |
| 132 | + auto &DL = InsertBefore->getDebugLoc(); |
| 133 | + auto *M = InsertBefore->getModule(); |
| 134 | + auto *Ty = B.getInt32Ty(); |
| 135 | + auto *VTy = IGCLLVM::FixedVectorType::get(Ty, 4); |
| 136 | + auto *Id = B.getInt32(PreDefined_Vars::PREDEFINED_CR0); |
| 137 | + Region R(Ty); |
| 138 | + auto *ReadPredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_read_predef_reg, {VTy, VTy}); |
| 139 | + auto *WritePredefDecl = vc::getAnyDeclaration(M, GenXIntrinsic::genx_write_predef_reg, {VTy, VTy}); |
| 140 | + auto *ReadPredef = B.CreateCall(ReadPredefDecl, {Id, UndefValue::get(VTy)}); |
| 141 | + auto *WrRegion = R.createWrRegion(ReadPredef, V, "", InsertBefore, DL); |
| 142 | + B.CreateCall(WritePredefDecl, {Id, WrRegion}); |
| 143 | +} |
| 144 | + |
0 commit comments