diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 4c83f8a580aa0..1f125c2cf87de 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1323,7 +1323,7 @@ def REG_SEQUENCE : StandardPseudoInstruction {
 }
 def COPY : StandardPseudoInstruction {
   let OutOperandList = (outs unknown:$dst);
-  let InOperandList = (ins unknown:$src);
+  let InOperandList = (ins unknown:$src, variable_ops);
   let AsmString = "";
   let hasSideEffects = false;
   let isAsCheapAsAMove = true;
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 99ba893d6f096..f8d8305368771 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -213,6 +213,8 @@ class VirtRegRewriter {
   void rewrite();
   void addMBBLiveIns();
   bool readsUndefSubreg(const MachineOperand &MO) const;
+  uint64_t calcLiveRegUnitMask(const MachineOperand &MO,
+                               MCRegister PhysReg) const;
   void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
   void handleIdentityCopy(MachineInstr &MI);
   void expandCopyBundle(MachineInstr &MI) const;
@@ -474,6 +476,77 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
   return true;
 }
 
+/// Compute which lanes of \p PhysReg, the physical register assigned to
+/// \p MO, have live register units at MO's parent instruction.
+///
+/// \returns the live lanes as the integer value of a LaneBitmask. Returns 0
+/// if \p MO is undef, or if the live lanes equal the lane mask expected for
+/// the register being read (the fully live case), so that callers can treat
+/// 0 as "no partial liveness to record".
+uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
+                                              MCRegister PhysReg) const {
+  Register Reg = MO.getReg();
+  const LiveInterval &LI = LIS->getInterval(Reg);
+  const MachineInstr &MI = *MO.getParent();
+  SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+  unsigned SubRegIdx = MO.getSubReg();
+  LaneBitmask UseMask = SubRegIdx
+                            ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+                            : (Reg.isVirtual() ? MRI->getMaxLaneMaskForVReg(Reg)
+                                               : LaneBitmask::getNone());
+
+  LaneBitmask LiveRegUnitMask;
+  DenseSet<unsigned> LiveRegUnits;
+
+  if (MO.isUndef())
+    return 0u;
+
+  assert(LI.liveAt(MIIndex) &&
+         "Reads of completely dead register should be marked undef already");
+
+  // Record every unit of PhysReg that is live at MI within the lanes read.
+  if (LI.hasSubRanges()) {
+    for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+      unsigned Unit = (*Units).first;
+      LaneBitmask Mask = (*Units).second;
+      for (const LiveInterval::SubRange &S : LI.subranges()) {
+        if ((S.LaneMask & UseMask & Mask).any() && S.liveAt(MIIndex)) {
+          LiveRegUnits.insert(Unit);
+        }
+      }
+    }
+  } else {
+    for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+      unsigned Unit = (*Units).first;
+      const LiveRange &UnitRange = LIS->getRegUnit(Unit);
+      LaneBitmask Mask = (*Units).second;
+
+      if (UnitRange.liveAt(MIIndex) && (UseMask & Mask).any())
+        LiveRegUnits.insert(Unit);
+    }
+  }
+
+  // For a subregister use, narrow PhysReg to it and take its class lane mask.
+  if (SubRegIdx != 0) {
+    PhysReg = TRI->getSubReg(PhysReg, SubRegIdx);
+    UseMask = (TRI->getMinimalPhysRegClass(PhysReg))->getLaneMask();
+  }
+
+  for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    unsigned Unit = (*Units).first;
+    LaneBitmask Mask = (*Units).second;
+    if (LiveRegUnits.count(Unit)) {
+      LiveRegUnitMask |= Mask;
+    }
+  }
+
+  // A fully live register needs no lane mask operand; report it as 0.
+  if (UseMask == LiveRegUnitMask)
+    return 0u;
+
+  return LiveRegUnitMask.getAsInteger();
+}
+
 void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
   if (!MI.isIdentityCopy())
     return;
@@ -495,7 +568,11 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
   // give us additional liveness information: The target (super-)register
   // must not be valid before this point. Replace the COPY with a KILL
   // instruction to maintain this information.
-  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
+  // The lane mask immediate that rewrite() may append does not count as such
+  // additional information: a COPY whose only extra operand is that immediate
+  // is not replaced by a KILL here.
+  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 3 ||
+      (MI.getNumOperands() == 3 && !MI.getOperand(2).isImm())) {
     MI.setDesc(TII->get(TargetOpcode::KILL));
     LLVM_DEBUG(dbgs() << " replace by: " << MI);
     return;
@@ -641,11 +718,14 @@ void VirtRegRewriter::rewrite() {
   SmallVector<Register, 8> SuperDeads;
   SmallVector<Register, 8> SuperDefs;
   SmallVector<Register, 8> SuperKills;
+  uint64_t Mask = 0;
 
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
     for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+      // Only COPYs get a lane mask; start every instruction without one.
+      Mask = 0u;
       for (MachineOperand &MO : MI.operands()) {
         // Make sure MRI knows about registers clobbered by regmasks.
         if (MO.isRegMask())
@@ -663,6 +743,9 @@ void VirtRegRewriter::rewrite() {
         RewriteRegs.insert(PhysReg);
         assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
 
+        if (MO.isUse() && MI.isCopy())
+          Mask = calcLiveRegUnitMask(MO, PhysReg);
+
         // Preserve semantics of sub-register operands.
         unsigned SubReg = MO.getSubReg();
         if (SubReg != 0) {
@@ -739,6 +822,10 @@ void VirtRegRewriter::rewrite() {
         MO.setIsRenamable(true);
       }
 
+      // Append the partial-liveness lane mask to the COPY as an immediate.
+      if (MI.isCopy() && Mask)
+        MI.addOperand(*MF, MachineOperand::CreateImm(Mask));
+
       // Add any missing super-register kills after rewriting the whole
       // instruction.
       while (!SuperKills.empty())
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
index da1175c02e94a..965c31970404f 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
@@ -86,7 +86,7 @@ body: |
   ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable
$sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, 4398046511103 ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr65, 1, implicit-def dead $scc ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} @@ -117,7 +117,7 @@ body: | ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68 ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68 ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr68 - ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec + ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, 17592186044415, implicit $exec ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir index 4a0bb6ceccd3f..09526ea5ac878 100644 --- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir +++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir @@ -50,7 +50,7 @@ body: | ; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0 ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, 
undef %18:vgpr_32, implicit $exec ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, 12884901888, implicit $exec ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec ; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240 ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) @@ -221,7 +221,7 @@ body: | ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83, 3 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, 
csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr48_sgpr49 ; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85