Skip to content

[WIP][CodeGen] Encode liveness for COPY instructions after virtRegRewriter pass. #151123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/Target/Target.td
Original file line number Diff line number Diff line change
Expand Up @@ -1323,7 +1323,7 @@ def REG_SEQUENCE : StandardPseudoInstruction {
}
def COPY : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$src);
let InOperandList = (ins unknown:$src, variable_ops);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should not change the standard copy instruction

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just for experimentation purposes, as I didn't know exactly where to encode the liveness info as a lane mask while working on it! I will go with your suggestion of MO_laneMask now!

let AsmString = "";
let hasSideEffects = false;
let isAsCheapAsAMove = true;
Expand Down
89 changes: 88 additions & 1 deletion llvm/lib/CodeGen/VirtRegMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ class VirtRegRewriter {
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
uint64_t calcLiveRegUnitMask(const MachineOperand &MO,
MCRegister PhysReg) const;
void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
void handleIdentityCopy(MachineInstr &MI);
void expandCopyBundle(MachineInstr &MI) const;
Expand Down Expand Up @@ -474,6 +476,77 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
return true;
}

/// Compute which lanes of the physical register assigned to \p MO are live at
/// MO's parent instruction.
///
/// \param MO      Register operand whose register is being rewritten.
/// \param PhysReg Physical register assigned to MO's register.
/// \returns The live lanes as the raw integer value of a LaneBitmask, or 0 when
///          \p MO is undef or when the computed live mask equals the mask of
///          the lanes being read (i.e. there is no partial liveness to record).
uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
                                              MCRegister PhysReg) const {
  // An undef read carries no liveness, so bail out before querying
  // LiveIntervals at all.
  if (MO.isUndef())
    return 0u;

  Register Reg = MO.getReg();
  const LiveInterval &LI = LIS->getInterval(Reg);
  const MachineInstr &MI = *MO.getParent();
  SlotIndex MIIndex = LIS->getInstructionIndex(MI);

  assert(LI.liveAt(MIIndex) &&
         "Reads of completely dead register should be marked undef already");

  // Lanes read by this operand: the sub-register lanes when a sub-register
  // index is present, otherwise every lane of the virtual register.
  unsigned SubRegIdx = MO.getSubReg();
  LaneBitmask UseMask = SubRegIdx
                            ? TRI->getSubRegIndexLaneMask(SubRegIdx)
                            : (Reg.isVirtual() ? MRI->getMaxLaneMaskForVReg(Reg)
                                               : LaneBitmask::getNone());

  // Collect the register units of PhysReg that overlap the read lanes and are
  // live at MI.
  DenseSet<unsigned> LiveRegUnits;
  if (LI.hasSubRanges()) {
    for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
      unsigned Unit = (*Units).first;
      LaneBitmask Mask = (*Units).second;
      for (const LiveInterval::SubRange &S : LI.subranges()) {
        if ((S.LaneMask & UseMask & Mask).any() && S.liveAt(MIIndex)) {
          LiveRegUnits.insert(Unit);
          // One live sub-range is enough to mark the unit live.
          break;
        }
      }
    }
  } else {
    // Without sub-ranges, fall back to the live range of each register unit.
    for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
      unsigned Unit = (*Units).first;
      const LiveRange &UnitRange = LIS->getRegUnit(Unit);
      LaneBitmask Mask = (*Units).second;

      if (UnitRange.liveAt(MIIndex) && (UseMask & Mask).any())
        LiveRegUnits.insert(Unit);
    }
  }

  // Consider the exact subregister & create new UseMask as per the RC for it.
  if (SubRegIdx != 0) {
    PhysReg = TRI->getSubReg(PhysReg, SubRegIdx);
    UseMask = (TRI->getMinimalPhysRegClass(PhysReg))->getLaneMask();
  }

  // Translate the live units into lanes of the register that is actually read
  // (the sub-register, if one was selected above).
  LaneBitmask LiveRegUnitMask;
  for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    unsigned Unit = (*Units).first;
    LaneBitmask Mask = (*Units).second;
    if (LiveRegUnits.count(Unit))
      LiveRegUnitMask |= Mask;
  }

  // NOTE(review): when SubRegIdx == 0, UseMask comes from the virtual
  // register's maximum lane mask while LiveRegUnitMask is built from PhysReg's
  // unit masks; this comparison assumes both use the same lane numbering --
  // confirm.
  if (UseMask == LiveRegUnitMask)
    return 0u;

  return LiveRegUnitMask.getAsInteger();
}

void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
if (!MI.isIdentityCopy())
return;
Expand All @@ -495,7 +568,11 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
// give us additional liveness information: The target (super-)register
// must not be valid before this point. Replace the COPY with a KILL
// instruction to maintain this information.
if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {

// Do not treat a COPY with exactly three operands, whose third operand is an
// immediate lane mask, as carrying additional liveness information; it is
// equivalent to a plain two-operand COPY.
if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 3 ||
(MI.getNumOperands() == 3 && !MI.getOperand(2).isImm())) {
MI.setDesc(TII->get(TargetOpcode::KILL));
LLVM_DEBUG(dbgs() << " replace by: " << MI);
return;
Expand Down Expand Up @@ -641,11 +718,14 @@ void VirtRegRewriter::rewrite() {
SmallVector<Register, 8> SuperDeads;
SmallVector<Register, 8> SuperDefs;
SmallVector<Register, 8> SuperKills;
uint64_t Mask;

for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
// reset for each MI.
Mask = 0u;
for (MachineOperand &MO : MI.operands()) {
// Make sure MRI knows about registers clobbered by regmasks.
if (MO.isRegMask())
Expand All @@ -663,6 +743,9 @@ void VirtRegRewriter::rewrite() {
RewriteRegs.insert(PhysReg);
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");

if (MO.isUse() && MI.isCopy())
Mask = calcLiveRegUnitMask(MO, PhysReg);

// Preserve semantics of sub-register operands.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
Expand Down Expand Up @@ -739,6 +822,10 @@ void VirtRegRewriter::rewrite() {
MO.setIsRenamable(true);
}

// Add LaneBitmask as MO_Imm
if (MI.isCopy() && Mask)
MI.addOperand(*MF, MachineOperand::CreateImm(Mask));

// Add any missing super-register kills after rewriting the whole
// instruction.
while (!SuperKills.empty())
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ body: |
; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, 4398046511103
; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr65, 1, implicit-def dead $scc
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -117,7 +117,7 @@ body: |
; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68
; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68
; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr68
; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, 17592186044415, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ body: |
; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, 12884901888, implicit $exec
; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240
; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
Expand Down Expand Up @@ -221,7 +221,7 @@ body: |
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83
; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83, 3
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr48_sgpr49
; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85
Expand Down
Loading