28
28
#include " SIRegisterInfo.h"
29
29
#include " llvm/CodeGen/LiveIntervals.h"
30
30
#include " llvm/CodeGen/LiveRegMatrix.h"
31
+ #include " llvm/CodeGen/LiveStacks.h"
31
32
#include " llvm/CodeGen/MachineFunctionPass.h"
32
33
#include " llvm/CodeGen/VirtRegMap.h"
33
34
#include " llvm/InitializePasses.h"
@@ -38,6 +39,9 @@ using namespace llvm;
38
39
39
40
namespace {
40
41
42
+ /// Map from spill slot frame index to the list of instructions which reference it.
43
+ using SpillReferenceMap = DenseMap<int , SmallVector<MachineInstr *, 4 >>;
44
+
41
45
class AMDGPURewriteAGPRCopyMFMAImpl {
42
46
MachineFunction &MF;
43
47
const GCNSubtarget &ST;
@@ -47,6 +51,7 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
47
51
VirtRegMap &VRM;
48
52
LiveRegMatrix &LRM;
49
53
LiveIntervals &LIS;
54
+ LiveStacks &LSS;
50
55
const RegisterClassInfo &RegClassInfo;
51
56
52
57
bool attemptReassignmentsToAGPR (SmallSetVector<Register, 4 > &InterferingRegs,
@@ -55,10 +60,11 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
55
60
public:
56
61
AMDGPURewriteAGPRCopyMFMAImpl (MachineFunction &MF, VirtRegMap &VRM,
57
62
LiveRegMatrix &LRM, LiveIntervals &LIS,
63
+ LiveStacks &LSS,
58
64
const RegisterClassInfo &RegClassInfo)
59
65
: MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
60
66
TRI (*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
61
- LIS(LIS), RegClassInfo(RegClassInfo) {}
67
+ LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
62
68
63
69
bool isRewriteCandidate (const MachineInstr &MI) const {
64
70
return TII.isMAI (MI) && AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ()) != -1 ;
@@ -106,6 +112,22 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
106
112
107
113
bool tryFoldCopiesToAGPR (Register VReg, MCRegister AssignedAGPR) const ;
108
114
bool tryFoldCopiesFromAGPR (Register VReg, MCRegister AssignedAGPR) const ;
115
+
116
+ // / Replace spill instruction \p SpillMI which loads/stores from/to \p SpillFI
117
+ // / with a COPY to the replacement register value \p VReg.
118
+ void replaceSpillWithCopyToVReg (MachineInstr &SpillMI, int SpillFI,
119
+ Register VReg) const ;
120
+
121
+ // / Create a map from frame index to use instructions for spills. If a use of
122
+ // / the frame index does not consist only of spill instructions, it will not
123
+ // / be included in the map.
124
+ void collectSpillIndexUses (ArrayRef<LiveInterval *> StackIntervals,
125
+ SpillReferenceMap &Map) const ;
126
+
127
+ // / Attempt to unspill VGPRs by finding a free register and replacing the
128
+ // / spill instructions with copies.
129
+ void eliminateSpillsOfReassignedVGPRs () const ;
130
+
109
131
bool run (MachineFunction &MF) const ;
110
132
};
111
133
@@ -392,6 +414,133 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
392
414
return MadeChange;
393
415
}
394
416
417
+ void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg (
418
+ MachineInstr &SpillMI, int SpillFI, Register VReg) const {
419
+ const DebugLoc &DL = SpillMI.getDebugLoc ();
420
+ MachineBasicBlock &MBB = *SpillMI.getParent ();
421
+ MachineInstr *NewCopy;
422
+ if (SpillMI.mayStore ()) {
423
+ NewCopy = BuildMI (MBB, SpillMI, DL, TII.get (TargetOpcode::COPY), VReg)
424
+ .add (SpillMI.getOperand (0 ));
425
+ } else {
426
+ NewCopy = BuildMI (MBB, SpillMI, DL, TII.get (TargetOpcode::COPY))
427
+ .add (SpillMI.getOperand (0 ))
428
+ .addReg (VReg);
429
+ }
430
+
431
+ LIS.ReplaceMachineInstrInMaps (SpillMI, *NewCopy);
432
+ SpillMI.eraseFromParent ();
433
+ }
434
+
435
+ void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses (
436
+ ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {
437
+
438
+ SmallSet<int , 4 > NeededFrameIndexes;
439
+ for (const LiveInterval *LI : StackIntervals)
440
+ NeededFrameIndexes.insert (LI->reg ().stackSlotIndex ());
441
+
442
+ for (MachineBasicBlock &MBB : MF) {
443
+ for (MachineInstr &MI : MBB) {
444
+ for (MachineOperand &MO : MI.operands ()) {
445
+ if (!MO.isFI () || !NeededFrameIndexes.count (MO.getIndex ()))
446
+ continue ;
447
+
448
+ SmallVector<MachineInstr *, 4 > &References = Map[MO.getIndex ()];
449
+ if (TII.isVGPRSpill (MI)) {
450
+ References.push_back (&MI);
451
+ break ;
452
+ }
453
+
454
+ // Verify this was really a spill instruction, if it's not just ignore
455
+ // all uses.
456
+
457
+ // TODO: This should probably be verifier enforced.
458
+ NeededFrameIndexes.erase (MO.getIndex ());
459
+ Map.erase (MO.getIndex ());
460
+ }
461
+ }
462
+ }
463
+ }
464
+
465
+ void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs () const {
466
+ unsigned NumSlots = LSS.getNumIntervals ();
467
+ if (NumSlots == 0 )
468
+ return ;
469
+
470
+ MachineFrameInfo &MFI = MF.getFrameInfo ();
471
+
472
+ SmallVector<LiveInterval *, 32 > StackIntervals;
473
+ StackIntervals.reserve (NumSlots);
474
+
475
+ for (auto I = LSS.begin (), E = LSS.end (); I != E; ++I) {
476
+ int Slot = I->first ;
477
+ if (!MFI.isSpillSlotObjectIndex (Slot) || MFI.isDeadObjectIndex (Slot))
478
+ continue ;
479
+
480
+ LiveInterval &LI = I->second ;
481
+ const TargetRegisterClass *RC = LSS.getIntervalRegClass (Slot);
482
+ if (TRI.hasVGPRs (RC))
483
+ StackIntervals.push_back (&LI);
484
+ }
485
+
486
+ // / Sort heaviest intervals first to prioritize their unspilling
487
+ sort (StackIntervals, [](const LiveInterval *A, const LiveInterval *B) {
488
+ return A->weight () > B->weight ();
489
+ });
490
+
491
+ // FIXME: The APIs for dealing with the LiveInterval of a frame index are
492
+ // cumbersome. LiveStacks owns its LiveIntervals which refer to stack
493
+ // slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
494
+ // and must create a substitute virtual register to do so. This makes
495
+ // incremental updating here difficult; we need to actually perform the IR
496
+ // mutation to get the new vreg references in place to compute the register
497
+ // LiveInterval to perform an assignment to track the new interference
498
+ // correctly, and we can't simply migrate the LiveInterval we already have.
499
+ //
500
+ // To avoid walking through the entire function for each index, pre-collect
501
+ // all the instructions slot referencess.
502
+
503
+ DenseMap<int , SmallVector<MachineInstr *, 4 >> SpillSlotReferences;
504
+ collectSpillIndexUses (StackIntervals, SpillSlotReferences);
505
+
506
+ for (LiveInterval *LI : StackIntervals) {
507
+ int Slot = LI->reg ().stackSlotIndex ();
508
+ auto SpillReferences = SpillSlotReferences.find (Slot);
509
+ if (SpillReferences == SpillSlotReferences.end ())
510
+ continue ;
511
+
512
+ const TargetRegisterClass *RC = LSS.getIntervalRegClass (Slot);
513
+
514
+ LLVM_DEBUG (dbgs () << " Trying to eliminate " << printReg (Slot, &TRI)
515
+ << " by reassigning\n " );
516
+
517
+ ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder (RC);
518
+
519
+ for (MCPhysReg PhysReg : AllocOrder) {
520
+ if (LRM.checkInterference (*LI, PhysReg) != LiveRegMatrix::IK_Free)
521
+ continue ;
522
+
523
+ LLVM_DEBUG (dbgs () << " Reassigning " << *LI << " to "
524
+ << printReg (PhysReg, &TRI) << ' \n ' );
525
+
526
+ const TargetRegisterClass *RC = LSS.getIntervalRegClass (Slot);
527
+ Register NewVReg = MRI.createVirtualRegister (RC);
528
+
529
+ for (MachineInstr *SpillMI : SpillReferences->second )
530
+ replaceSpillWithCopyToVReg (*SpillMI, Slot, NewVReg);
531
+
532
+ // TODO: We should be able to transfer the information from the stack
533
+ // slot's LiveInterval without recomputing from scratch with the
534
+ // replacement vreg uses.
535
+ LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval (NewVReg);
536
+ VRM.grow ();
537
+ LRM.assign (NewLI, PhysReg);
538
+ MFI.RemoveStackObject (Slot);
539
+ break ;
540
+ }
541
+ }
542
+ }
543
+
395
544
bool AMDGPURewriteAGPRCopyMFMAImpl::run (MachineFunction &MF) const {
396
545
// This only applies on subtargets that have a configurable AGPR vs. VGPR
397
546
// allocation.
@@ -418,6 +567,12 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
418
567
MadeChange = true ;
419
568
}
420
569
570
+ // If we've successfully rewritten some MFMAs, we've alleviated some VGPR
571
+ // pressure. See if we can eliminate some spills now that those registers are
572
+ // more available.
573
+ if (MadeChange)
574
+ eliminateSpillsOfReassignedVGPRs ();
575
+
421
576
return MadeChange;
422
577
}
423
578
@@ -441,10 +596,13 @@ class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
441
596
AU.addRequired <LiveIntervalsWrapperPass>();
442
597
AU.addRequired <VirtRegMapWrapperLegacy>();
443
598
AU.addRequired <LiveRegMatrixWrapperLegacy>();
599
+ AU.addRequired <LiveStacksWrapperLegacy>();
444
600
445
601
AU.addPreserved <LiveIntervalsWrapperPass>();
446
602
AU.addPreserved <VirtRegMapWrapperLegacy>();
447
603
AU.addPreserved <LiveRegMatrixWrapperLegacy>();
604
+ AU.addPreserved <LiveStacksWrapperLegacy>();
605
+
448
606
AU.setPreservesAll ();
449
607
MachineFunctionPass::getAnalysisUsage (AU);
450
608
}
@@ -457,6 +615,7 @@ INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
457
615
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
458
616
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
459
617
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
618
+ INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
460
619
INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
461
620
" AMDGPU Rewrite AGPR-Copy-MFMA" , false , false )
462
621
@@ -475,8 +634,8 @@ bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
475
634
auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM ();
476
635
auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM ();
477
636
auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS ();
478
-
479
- AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, RegClassInfo);
637
+ auto &LSS = getAnalysis<LiveStacksWrapperLegacy>(). getLS ();
638
+ AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, LSS, RegClassInfo);
480
639
return Impl.run (MF);
481
640
}
482
641
@@ -486,13 +645,15 @@ AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
486
645
VirtRegMap &VRM = MFAM.getResult <VirtRegMapAnalysis>(MF);
487
646
LiveRegMatrix &LRM = MFAM.getResult <LiveRegMatrixAnalysis>(MF);
488
647
LiveIntervals &LIS = MFAM.getResult <LiveIntervalsAnalysis>(MF);
648
+ LiveStacks &LSS = MFAM.getResult <LiveStacksAnalysis>(MF);
489
649
RegisterClassInfo RegClassInfo;
490
650
RegClassInfo.runOnMachineFunction (MF);
491
651
492
- AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, RegClassInfo);
652
+ AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, LSS, RegClassInfo);
493
653
if (!Impl.run (MF))
494
654
return PreservedAnalyses::all ();
495
655
auto PA = getMachineFunctionPassPreservedAnalyses ();
496
656
PA.preserveSet <CFGAnalyses>();
657
+ PA.preserve <LiveStacksAnalysis>();
497
658
return PA;
498
659
}
0 commit comments