Skip to content

Commit 22d2495

Browse files
committed
AMDGPU: Handle V->A MFMA copy from case with immediate src2
Handle a special case for copies from AGPR to VGPR on the MFMA inputs. If the "input" is really a subregister def, we will not see the usual copy to VGPR for src2, only the read of the subregister def. Not sure if this pattern appears in practice.
1 parent 8a87d16 commit 22d2495

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -377,13 +377,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
377377
Register CopyDstReg = UseMI.getOperand(0).getReg();
378378
if (!CopyDstReg.isVirtual())
379379
continue;
380+
for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
381+
if (!CopyUseMO.readsReg())
382+
continue;
380383

381-
for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) {
384+
MachineInstr &CopyUseMI = *CopyUseMO.getParent();
382385
if (isRewriteCandidate(CopyUseMI)) {
383-
const MachineOperand *Op =
384-
CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr);
385-
if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(),
386-
VRM.getPhys(Op->getReg())))
386+
if (tryReassigningMFMAChain(CopyUseMI, CopyUseMO.getOperandNo(),
387+
VRM.getPhys(CopyUseMO.getReg())))
387388
MadeChange = true;
388389
}
389390
}

llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,8 @@ body: |
187187
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
188188
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
189189
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
190-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
191-
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
190+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
191+
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
192192
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
193193
; CHECK-NEXT: SI_RETURN
194194
%0:vreg_64_align2 = COPY $vgpr4_vgpr5

0 commit comments

Comments
 (0)