Skip to content

Commit 4e9c227

Browse files
committed
AMDGPU: Handle rewriting VGPR MFMAs with immediate src2
1 parent 228b088 commit 4e9c227

File tree

4 files changed

+18
-141
lines changed

4 files changed

+18
-141
lines changed

llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -242,12 +242,12 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
242242
continue;
243243

244244
MachineOperand *Src2 = TII.getNamedOperand(*MFMA, AMDGPU::OpName::src2);
245-
if (!Src2->isReg())
246-
continue;
247-
248-
Register Src2Reg = Src2->getReg();
249-
if (!Src2Reg.isVirtual())
250-
continue;
245+
Register Src2Reg;
246+
if (Src2->isReg()) {
247+
Src2Reg = Src2->getReg();
248+
if (!Src2Reg.isVirtual())
249+
continue;
250+
}
251251

252252
// FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
253253
// of an AGPR or VGPR subclass, so we can't simply use the result on the
@@ -256,13 +256,15 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
256256
LLVM_DEBUG({
257257
dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
258258
<< " Dst=[" << printReg(VReg) << " => "
259-
<< printReg(PhysReg, &TRI) << ']';
259+
<< printReg(PhysReg, &TRI);
260260

261261
if (Src2Reg) {
262262
Register Src2PhysReg = VRM.getPhys(Src2Reg);
263263
dbgs() << ", Src2=[" << printReg(Src2Reg, &TRI) << " => "
264-
<< printReg(Src2PhysReg, &TRI) << "]: " << *MFMA;
264+
<< printReg(Src2PhysReg, &TRI);
265265
}
266+
267+
dbgs() << "]: " << *MFMA;
266268
});
267269

268270
const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass(MFMADstReg);

llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir

Lines changed: 0 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@
1616
ret void
1717
}
1818

19-
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2() #0 {
20-
ret void
21-
}
22-
2319
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2() #0 {
2420
ret void
2521
}
@@ -341,89 +337,6 @@ body: |
341337
342338
...
343339

344-
# Non-mac variant, src2 is an immediate.
345-
---
346-
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
347-
tracksRegLiveness: true
348-
machineFunctionInfo:
349-
isEntryFunction: true
350-
stackPtrOffsetReg: '$sgpr32'
351-
occupancy: 10
352-
sgprForEXECCopy: '$sgpr100_sgpr101'
353-
body: |
354-
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
355-
; CHECK: bb.0:
356-
; CHECK-NEXT: successors: %bb.1(0x80000000)
357-
; CHECK-NEXT: {{ $}}
358-
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
359-
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
360-
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
361-
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
362-
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
363-
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
364-
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
365-
; CHECK-NEXT: {{ $}}
366-
; CHECK-NEXT: bb.1:
367-
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
368-
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
369-
; CHECK-NEXT: {{ $}}
370-
; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
371-
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
372-
; CHECK-NEXT: S_BRANCH %bb.2
373-
; CHECK-NEXT: {{ $}}
374-
; CHECK-NEXT: bb.2:
375-
; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
376-
; CHECK-NEXT: {{ $}}
377-
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
378-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
379-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
380-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
381-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
382-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
383-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
384-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
385-
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
386-
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
387-
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
388-
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
389-
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
390-
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
391-
; CHECK-NEXT: S_ENDPGM 0
392-
bb.0:
393-
S_NOP 0, implicit-def $agpr0
394-
renamable $sgpr0 = S_MOV_B32 0
395-
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
396-
renamable $sgpr1 = COPY renamable $sgpr0
397-
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
398-
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
399-
%0.sub9:vreg_512_align2 = COPY %0.sub8
400-
401-
bb.1:
402-
liveins: $vcc
403-
404-
%0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, 0, 0, 0, 0, implicit $mode, implicit $exec
405-
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
406-
S_BRANCH %bb.2
407-
408-
bb.2:
409-
; No VGPRs available for %0
410-
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
411-
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
412-
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
413-
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
414-
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
415-
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
416-
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
417-
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
418-
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
419-
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
420-
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
421-
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
422-
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
423-
S_ENDPGM 0
424-
425-
...
426-
427340
# Non-mac variant, src2 is a physical register
428341
---
429342
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2

llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,19 +1553,18 @@ body: |
15531553
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
15541554
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
15551555
; CHECK-NEXT: {{ $}}
1556-
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
1557-
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
1558-
; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
1559-
; CHECK-NEXT: renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr20_vgpr21, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec
1556+
; CHECK-NEXT: renamable $agpr0_agpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
1557+
; CHECK-NEXT: early-clobber renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
1558+
; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
1559+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X4F16_e64 $agpr0_agpr1, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec
15601560
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
15611561
; CHECK-NEXT: S_BRANCH %bb.2
15621562
; CHECK-NEXT: {{ $}}
15631563
; CHECK-NEXT: bb.2:
1564-
; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
1564+
; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF
15651565
; CHECK-NEXT: {{ $}}
15661566
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
15671567
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
1568-
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
15691568
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
15701569
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
15711570
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
@@ -1903,14 +1902,13 @@ body: |
19031902
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
19041903
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
19051904
; CHECK-NEXT: {{ $}}
1906-
; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
1905+
; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
19071906
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
19081907
; CHECK-NEXT: S_BRANCH %bb.2
19091908
; CHECK-NEXT: {{ $}}
19101909
; CHECK-NEXT: bb.2:
1911-
; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
1910+
; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF
19121911
; CHECK-NEXT: {{ $}}
1913-
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
19141912
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
19151913
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
19161914
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23

llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -235,47 +235,11 @@ bb:
235235
ret void
236236
}
237237

238-
; TODO: Handle rewriting this case
239238
define void @test_rewrite_mfma_imm_src2(float %arg0, float %arg1) #0 {
240239
; CHECK-LABEL: test_rewrite_mfma_imm_src2:
241240
; CHECK: ; %bb.0: ; %bb
242241
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243-
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 2.0
244-
; CHECK-NEXT: s_nop 7
245-
; CHECK-NEXT: s_nop 7
246-
; CHECK-NEXT: s_nop 1
247-
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
248-
; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
249-
; CHECK-NEXT: v_accvgpr_write_b32 a2, v2
250-
; CHECK-NEXT: v_accvgpr_write_b32 a3, v3
251-
; CHECK-NEXT: v_accvgpr_write_b32 a4, v4
252-
; CHECK-NEXT: v_accvgpr_write_b32 a5, v5
253-
; CHECK-NEXT: v_accvgpr_write_b32 a6, v6
254-
; CHECK-NEXT: v_accvgpr_write_b32 a7, v7
255-
; CHECK-NEXT: v_accvgpr_write_b32 a8, v8
256-
; CHECK-NEXT: v_accvgpr_write_b32 a9, v9
257-
; CHECK-NEXT: v_accvgpr_write_b32 a10, v10
258-
; CHECK-NEXT: v_accvgpr_write_b32 a11, v11
259-
; CHECK-NEXT: v_accvgpr_write_b32 a12, v12
260-
; CHECK-NEXT: v_accvgpr_write_b32 a13, v13
261-
; CHECK-NEXT: v_accvgpr_write_b32 a14, v14
262-
; CHECK-NEXT: v_accvgpr_write_b32 a15, v15
263-
; CHECK-NEXT: v_accvgpr_write_b32 a16, v16
264-
; CHECK-NEXT: v_accvgpr_write_b32 a17, v17
265-
; CHECK-NEXT: v_accvgpr_write_b32 a18, v18
266-
; CHECK-NEXT: v_accvgpr_write_b32 a19, v19
267-
; CHECK-NEXT: v_accvgpr_write_b32 a20, v20
268-
; CHECK-NEXT: v_accvgpr_write_b32 a21, v21
269-
; CHECK-NEXT: v_accvgpr_write_b32 a22, v22
270-
; CHECK-NEXT: v_accvgpr_write_b32 a23, v23
271-
; CHECK-NEXT: v_accvgpr_write_b32 a24, v24
272-
; CHECK-NEXT: v_accvgpr_write_b32 a25, v25
273-
; CHECK-NEXT: v_accvgpr_write_b32 a26, v26
274-
; CHECK-NEXT: v_accvgpr_write_b32 a27, v27
275-
; CHECK-NEXT: v_accvgpr_write_b32 a28, v28
276-
; CHECK-NEXT: v_accvgpr_write_b32 a29, v29
277-
; CHECK-NEXT: v_accvgpr_write_b32 a30, v30
278-
; CHECK-NEXT: v_accvgpr_write_b32 a31, v31
242+
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, 2.0
279243
; CHECK-NEXT: ;;#ASMSTART
280244
; CHECK-NEXT: ; use a[0:31]
281245
; CHECK-NEXT: ;;#ASMEND

0 commit comments

Comments
 (0)