Skip to content

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Aug 11, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 11, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/153016.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+10-8)
  • (modified) llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir (-87)
  • (modified) llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir (+7-9)
  • (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll (+1-37)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 8c6aee89b7375..a8dfdbe5dd494 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -242,12 +242,12 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
         continue;
 
       MachineOperand *Src2 = TII.getNamedOperand(*MFMA, AMDGPU::OpName::src2);
-      if (!Src2->isReg())
-        continue;
-
-      Register Src2Reg = Src2->getReg();
-      if (!Src2Reg.isVirtual())
-        continue;
+      Register Src2Reg;
+      if (Src2->isReg()) {
+        Src2Reg = Src2->getReg();
+        if (!Src2Reg.isVirtual())
+          continue;
+      }
 
       // FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
       // of an AGPR or VGPR subclass, so we can't simply use the result on the
@@ -256,13 +256,15 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
       LLVM_DEBUG({
         dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
                << " Dst=[" << printReg(VReg) << " => "
-               << printReg(PhysReg, &TRI) << ']';
+               << printReg(PhysReg, &TRI);
 
         if (Src2Reg) {
           Register Src2PhysReg = VRM.getPhys(Src2Reg);
           dbgs() << ", Src2=[" << printReg(Src2Reg, &TRI) << " => "
-                 << printReg(Src2PhysReg, &TRI) << "]: " << *MFMA;
+                 << printReg(Src2PhysReg, &TRI);
         }
+
+        dbgs() << "]: " << *MFMA;
       });
 
       const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass(MFMADstReg);
diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir
index b53bde6bfd281..3103d635200c6 100644
--- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir
@@ -16,10 +16,6 @@
     ret void
   }
 
-  define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2() #0 {
-    ret void
-  }
-
   define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2() #0 {
     ret void
   }
@@ -341,89 +337,6 @@ body:             |
 
 ...
 
-# Non-mac variant, src2 is an immediate.
----
-name:            inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
-tracksRegLiveness: true
-machineFunctionInfo:
-  isEntryFunction: true
-  stackPtrOffsetReg: '$sgpr32'
-  occupancy:       10
-  sgprForEXECCopy: '$sgpr100_sgpr101'
-body:             |
-  ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $agpr0
-  ; CHECK-NEXT:   renamable $sgpr0 = S_MOV_B32 0
-  ; CHECK-NEXT:   renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr1 = COPY renamable $sgpr0
-  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
-  ; CHECK-NEXT:   renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
-  ; CHECK-NEXT:   dead renamable $vgpr9 = COPY renamable $vgpr8
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK-NEXT:   liveins: $vcc, $vgpr0_vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
-  ; CHECK-NEXT:   S_BRANCH %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
-  ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
-  ; CHECK-NEXT:   S_ENDPGM 0
-  bb.0:
-    S_NOP 0, implicit-def $agpr0
-    renamable $sgpr0 = S_MOV_B32 0
-    undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
-    renamable $sgpr1 = COPY renamable $sgpr0
-    %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
-    renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
-    %0.sub9:vreg_512_align2 = COPY %0.sub8
-
-  bb.1:
-    liveins: $vcc
-
-    %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, 0, 0, 0, 0, implicit $mode, implicit $exec
-    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
-    S_BRANCH %bb.2
-
-  bb.2:
-    ; No VGPRs available for %0
-    S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
-    S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
-    S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
-    S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
-    S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
-    GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
-    GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
-    GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
-    S_ENDPGM 0
-
-...
-
 # Non-mac variant, src2 is a physical register
 ---
 name:            inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2
diff --git a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
index dcb45cc90ca68..3de86da766af7 100644
--- a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
@@ -1553,19 +1553,18 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $vcc, $vgpr18_vgpr19
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
-  ; CHECK-NEXT:   early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr20_vgpr21, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   renamable $agpr0_agpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+  ; CHECK-NEXT:   early-clobber renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X4F16_e64 $agpr0_agpr1, $vgpr18_vgpr19, 0, 0, 0, 0, implicit $mode, implicit $exec
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
+  ; CHECK-NEXT:   liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-  ; CHECK-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
@@ -1903,14 +1902,13 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $vcc, $vgpr0_vgpr1
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
+  ; CHECK-NEXT:   liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15:0x00000000FFFFFFFF
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index 66002fff12155..4bc0270e61a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -235,47 +235,11 @@ bb:
   ret void
 }
 
-; TODO: Handle rewriting this case
 define void @test_rewrite_mfma_imm_src2(float %arg0, float %arg1) #0 {
 ; CHECK-LABEL: test_rewrite_mfma_imm_src2:
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 2.0
-; CHECK-NEXT:    s_nop 7
-; CHECK-NEXT:    s_nop 7
-; CHECK-NEXT:    s_nop 1
-; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
-; CHECK-NEXT:    v_accvgpr_write_b32 a1, v1
-; CHECK-NEXT:    v_accvgpr_write_b32 a2, v2
-; CHECK-NEXT:    v_accvgpr_write_b32 a3, v3
-; CHECK-NEXT:    v_accvgpr_write_b32 a4, v4
-; CHECK-NEXT:    v_accvgpr_write_b32 a5, v5
-; CHECK-NEXT:    v_accvgpr_write_b32 a6, v6
-; CHECK-NEXT:    v_accvgpr_write_b32 a7, v7
-; CHECK-NEXT:    v_accvgpr_write_b32 a8, v8
-; CHECK-NEXT:    v_accvgpr_write_b32 a9, v9
-; CHECK-NEXT:    v_accvgpr_write_b32 a10, v10
-; CHECK-NEXT:    v_accvgpr_write_b32 a11, v11
-; CHECK-NEXT:    v_accvgpr_write_b32 a12, v12
-; CHECK-NEXT:    v_accvgpr_write_b32 a13, v13
-; CHECK-NEXT:    v_accvgpr_write_b32 a14, v14
-; CHECK-NEXT:    v_accvgpr_write_b32 a15, v15
-; CHECK-NEXT:    v_accvgpr_write_b32 a16, v16
-; CHECK-NEXT:    v_accvgpr_write_b32 a17, v17
-; CHECK-NEXT:    v_accvgpr_write_b32 a18, v18
-; CHECK-NEXT:    v_accvgpr_write_b32 a19, v19
-; CHECK-NEXT:    v_accvgpr_write_b32 a20, v20
-; CHECK-NEXT:    v_accvgpr_write_b32 a21, v21
-; CHECK-NEXT:    v_accvgpr_write_b32 a22, v22
-; CHECK-NEXT:    v_accvgpr_write_b32 a23, v23
-; CHECK-NEXT:    v_accvgpr_write_b32 a24, v24
-; CHECK-NEXT:    v_accvgpr_write_b32 a25, v25
-; CHECK-NEXT:    v_accvgpr_write_b32 a26, v26
-; CHECK-NEXT:    v_accvgpr_write_b32 a27, v27
-; CHECK-NEXT:    v_accvgpr_write_b32 a28, v28
-; CHECK-NEXT:    v_accvgpr_write_b32 a29, v29
-; CHECK-NEXT:    v_accvgpr_write_b32 a30, v30
-; CHECK-NEXT:    v_accvgpr_write_b32 a31, v31
+; CHECK-NEXT:    v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, 2.0
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use a[0:31]
 ; CHECK-NEXT:    ;;#ASMEND

Copy link
Contributor

@srpande srpande left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addresses the first MFMA chain where C Matrix is zero.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/handle-rewrite-vgpr-mfma-with-imm-src2-2 branch from c289a31 to 4e9c227 Compare August 12, 2025 01:30
@arsenm arsenm force-pushed the users/arsenm/amdgpu/handle-untied-agpr-mfma-rewrite-2 branch 2 times, most recently from 228b088 to 5baebfb Compare August 18, 2025 15:31
@arsenm arsenm force-pushed the users/arsenm/amdgpu/handle-rewrite-vgpr-mfma-with-imm-src2-2 branch from 4e9c227 to 58720b8 Compare August 18, 2025 15:31
Base automatically changed from users/arsenm/amdgpu/handle-untied-agpr-mfma-rewrite-2 to main August 20, 2025 23:16
@arsenm arsenm force-pushed the users/arsenm/amdgpu/handle-rewrite-vgpr-mfma-with-imm-src2-2 branch from 58720b8 to 25b15ea Compare August 20, 2025 23:23
@arsenm arsenm merged commit 156f3fc into main Aug 21, 2025
9 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/handle-rewrite-vgpr-mfma-with-imm-src2-2 branch August 21, 2025 00:09
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants