Skip to content

Commit 8b45c13

Browse files
committed
AMDGPU: Add some baseline test for mfma rewrite with subregister copies
Currently only cases rooted at a full copy of an MFMA result are handled. Prepare to relax that by testing more intricate subregister usage. Currently only full copies are handled, add some tests to help work towards handling subregisters.
1 parent 156f3fc commit 8b45c13

File tree

3 files changed

+614
-0
lines changed

3 files changed

+614
-0
lines changed
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -o - %s | FileCheck %s
3+
4+
# V-to-A copy is a subregister insert
5+
---
6+
name: test_rewrite_mfma_copy_subreg_insert
7+
tracksRegLiveness: true
8+
body: |
9+
bb.0:
10+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
11+
12+
; CHECK-LABEL: name: test_rewrite_mfma_copy_subreg_insert
13+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
16+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
17+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
18+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
19+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
20+
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
21+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
22+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
23+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
24+
; CHECK-NEXT: SI_RETURN
25+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
26+
%1:av_64_align2 = COPY $vgpr0_vgpr1
27+
%2:av_64_align2 = COPY $vgpr2_vgpr3
28+
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
29+
%4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
30+
undef %5.sub0_sub1:areg_128_align2 = COPY %4
31+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %5
32+
GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1)
33+
GLOBAL_STORE_DWORDX2 %0, %5.sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
34+
SI_RETURN
35+
...
36+
37+
# V-to-A copy is a subregister extract
38+
---
39+
name: test_rewrite_mfma_copy_subreg_extract
40+
tracksRegLiveness: true
41+
body: |
42+
bb.0:
43+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
44+
45+
; CHECK-LABEL: name: test_rewrite_mfma_copy_subreg_extract
46+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
47+
; CHECK-NEXT: {{ $}}
48+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
49+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
50+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
51+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
52+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
53+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0
54+
; CHECK-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
55+
; CHECK-NEXT: SI_RETURN
56+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
57+
%1:av_64_align2 = COPY $vgpr0_vgpr1
58+
%2:av_64_align2 = COPY $vgpr2_vgpr3
59+
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
60+
%4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
61+
%5:agpr_32 = COPY %4.sub0
62+
GLOBAL_STORE_DWORD %0, %5, 0, 0, implicit $exec :: (store (s32), addrspace 1)
63+
SI_RETURN
64+
...
65+
66+
# V-to-A copy is a subregister-to-subregister copy
67+
---
68+
name: test_rewrite_mfma_copy_subreg_insert_extract_same_subreg
69+
tracksRegLiveness: true
70+
body: |
71+
bb.0:
72+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
73+
74+
; CHECK-LABEL: name: test_rewrite_mfma_copy_subreg_insert_extract_same_subreg
75+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
78+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
79+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
80+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
81+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
82+
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0
83+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
84+
; CHECK-NEXT: SI_RETURN
85+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
86+
%1:av_64_align2 = COPY $vgpr0_vgpr1
87+
%2:av_64_align2 = COPY $vgpr2_vgpr3
88+
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
89+
%4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
90+
undef %5.sub0:areg_64_align2 = COPY %4.sub0
91+
GLOBAL_STORE_DWORDX2 %0, %5, 0, 0, implicit $exec :: (store (s64), addrspace 1)
92+
SI_RETURN
93+
...
94+
95+
# V-to-A copy is a subregister-to-subregister copy
96+
---
97+
name: test_rewrite_mfma_copy_subreg_insert_extract_different_subreg
98+
tracksRegLiveness: true
99+
body: |
100+
bb.0:
101+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
102+
103+
; CHECK-LABEL: name: test_rewrite_mfma_copy_subreg_insert_extract_different_subreg
104+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
105+
; CHECK-NEXT: {{ $}}
106+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
107+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
108+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
109+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
110+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
111+
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub1
112+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
113+
; CHECK-NEXT: SI_RETURN
114+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
115+
%1:av_64_align2 = COPY $vgpr0_vgpr1
116+
%2:av_64_align2 = COPY $vgpr2_vgpr3
117+
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
118+
%4:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
119+
undef %5.sub0:areg_64_align2 = COPY %4.sub1
120+
GLOBAL_STORE_DWORDX2 %0, %5, 0, 0, implicit $exec :: (store (s64), addrspace 1)
121+
SI_RETURN
122+
...
123+
124+
# V-to-A copy is a subregister extract, from a subregister def
125+
---
126+
name: test_rewrite_mfma_copy_subreg_extract_from_subreg_def
127+
tracksRegLiveness: true
128+
body: |
129+
bb.0:
130+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
131+
132+
; CHECK-LABEL: name: test_rewrite_mfma_copy_subreg_extract_from_subreg_def
133+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
134+
; CHECK-NEXT: {{ $}}
135+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
136+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
137+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
138+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
139+
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
140+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0
141+
; CHECK-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
142+
; CHECK-NEXT: SI_RETURN
143+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
144+
%1:av_64_align2 = COPY $vgpr0_vgpr1
145+
%2:av_64_align2 = COPY $vgpr2_vgpr3
146+
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
147+
undef %4.sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
148+
%5:agpr_32 = COPY %4.sub0
149+
GLOBAL_STORE_DWORD %0, %5, 0, 0, implicit $exec :: (store (s32), addrspace 1)
150+
SI_RETURN
151+
...
152+
153+
# V-to-A copy is a subregister insert from a subregister_def
154+
---
155+
name: test_rewrite_mfma_copy_subreg_insert_from_subreg_def_tuple
156+
tracksRegLiveness: true
157+
body: |
158+
bb.0:
159+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
160+
161+
; CHECK-LABEL: name: test_rewrite_mfma_copy_subreg_insert_from_subreg_def_tuple
162+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
163+
; CHECK-NEXT: {{ $}}
164+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
165+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
166+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
167+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
168+
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
169+
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub2_sub3
170+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
171+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
172+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
173+
; CHECK-NEXT: SI_RETURN
174+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
175+
%1:av_64_align2 = COPY $vgpr0_vgpr1
176+
%2:av_64_align2 = COPY $vgpr2_vgpr3
177+
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
178+
undef %4.sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
179+
undef %5.sub0_sub1:areg_128_align2 = COPY %4.sub2_sub3
180+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %5
181+
GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1)
182+
GLOBAL_STORE_DWORDX2 %0, %5.sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
183+
SI_RETURN
184+
...
185+
186+
# V-to-A copy is a subregister insert of a subregister from a
187+
# subregister_def
188+
---
189+
name: test_rewrite_mfma_copy_subreg_insert_from_subreg_def_subreg
190+
tracksRegLiveness: true
191+
body: |
192+
bb.0:
193+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
194+
195+
; CHECK-LABEL: name: test_rewrite_mfma_copy_subreg_insert_from_subreg_def_subreg
196+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
197+
; CHECK-NEXT: {{ $}}
198+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
199+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
200+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
201+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
202+
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
203+
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub2
204+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
205+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
206+
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
207+
; CHECK-NEXT: SI_RETURN
208+
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
209+
%1:av_64_align2 = COPY $vgpr0_vgpr1
210+
%2:av_64_align2 = COPY $vgpr2_vgpr3
211+
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
212+
undef %4.sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %3, 0, 0, 0, implicit $mode, implicit $exec
213+
undef %5.sub1:areg_128_align2 = COPY %4.sub2
214+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %5
215+
GLOBAL_STORE_DWORDX4 %0, %5, 0, 0, implicit $exec :: (store (s128), addrspace 1)
216+
GLOBAL_STORE_DWORDX2 %0, %5.sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
217+
SI_RETURN
218+
...

0 commit comments

Comments
 (0)