You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
// lane is (0-31) mod 16 instead of 0-31 due to matrix replication in RDNA3
77
75
for( int ele=0; ele<WMMA_DATA_WIDTH; ++ele )
78
76
{
79
77
b_frag[ele] =b[16*ele+lane];
80
-
}
81
-
82
-
for( int ele=0; ele<WMMA_DATA_WIDTH; ++ele )
83
-
{
84
78
a_frag[ele] =a[16*lane+ele];
85
79
}
86
80
#endif
87
81
// call the WMMA compiler intrinsic
88
82
// more details available in the RDNA3 ISA guide - https://developer.amd.com/wp-content/resources/RDNA3_Shader_ISA_December2022.pdf
83
+
// more details available in the RDNA4 ISA guide - https://www.amd.com/content/dam/amd/en/documents/radeon-tech-docs/instruction-set-architectures/rdna4-instruction-set-architecture.pdf
89
84
// the last parameter is called "OPSEL" which decides which half of the VGPRs of c_frag the results are stored into
0 commit comments