Skip to content

Commit a5ee6a1

Browse files
committed
xe: gemm: add 1D block load kernels
1 parent 1b1d658 commit a5ee6a1

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

src/gpu/intel/gemm/jit/selector/db/kernel.db

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,12 @@ auto _CATALOG_ = kcatalog::toArray({
11231123
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 49, -1}, {-1, 128, -1}, {16, 16, 1}, "ABIH"}, "at64x2+m48@40 am32x2+m64@48 aB wg 16x2 sys xaf vav hi sr br sb1 sm dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {16, 2, 1}, 1, (WGType) 1, 1, 0, 0, {16, 16, 2}, {true, true, true}}, {'E', 17, {112988, 212095, 0, 0, 0, 0, 0.55059, 2.48824, 1.62374, 4.12957, 0.00804382, 0.00804382, 0, 1, 1.18515, 0.998873, 1.5126e-12}}},
11241124
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 129, 1}, {-1, 768, 3583}, {1025, 129, 1}, {-1, 768, 3583}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
11251125
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {3072, 448, 1}, {-1, -1, 8192}, {3072, 448, 1}, {-1, -1, 8192}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
1126+
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS32x2 aB16+S1,32@32 aB wg 32x1 cb3 ks64 nb 32x0 sys af vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {177711, 326055, 0, 0, 0, 0, 0.679089, 2.52106, 3.64744, 8.82899, 0.00792228, 0.00792228, 0, 1, 1.14501, 0.990098, 9.49038e-13}}},
1127+
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2 aB16+S1,64@24 aB wg 16x2 cb4 ks32 nb 0x2 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 16777216}, {32, 64, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {157777, 362177, 0, 0, 0, 0, 0.572884, 1.78845, 3.62824, 8.7431, 0.00788381, 0.00788381, 0, 0.999766, 1.14094, 0.993462, 9.76979e-13}}},
1128+
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS64x2+S16@24 aB16x2+S1,32@16 aB wg 32x1 cb4 ks64 ql nb 32x0 sys af vav hi sr br dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {157566, 330240, 0, 0, 0, 0, 0.42122, 2.57823, 3.39749, 8.48835, 0.00795599, 0.00795599, 0, 1, 1.18566, 0.994066, 1.5408e-12}}},
1129+
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16 aB16+S1,64@32 aB wg 32x1 cb3 ks64 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {177311, 325429, 0, 0, 0, 0, 0.782317, 2.61771, 3.64801, 8.81517, 0.00792667, 0.00792667, 0, 1, 1.16879, 0.998113, 1.3455e-12}}},
1130+
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2 aB16+S1,64@32 aB wg 32x1 cb3 ks32 nb 32x0 sys xaf rr fx vav hi sr br dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 32}, {32, 1, 1}, 1, (WGType) 1, 257, 24576, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {163390, 386204, 0, 0, 0, 0, 0.734651, 2.51864, 3.59783, 8.77027, 0.00788992, 0.00788992, 0, 1, 1.15024, 0.992801, 1.00385e-12}}},
1131+
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS64+S16@24 aB16+S1,32@24 aB wg 32x1 cb4 ks32 ql sys xaf rr fx vav hi sr br sm dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {140479, 331595, 0, 0, 0, 0, 0.402177, 2.6016, 3.38947, 8.45442, 0.00828123, 0.00828123, 0, 0.9984, 1.18131, 1.00933, -2.28559e-13}}},
11261132
{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at32+m128@96 am32x2+m64@96 aB wg 2x16 vav hi pt sr br sb128 bk0 grf256 sys acb cr16", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {2097152, 262144, 16777216}, {2097152, 262144, 16777216}, {128, 16, 32}, {2, 16, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {879529, 62860.9, 0, 0, 0, 0, 1.12572, 1.9182, 3.81465, 7.84556, 0.00532516, 0.00532516, 0, 1, 1.01261, 1.00705, -3.00232e-14}}},
11271133
{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at64+m128@32 am128+m64@32 aB wg 2x8 xaf st hi pt sr br sb128 sn grf256 cr0 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 262144, 16777216}, {1048576, 262144, 16777216}, {64, 16, 128}, {2, 8, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {525002, 18498.2, 0, 0, 0, 0, 0.485217, 0.854072, 1.96694, 5.31108, 0.00356541, 0.00161949, 0.00452935, 0.938224, 1.01441, 1.01414, -4.57758e-14}}},
11281134
{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, 8191}, {-1, 1, -1}, {-1, 1, 8191}, {16, 16, 1}, "ABI"}, "at128+m64@48 am128+m32@48 aB wg 2x1x4 ikr af hi pt sr br sb128 grf256 sys bk0 acb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 4}, 1, (WGType) 1, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}},
@@ -1197,6 +1203,12 @@ auto _CATALOG_ = kcatalog::toArray({
11971203
{{'G', "gemm", {"N", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 129, 1}, {-1, 768, 3583}, {1025, 129, 1}, {-1, 768, 3583}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 xaf st acb hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
11981204
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32x2+m64@16 am64x2 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys nmk np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}},
11991205
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 9, -1}, {-1, 64, -1}, {16, 16, 1}, "IAB"}, "at32x2+m64@16 am64x2 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys nmk grf256 np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}},
1206+
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2+S64@8 aB16+S1,64@48 aB wg 32x1 cb4 ks32 nb 32x0 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 32}, {32, 1, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {162082, 443372, 0, 0, 0, 0, 0.76118, 2.51186, 3.55751, 8.72228, 0.00790848, 0.00790848, 0, 0.979938, 1.20119, 0.998485, 1.51113e-12}}},
1207+
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2+S64@40 aB16+S1,64@32 aB wg 16x1 cb4 ks16 ql nb 16x0 sys xaf st fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 16}, {16, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {374406, 217482, 0, 0, 0, 0, 0.682632, 1.71074, 3.59009, 8.65312, 0.00785572, 0.0076963, 0.000322578, 1, 1.30683, 1.0247, 2.57093e-12}}},
1208+
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16+S64@8 aB16+S1,64@8 aB wg 16x1 cb4 ks16 nb 16x0 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 16}, {16, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {330322, 171050, 0, 0, 0, 0, 0.820923, 1.80509, 3.64403, 8.71665, 0.00800297, 0.00602537, 0.00217363, 1, 1.24875, 1.00016, 2.09286e-12}}},
1209+
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS32x2+S64@40 aB16x2+S1,32@8 aB wg 16x1 cb4 ks32 ql sys xaf rr fx vav hi sr br dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 32}, {16, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {316428, 148920, 0, 0, 0, 0, 0.39205, 1.56724, 3.44087, 8.51925, 0.00794168, 0.00568289, 0.00221721, 1, 1.2479, 0.990352, 2.86173e-12}}},
1210+
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS32x2 aB32+S64@112 aB wg 32x1 cb4x2 ks64 ql nb 32x0 sys xaf st vav hi sr br dm", {16, (LoopType) 255, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 524288, 16777216}, {262144, 524288, 16777216}, {16, 32, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {306996, 213072, 0, 0, 0, 0, 0.420192, 2.6846, 3.27894, 8.35946, 0.0101853, 0.00869937, 0.000219736, 0.705509, 1.3409, 1.00864, 4.14841e-12}}},
1211+
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2+S16@104 aB16+S1,16@8 aB wg 16x2 cb3 ks32 nb 0x2 sys xaf fx vav hi sr br sm dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {184248, 434316, 0, 0, 0, 0, 1.58876, 1.89817, 3.55657, 8.66518, 0.007961, 0.007961, 0, 0.997444, 1.22188, 0.996053, 1.66019e-12}}},
12001212
{{'G', "gemm", {"Q", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32+m32@16 am64 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys st np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'E', 17, {231324, 37214.5, 12077.2, 7211.62, 0, 0, 0.533846, 2.1714, 0.354359, 3.98714, 0.0299984, 0.0213193, 0.00594342, 0.55014, 1.01154, 1.13636, -1.55278e-12}}},
12011213
{{'G', "gemm", {"Q", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 9, -1}, {-1, 32, -1}, {16, 16, 1}, "ABIh"}, "at32+m32@32 am64+m64@64 aB wg 4x1x4 vav sr br sb64 bm0 bk0 sys xaf sm sn st np grf256 ikr", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'E', 17, {225586, 69187.9, 10490.6, 15759.8, 0, 0, 0.601267, 1.84174, 0.789366, 4.16685, 0.0178126, 0.0173575, 0.000615127, 0.366169, 1.06063, 0.98351, 1.79892e-12}}},
12021214
{{'G', "gemm", {"Q", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 9, -1}, {-1, 32, -1}, {16, 16, 1}, "ABIh"}, "at32+m32@32 am64+m64@64 aB wg 4x1x2 vav sr br sb64 bm0 bk0 sys xaf sm sn st np grf256 ikr", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {4, 1, 2}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'E', 17, {205260, 23043.8, 36245.3, 16259.7, 0, 0, 0.500127, 1.78186, 1.52199, 6.05532, 0.013611, 0.00830487, 0.00315919, 0.504271, 1.18638, 1.05777, 6.06973e-14}}},

0 commit comments

Comments
 (0)