
Commit 7d0fde1

Vulkan: Remove dedicated aligned matrix matrix multiplication shaders
1 parent 4375415 commit 7d0fde1

File tree

5 files changed: +662 -633 lines changed


ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 29 additions & 85 deletions
@@ -99,7 +99,6 @@ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline);
 
 struct vk_matmul_pipeline_struct {
     vk_pipeline l, m, s;
-    vk_pipeline a_l, a_m, a_s;
 };
 
 typedef std::shared_ptr<vk_matmul_pipeline_struct> vk_matmul_pipeline;
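
With the aligned variants gone, each matmul pipeline bundle carries only one pipeline per tile size. For reference, this is the struct the hunk above leaves behind (a sketch assembled from the surviving context lines; the vk_pipeline type comes from the surrounding file):

    // one pipeline per tile size: large, medium, small
    struct vk_matmul_pipeline_struct {
        vk_pipeline l, m, s;
    };

    typedef std::shared_ptr<vk_matmul_pipeline_struct> vk_matmul_pipeline;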
@@ -1603,7 +1602,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
         l_mmq_wg_denoms_k, m_mmq_wg_denoms_k, s_mmq_wg_denoms_k,
         l_mmqid_wg_denoms, m_mmqid_wg_denoms, s_mmqid_wg_denoms;
 
-    uint32_t l_align, m_align, s_align;
     if (device->coopmat2) {
         // spec constants and tile sizes for non-quant matmul/matmul_id
         l_warptile = { 256, 128, 256, 64, 1 };
@@ -1636,10 +1634,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
         l_mmqid_wg_denoms = { 128, 64, 1 };
         m_mmqid_wg_denoms = { 128, 64, 1 };
         s_mmqid_wg_denoms = { 128, 64, 1 };
-
-        l_align = 128;
-        m_align = 64;
-        s_align = 32;
     } else {
         // Matrix cores require different warp group sizes
         const uint32_t tm_l = device->coopmat_support ? device->coopmat_m : 4;
@@ -1663,9 +1657,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
         l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
         m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 };
         s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 };
-        l_align = 128;
-        m_align = 64;
-        s_align = 32;
 
         for (uint32_t i = 0; i < GGML_TYPE_COUNT; ++i) {
             ggml_type t = (ggml_type)i;
@@ -1802,9 +1793,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
         ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->l, #NAMELC #F16ACC "_l", NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, 1); \
         ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->m, #NAMELC #F16ACC "_m", NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1); \
         ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->s, #NAMELC #F16ACC "_s", NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1); \
-        ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_l, #NAMELC #F16ACC "_aligned_l", NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
-        ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_m, #NAMELC #F16ACC "_aligned_m", NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
-        ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_s, #NAMELC #F16ACC "_aligned_s", NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
 
     // Create 2 variants, {f16,f32} accumulator
 #define CREATE_MM2(PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT) \
@@ -1866,12 +1854,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
             ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->m, #NAMELC #F16ACC "_m", NAMELC ## F16ACC ## _coopmat_len, NAMELC ## F16ACC ## _coopmat_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1, false, true); \
         if (device->mul_mat ## ID ## _s[TYPE]) \
             ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->s, #NAMELC #F16ACC "_s", NAMELC ## F16ACC ## _coopmat_len, NAMELC ## F16ACC ## _coopmat_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1, false, true); \
-        if (device->mul_mat ## ID ## _l[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_l, #NAMELC #F16ACC "_aligned_l", NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align, false, true); \
-        if (device->mul_mat ## ID ## _m[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_m, #NAMELC #F16ACC "_aligned_m", NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align, false, true); \
-        if (device->mul_mat ## ID ## _s[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_s, #NAMELC #F16ACC "_aligned_s", NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align, false, true); \
 
     // Create 2 variants, {f16,f32} accumulator
 #define CREATE_MM2(TYPE, PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT, ID) \
@@ -1991,12 +1973,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
             ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->m, #NAMELC #F16ACC "_m", NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1); \
         if (device->mul_mat ## ID ## _s[TYPE]) \
             ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->s, #NAMELC #F16ACC "_s", NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1); \
-        if (device->mul_mat ## ID ## _l[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_l, #NAMELC #F16ACC "_aligned_l", NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
-        if (device->mul_mat ## ID ## _m[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_m, #NAMELC #F16ACC "_aligned_m", NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
-        if (device->mul_mat ## ID ## _s[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_s, #NAMELC #F16ACC "_aligned_s", NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
 
     // Create 2 variants, {f16,f32} accumulator
 #define CREATE_MM2(TYPE, PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT, ID) \
@@ -2064,12 +2040,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
             ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->m, #NAMELC #F16ACC "_m", NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1); \
         if (device->mul_mat ## ID ## _s[TYPE]) \
             ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->s, #NAMELC #F16ACC "_s", NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1); \
-        if (device->mul_mat ## ID ## _l[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_l, #NAMELC #F16ACC "_aligned_l", NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
-        if (device->mul_mat ## ID ## _m[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_m, #NAMELC #F16ACC "_aligned_m", NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
-        if (device->mul_mat ## ID ## _s[TYPE]) \
-            ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_s, #NAMELC #F16ACC "_aligned_s", NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
 
     CREATE_MM(GGML_TYPE_F32, pipeline_matmul_f32, matmul_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3, );
     CREATE_MM(GGML_TYPE_F32, pipeline_matmul_f32_f16, matmul_f32_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3, );
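
To make the token-pasting in these macros easier to follow, here is roughly what the trimmed fp32 CREATE_MM now produces for the CREATE_MM(GGML_TYPE_F32, pipeline_matmul_f32, matmul_f32_f32, , ...) call shown above, expanded by hand (an approximation: guard and argument names are taken from the macro lines in this diff, and the empty F16ACC argument simply drops out):

    // expansion of the "_s" arm; the "_l" and "_m" arms follow the same pattern
    if (device->mul_mat_s[GGML_TYPE_F32])
        ggml_vk_create_pipeline(device, device->pipeline_matmul_f32->s, "matmul_f32_f32_s",
                                matmul_f32_f32_fp32_len, matmul_f32_f32_fp32_data, "main",
                                3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, s_warptile, 1);
    // before this commit a second, "_aligned_s" pipeline was created here, identical except
    // that its final argument was s_align (32) instead of 1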
@@ -4003,35 +3973,30 @@ static uint32_t ggml_vk_guess_split_k(ggml_backend_vk_context * ctx, int m, int
     return split_k;
 }
 
-static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned, ggml_type src0_type) {
-    VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline(" << m << ", " << n << ", " << aligned << ", " << ggml_type_name(src0_type) << ")");
+static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
+    VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline(" << m << ", " << n << ", " << ggml_type_name(src0_type) << ")");
 
     if (ctx->device->coopmat2) {
         // Use large shader when the N dimension is greater than the medium shader's tile size
         uint32_t crossover_large = mmp->m->wg_denoms[1];
         if ((ctx->device->mul_mat_l[src0_type] && (n > crossover_large)) || (!ctx->device->mul_mat_m[src0_type] && !ctx->device->mul_mat_s[src0_type])) {
-            return aligned ? mmp->a_l : mmp->l;
+            return mmp->l;
         }
         // Use medium shader when the N dimension is greater than the small shader's tile size
         uint32_t crossover_medium = mmp->s->wg_denoms[1];
         if ((ctx->device->mul_mat_m[src0_type] && (n > crossover_medium)) || !ctx->device->mul_mat_s[src0_type]) {
-            return aligned ? mmp->a_m : mmp->m;
+            return mmp->m;
         }
-        return aligned ? mmp->a_s : mmp->s;
+        return mmp->s;
     }
 
     if ((ctx->device->mul_mat_s[src0_type] && (m <= 32 || n <= 32)) || (!ctx->device->mul_mat_m[src0_type] && !ctx->device->mul_mat_l[src0_type])) {
-        return aligned ? mmp->a_s : mmp->s;
+        return mmp->s;
     }
     if ((ctx->device->mul_mat_m[src0_type] && (m <= 64 || n <= 64)) || !ctx->device->mul_mat_l[src0_type]) {
-        return aligned ? mmp->a_m : mmp->m;
+        return mmp->m;
     }
-    return aligned ? mmp->a_l : mmp->l;
-}
-
-static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
-    VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ", " << ggml_type_name(src0_type) << ")");
-    return ggml_vk_guess_matmul_pipeline(ctx, mmp, m, n, true, src0_type)->align;
+    return mmp->l;
 }
 
 static void ggml_vk_matmul(
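
Because the removed and added lines interleave in the hunk above, here is the selection helper as it reads after this change, reassembled from the hunk's added and context lines (indentation is approximate):

    static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
        VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline(" << m << ", " << n << ", " << ggml_type_name(src0_type) << ")");

        if (ctx->device->coopmat2) {
            // Use large shader when the N dimension is greater than the medium shader's tile size
            uint32_t crossover_large = mmp->m->wg_denoms[1];
            if ((ctx->device->mul_mat_l[src0_type] && (n > crossover_large)) || (!ctx->device->mul_mat_m[src0_type] && !ctx->device->mul_mat_s[src0_type])) {
                return mmp->l;
            }
            // Use medium shader when the N dimension is greater than the small shader's tile size
            uint32_t crossover_medium = mmp->s->wg_denoms[1];
            if ((ctx->device->mul_mat_m[src0_type] && (n > crossover_medium)) || !ctx->device->mul_mat_s[src0_type]) {
                return mmp->m;
            }
            return mmp->s;
        }

        if ((ctx->device->mul_mat_s[src0_type] && (m <= 32 || n <= 32)) || (!ctx->device->mul_mat_m[src0_type] && !ctx->device->mul_mat_l[src0_type])) {
            return mmp->s;
        }
        if ((ctx->device->mul_mat_m[src0_type] && (m <= 64 || n <= 64)) || !ctx->device->mul_mat_l[src0_type]) {
            return mmp->m;
        }
        return mmp->l;
    }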
@@ -4059,35 +4024,30 @@ static void ggml_vk_matmul(
         ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2.size() * sizeof(uint32_t), pc2.data(), { m * n * batch, 1, 1 });
 }
 
-static vk_pipeline ggml_vk_guess_matmul_id_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned, ggml_type src0_type) {
-    VK_LOG_DEBUG("ggml_vk_guess_matmul_id_pipeline(" << m << ", " << n << ", " << aligned << ", " << ggml_type_name(src0_type) << ")");
+static vk_pipeline ggml_vk_guess_matmul_id_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, uint32_t m, uint32_t n, ggml_type src0_type) {
+    VK_LOG_DEBUG("ggml_vk_guess_matmul_id_pipeline(" << m << ", " << n << ", " << ggml_type_name(src0_type) << ")");
 
     if (ctx->device->coopmat2) {
         // Use large shader when the N dimension is greater than the medium shader's tile size
         uint32_t crossover_large = mmp->m->wg_denoms[1];
         if ((ctx->device->mul_mat_id_l[src0_type] && (n > crossover_large)) || (!ctx->device->mul_mat_id_m[src0_type] && !ctx->device->mul_mat_id_s[src0_type])) {
-            return aligned ? mmp->a_l : mmp->l;
+            return mmp->l;
         }
         // Use medium shader when the N dimension is greater than the small shader's tile size
         uint32_t crossover_medium = mmp->s->wg_denoms[1];
         if ((ctx->device->mul_mat_id_m[src0_type] && (n > crossover_medium)) || !ctx->device->mul_mat_id_s[src0_type]) {
-            return aligned ? mmp->a_m : mmp->m;
+            return mmp->m;
         }
-        return aligned ? mmp->a_s : mmp->s;
+        return mmp->s;
     }
 
     if ((ctx->device->mul_mat_id_s[src0_type] && (m <= 32 || n <= 32)) || (!ctx->device->mul_mat_id_m[src0_type] && !ctx->device->mul_mat_id_l[src0_type])) {
-        return aligned ? mmp->a_s : mmp->s;
+        return mmp->s;
     }
     if ((ctx->device->mul_mat_id_m[src0_type] && (m <= 64 || n <= 64)) || !ctx->device->mul_mat_id_l[src0_type]) {
-        return aligned ? mmp->a_m : mmp->m;
+        return mmp->m;
     }
-    return aligned ? mmp->a_l : mmp->l;
-}
-
-static uint32_t ggml_vk_guess_matmul_id_pipeline_align(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
-    VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ", " << ggml_type_name(src0_type) << ")");
-    return ggml_vk_guess_matmul_id_pipeline(ctx, mmp, m, n, true, src0_type)->align;
+    return mmp->l;
 }
 
 static void ggml_vk_matmul_id(
@@ -4265,10 +4225,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
     // Not implemented
     GGML_ASSERT(y_non_contig || !qy_needs_dequant); // NOLINT
 
-    const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_pipeline_align(ctx, mmp, ne01, ne11, qx_needs_dequant ? GGML_TYPE_F16 : src0->type));
-    const bool aligned = ne10 == kpad && ne01 > 8 && ne11 > 8;
-
-    vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline(ctx, mmp, ne01, ne11, aligned, qx_needs_dequant ? GGML_TYPE_F16 : src0->type);
+    vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline(ctx, mmp, ne01, ne11, qx_needs_dequant ? GGML_TYPE_F16 : src0->type);
 
     // Reserve extra storage in the N dimension for the Y matrix, so we can avoid bounds-checking
     uint32_t padded_n = qy_needs_dequant ? ROUNDUP_POW2(ne11, pipeline->wg_denoms[1]) :ne11;
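
For context on what the deleted check computed: kpad rounded the K dimension (ne10) up to the chosen pipeline's align value, and the aligned shader was taken only when ne10 was already a multiple of that value and both ne01 and ne11 exceeded 8. A small standalone sketch of that arithmetic, assuming ggml_vk_align_size has the usual round-up-to-a-multiple semantics (the round_up helper and the sample shapes are mine, not from the source):

    #include <cstdint>
    #include <cstdio>

    // assumed behaviour of ggml_vk_align_size: round size up to a multiple of align
    static uint32_t round_up(uint32_t size, uint32_t align) {
        return ((size + align - 1) / align) * align;
    }

    int main() {
        const uint32_t ne10 = 4096, ne01 = 4096, ne11 = 512; // sample shapes, K = ne10
        const uint32_t align = 32;                           // e.g. the small tile's former s_align

        const uint32_t kpad    = round_up(ne10, align);      // 4096 is already a multiple of 32
        const bool     aligned = ne10 == kpad && ne01 > 8 && ne11 > 8;

        // with this commit the distinction disappears: the same l/m/s pipeline is selected
        // whether or not K happens to be a multiple of the tile alignment
        std::printf("kpad=%u aligned=%d\n", kpad, aligned);
        return 0;
    }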
@@ -4849,10 +4806,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
     // Not implemented
     GGML_ASSERT(y_non_contig || !qy_needs_dequant); // NOLINT
 
-    const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_id_pipeline_align(ctx, mmp, ne01, nei1, qx_needs_dequant ? GGML_TYPE_F16 : src0->type));
-    const bool aligned = ne10 == kpad && ne01 > 8 && nei1 > 8;
-
-    vk_pipeline pipeline = ggml_vk_guess_matmul_id_pipeline(ctx, mmp, ne01, nei1, aligned, qx_needs_dequant ? GGML_TYPE_F16 : src0->type);
+    vk_pipeline pipeline = ggml_vk_guess_matmul_id_pipeline(ctx, mmp, ne01, nei1, qx_needs_dequant ? GGML_TYPE_F16 : src0->type);
 
     // Reserve extra storage in the N dimension for the Y matrix, so we can avoid bounds-checking
     uint32_t padded_n = qy_needs_dequant ? ROUNDUP_POW2(ne11, pipeline->wg_denoms[1]) :ne11;
@@ -7218,34 +7172,24 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
 
     vk_pipeline p;
     std::string shname;
+    const size_t kpad = ggml_vk_align_size(k, p->align);
+
     if (shader_size == 0) {
-        p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->a_s : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->a_s;
-        shname = std::string(ggml_type_name(quant)) + "_ALIGNED_S";
+        p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->s : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->s;
+        shname = std::string(ggml_type_name(quant)) + "_S";
     } else if (shader_size == 1) {
-        p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->a_m : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->a_m;
-        shname = std::string(ggml_type_name(quant)) + "_ALIGNED_M";
+        p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->m : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->m;
+        shname = std::string(ggml_type_name(quant)) + "_M";
    } else if (shader_size == 2) {
-        p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->a_l : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->a_l;
-        shname = std::string(ggml_type_name(quant)) + "_ALIGNED_L";
+        p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->l : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->l;
+        shname = std::string(ggml_type_name(quant)) + "_L";
     } else {
         GGML_ASSERT(0);
     }
 
-    const size_t kpad = ggml_vk_align_size(k, p->align);
-
-    if (k != kpad) {
-        if (shader_size == 0) {
-            p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->s : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->s;
-            shname = std::string(ggml_type_name(quant)) + "_S";
-        } else if (shader_size == 1) {
-            p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->m : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->m;
-            shname = std::string(ggml_type_name(quant)) + "_M";
-        } else if (shader_size == 2) {
-            p = ctx->device->fp16 ? ctx->device->pipeline_dequant_mul_mat_mat[quant].f16acc->l : ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->l;
-            shname = std::string(ggml_type_name(quant)) + "_L";
-        } else {
-            GGML_ASSERT(0);
-        }
+    if (p == nullptr) {
+        std::cerr << "error: no pipeline for ggml_vk_test_dequant_matmul " << ggml_type_name(quant) << std::endl;
+        return;
     }
 
     const size_t x_sz = sizeof(float) * x_ne;
