@@ -99,7 +99,6 @@ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline);
99
99
100
100
struct vk_matmul_pipeline_struct {
101
101
vk_pipeline l, m, s;
102
- vk_pipeline a_l, a_m, a_s;
103
102
};
104
103
105
104
typedef std::shared_ptr<vk_matmul_pipeline_struct> vk_matmul_pipeline;
@@ -1603,7 +1602,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
1603
1602
l_mmq_wg_denoms_k, m_mmq_wg_denoms_k, s_mmq_wg_denoms_k,
1604
1603
l_mmqid_wg_denoms, m_mmqid_wg_denoms, s_mmqid_wg_denoms;
1605
1604
1606
- uint32_t l_align, m_align, s_align;
1607
1605
if (device->coopmat2 ) {
1608
1606
// spec constants and tile sizes for non-quant matmul/matmul_id
1609
1607
l_warptile = { 256 , 128 , 256 , 64 , 1 };
@@ -1636,10 +1634,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
1636
1634
l_mmqid_wg_denoms = { 128 , 64 , 1 };
1637
1635
m_mmqid_wg_denoms = { 128 , 64 , 1 };
1638
1636
s_mmqid_wg_denoms = { 128 , 64 , 1 };
1639
-
1640
- l_align = 128 ;
1641
- m_align = 64 ;
1642
- s_align = 32 ;
1643
1637
} else {
1644
1638
// Matrix cores require different warp group sizes
1645
1639
const uint32_t tm_l = device->coopmat_support ? device->coopmat_m : 4 ;
@@ -1663,9 +1657,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
1663
1657
l_mmq_wg_denoms = l_wg_denoms = {128 , 128 , 1 };
1664
1658
m_mmq_wg_denoms = m_wg_denoms = { 64 , 64 , 1 };
1665
1659
s_mmq_wg_denoms = s_wg_denoms = { 32 , 32 , 1 };
1666
- l_align = 128 ;
1667
- m_align = 64 ;
1668
- s_align = 32 ;
1669
1660
1670
1661
for (uint32_t i = 0 ; i < GGML_TYPE_COUNT; ++i) {
1671
1662
ggml_type t = (ggml_type)i;
@@ -1802,9 +1793,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
1802
1793
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->l , #NAMELC #F16ACC " _l" , NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, 1 ); \
1803
1794
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 ); \
1804
1795
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 ); \
1805
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
1806
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
1807
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
1808
1796
1809
1797
// Create 2 variants, {f16,f32} accumulator
1810
1798
#define CREATE_MM2 (PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT ) \
@@ -1866,12 +1854,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
1866
1854
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _coopmat_len, NAMELC ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 , false , true ); \
1867
1855
if (device->mul_mat ## ID ## _s[TYPE]) \
1868
1856
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _coopmat_len, NAMELC ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 , false , true ); \
1869
- if (device->mul_mat ## ID ## _l[TYPE]) \
1870
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align, false , true ); \
1871
- if (device->mul_mat ## ID ## _m[TYPE]) \
1872
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align, false , true ); \
1873
- if (device->mul_mat ## ID ## _s[TYPE]) \
1874
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align, false , true ); \
1875
1857
1876
1858
// Create 2 variants, {f16,f32} accumulator
1877
1859
#define CREATE_MM2 (TYPE, PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT, ID ) \
@@ -1991,12 +1973,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
1991
1973
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 ); \
1992
1974
if (device->mul_mat ## ID ## _s[TYPE]) \
1993
1975
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 ); \
1994
- if (device->mul_mat ## ID ## _l[TYPE]) \
1995
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
1996
- if (device->mul_mat ## ID ## _m[TYPE]) \
1997
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
1998
- if (device->mul_mat ## ID ## _s[TYPE]) \
1999
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
2000
1976
2001
1977
// Create 2 variants, {f16,f32} accumulator
2002
1978
#define CREATE_MM2 (TYPE, PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT, ID ) \
@@ -2064,12 +2040,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
2064
2040
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 ); \
2065
2041
if (device->mul_mat ## ID ## _s[TYPE]) \
2066
2042
ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 ); \
2067
- if (device->mul_mat ## ID ## _l[TYPE]) \
2068
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
2069
- if (device->mul_mat ## ID ## _m[TYPE]) \
2070
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
2071
- if (device->mul_mat ## ID ## _s[TYPE]) \
2072
- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
2073
2043
2074
2044
CREATE_MM (GGML_TYPE_F32, pipeline_matmul_f32, matmul_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3 , );
2075
2045
CREATE_MM (GGML_TYPE_F32, pipeline_matmul_f32_f16, matmul_f32_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3 , );
@@ -4003,35 +3973,30 @@ static uint32_t ggml_vk_guess_split_k(ggml_backend_vk_context * ctx, int m, int
4003
3973
return split_k;
4004
3974
}
4005
3975
4006
- static vk_pipeline ggml_vk_guess_matmul_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned, ggml_type src0_type) {
4007
- VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline(" << m << " , " << n << " , " << aligned << " , " << ggml_type_name (src0_type) << " )" );
3976
+ static vk_pipeline ggml_vk_guess_matmul_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
3977
+ VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
4008
3978
4009
3979
if (ctx->device ->coopmat2 ) {
4010
3980
// Use large shader when the N dimension is greater than the medium shader's tile size
4011
3981
uint32_t crossover_large = mmp->m ->wg_denoms [1 ];
4012
3982
if ((ctx->device ->mul_mat_l [src0_type] && (n > crossover_large)) || (!ctx->device ->mul_mat_m [src0_type] && !ctx->device ->mul_mat_s [src0_type])) {
4013
- return aligned ? mmp-> a_l : mmp->l ;
3983
+ return mmp->l ;
4014
3984
}
4015
3985
// Use medium shader when the N dimension is greater than the small shader's tile size
4016
3986
uint32_t crossover_medium = mmp->s ->wg_denoms [1 ];
4017
3987
if ((ctx->device ->mul_mat_m [src0_type] && (n > crossover_medium)) || !ctx->device ->mul_mat_s [src0_type]) {
4018
- return aligned ? mmp-> a_m : mmp->m ;
3988
+ return mmp->m ;
4019
3989
}
4020
- return aligned ? mmp-> a_s : mmp->s ;
3990
+ return mmp->s ;
4021
3991
}
4022
3992
4023
3993
if ((ctx->device ->mul_mat_s [src0_type] && (m <= 32 || n <= 32 )) || (!ctx->device ->mul_mat_m [src0_type] && !ctx->device ->mul_mat_l [src0_type])) {
4024
- return aligned ? mmp-> a_s : mmp->s ;
3994
+ return mmp->s ;
4025
3995
}
4026
3996
if ((ctx->device ->mul_mat_m [src0_type] && (m <= 64 || n <= 64 )) || !ctx->device ->mul_mat_l [src0_type]) {
4027
- return aligned ? mmp-> a_m : mmp->m ;
3997
+ return mmp->m ;
4028
3998
}
4029
- return aligned ? mmp->a_l : mmp->l ;
4030
- }
4031
-
4032
- static uint32_t ggml_vk_guess_matmul_pipeline_align (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
4033
- VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline_align(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
4034
- return ggml_vk_guess_matmul_pipeline (ctx, mmp, m, n, true , src0_type)->align ;
3999
+ return mmp->l ;
4035
4000
}
4036
4001
4037
4002
static void ggml_vk_matmul (
@@ -4059,35 +4024,30 @@ static void ggml_vk_matmul(
4059
4024
ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_matmul_split_k_reduce , { split_k_buffer, d }, pc2.size () * sizeof (uint32_t ), pc2.data (), { m * n * batch, 1 , 1 });
4060
4025
}
4061
4026
4062
- static vk_pipeline ggml_vk_guess_matmul_id_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned , ggml_type src0_type) {
4063
- VK_LOG_DEBUG (" ggml_vk_guess_matmul_id_pipeline(" << m << " , " << n << " , " << aligned << " , " << ggml_type_name (src0_type) << " )" );
4027
+ static vk_pipeline ggml_vk_guess_matmul_id_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, uint32_t m, uint32_t n , ggml_type src0_type) {
4028
+ VK_LOG_DEBUG (" ggml_vk_guess_matmul_id_pipeline(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
4064
4029
4065
4030
if (ctx->device ->coopmat2 ) {
4066
4031
// Use large shader when the N dimension is greater than the medium shader's tile size
4067
4032
uint32_t crossover_large = mmp->m ->wg_denoms [1 ];
4068
4033
if ((ctx->device ->mul_mat_id_l [src0_type] && (n > crossover_large)) || (!ctx->device ->mul_mat_id_m [src0_type] && !ctx->device ->mul_mat_id_s [src0_type])) {
4069
- return aligned ? mmp-> a_l : mmp->l ;
4034
+ return mmp->l ;
4070
4035
}
4071
4036
// Use medium shader when the N dimension is greater than the small shader's tile size
4072
4037
uint32_t crossover_medium = mmp->s ->wg_denoms [1 ];
4073
4038
if ((ctx->device ->mul_mat_id_m [src0_type] && (n > crossover_medium)) || !ctx->device ->mul_mat_id_s [src0_type]) {
4074
- return aligned ? mmp-> a_m : mmp->m ;
4039
+ return mmp->m ;
4075
4040
}
4076
- return aligned ? mmp-> a_s : mmp->s ;
4041
+ return mmp->s ;
4077
4042
}
4078
4043
4079
4044
if ((ctx->device ->mul_mat_id_s [src0_type] && (m <= 32 || n <= 32 )) || (!ctx->device ->mul_mat_id_m [src0_type] && !ctx->device ->mul_mat_id_l [src0_type])) {
4080
- return aligned ? mmp-> a_s : mmp->s ;
4045
+ return mmp->s ;
4081
4046
}
4082
4047
if ((ctx->device ->mul_mat_id_m [src0_type] && (m <= 64 || n <= 64 )) || !ctx->device ->mul_mat_id_l [src0_type]) {
4083
- return aligned ? mmp-> a_m : mmp->m ;
4048
+ return mmp->m ;
4084
4049
}
4085
- return aligned ? mmp->a_l : mmp->l ;
4086
- }
4087
-
4088
- static uint32_t ggml_vk_guess_matmul_id_pipeline_align (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
4089
- VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline_align(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
4090
- return ggml_vk_guess_matmul_id_pipeline (ctx, mmp, m, n, true , src0_type)->align ;
4050
+ return mmp->l ;
4091
4051
}
4092
4052
4093
4053
static void ggml_vk_matmul_id (
@@ -4265,10 +4225,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
4265
4225
// Not implemented
4266
4226
GGML_ASSERT (y_non_contig || !qy_needs_dequant); // NOLINT
4267
4227
4268
- const uint32_t kpad = ggml_vk_align_size (ne10, ggml_vk_guess_matmul_pipeline_align (ctx, mmp, ne01, ne11, qx_needs_dequant ? GGML_TYPE_F16 : src0->type ));
4269
- const bool aligned = ne10 == kpad && ne01 > 8 && ne11 > 8 ;
4270
-
4271
- vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline (ctx, mmp, ne01, ne11, aligned, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
4228
+ vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline (ctx, mmp, ne01, ne11, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
4272
4229
4273
4230
// Reserve extra storage in the N dimension for the Y matrix, so we can avoid bounds-checking
4274
4231
uint32_t padded_n = qy_needs_dequant ? ROUNDUP_POW2 (ne11, pipeline->wg_denoms [1 ]) :ne11;
@@ -4849,10 +4806,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
4849
4806
// Not implemented
4850
4807
GGML_ASSERT (y_non_contig || !qy_needs_dequant); // NOLINT
4851
4808
4852
- const uint32_t kpad = ggml_vk_align_size (ne10, ggml_vk_guess_matmul_id_pipeline_align (ctx, mmp, ne01, nei1, qx_needs_dequant ? GGML_TYPE_F16 : src0->type ));
4853
- const bool aligned = ne10 == kpad && ne01 > 8 && nei1 > 8 ;
4854
-
4855
- vk_pipeline pipeline = ggml_vk_guess_matmul_id_pipeline (ctx, mmp, ne01, nei1, aligned, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
4809
+ vk_pipeline pipeline = ggml_vk_guess_matmul_id_pipeline (ctx, mmp, ne01, nei1, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
4856
4810
4857
4811
// Reserve extra storage in the N dimension for the Y matrix, so we can avoid bounds-checking
4858
4812
uint32_t padded_n = qy_needs_dequant ? ROUNDUP_POW2 (ne11, pipeline->wg_denoms [1 ]) :ne11;
@@ -7218,34 +7172,25 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
7218
7172
7219
7173
vk_pipeline p;
7220
7174
std::string shname;
7221
7175
if (shader_size == 0 ) {
7222
- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->a_s : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->a_s ;
7223
- shname = std::string (ggml_type_name (quant)) + " _ALIGNED_S " ;
7176
+ p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->s : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->s ;
7177
+ shname = std::string (ggml_type_name (quant)) + " _S " ;
7224
7178
} else if (shader_size == 1 ) {
7225
- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->a_m : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->a_m ;
7226
- shname = std::string (ggml_type_name (quant)) + " _ALIGNED_M " ;
7179
+ p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->m : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->m ;
7180
+ shname = std::string (ggml_type_name (quant)) + " _M " ;
7227
7181
} else if (shader_size == 2 ) {
7228
- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->a_l : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->a_l ;
7229
- shname = std::string (ggml_type_name (quant)) + " _ALIGNED_L " ;
7182
+ p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->l : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->l ;
7183
+ shname = std::string (ggml_type_name (quant)) + " _L " ;
7230
7184
} else {
7231
7185
GGML_ASSERT (0 );
7232
7186
}
7233
7187
7234
- const size_t kpad = ggml_vk_align_size (k, p->align );
7235
-
7236
- if (k != kpad) {
7237
- if (shader_size == 0 ) {
7238
- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->s : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->s ;
7239
- shname = std::string (ggml_type_name (quant)) + " _S" ;
7240
- } else if (shader_size == 1 ) {
7241
- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->m : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->m ;
7242
- shname = std::string (ggml_type_name (quant)) + " _M" ;
7243
- } else if (shader_size == 2 ) {
7244
- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->l : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->l ;
7245
- shname = std::string (ggml_type_name (quant)) + " _L" ;
7246
- } else {
7247
- GGML_ASSERT (0 );
7248
- }
7188
+ if (p == nullptr ) {
7189
+ std::cerr << " error: no pipeline for ggml_vk_test_dequant_matmul " << ggml_type_name (quant) << std::endl;
7190
+ return ;
7249
7191
}
7192
+
7193
+ // p is assigned and null-checked above, so its alignment can be read safely here.
7194
+ const size_t kpad = ggml_vk_align_size (k, p->align );
7250
7195
7251
7196
const size_t x_sz = sizeof (float ) * x_ne;
0 commit comments