@@ -132,21 +132,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
132132 compile_end_time = conversion_end_time;
133133 } else {
134134 std::shared_ptr<ov::Model> model;
135- std::map<ggml_type, ExtraQuantType> types_to_requantize;
136- if (is_static) {
137- types_to_requantize = {
138- {GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
139- {GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
140- {GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
141- {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C },
142- };
143- } else if (device == " GPU" ) {
144- types_to_requantize = {
145- // CVS-166739
146- {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
147- };
148- }
149- auto model_weights = GgmlOvDecoder::create_weight_nodes (cgraph, types_to_requantize);
135+ auto model_weights = GgmlOvDecoder::create_weight_nodes (cgraph, get_types_to_requant (device));
150136
151137 if (is_static) {
152138 ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights, is_static, true );
@@ -275,6 +261,23 @@ ov::AnyMap get_npu_prefill_config() {
275261 return config;
276262}
277263
264+ std::map<ggml_type, ExtraQuantType> get_types_to_requant (const std::string& device) {
265+ if (device == " NPU" ) {
266+ return {
267+ {GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
268+ {GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
269+ {GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
270+ {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C },
271+ };
272+ }
273+ if (device == " GPU" ) {
274+ return {
275+ // CVS-166739
276+ {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
277+ };
278+ }
279+ }
280+
278281ov::AnyMap get_npu_generate_config () {
279282 ov::AnyMap config = get_npu_prefill_config ();
280283 config.emplace (" NPUW_UNFOLD_IREQS" , " YES" );
0 commit comments