88#include < vector>
99
1010#include " ggml-backend-impl.h"
11+ #include " ggml-backend.h"
1112#include " ggml-impl.h"
1213#include " ggml-openvino/utils.h"
1314#include " ggml.h"
@@ -248,17 +249,30 @@ static bool is_op_unsupported_case(const ggml_tensor* op) {
248249 const auto * op_params = op->op_params ;
249250 memcpy (&scale, (const float *) op_params + 0 , sizeof (float ));
250251 memcpy (&max_bias, (const float *) op_params + 1 , sizeof (float ));
251- const uint32_t h = op->src [0 ]->ne [2 ];
252- const uint32_t n_head = op->src [0 ]->ne [0 ];
253- const uint32_t n_head_log2 = 1u << (uint32_t ) floor (log2 (n_head));
254-
255- const float m0 = powf (2 .0f , -(max_bias) / n_head_log2);
256- const float m1 = powf (2 .0f , -(max_bias / 2 .0f ) / n_head_log2);
257- const float slope =
258- (max_bias > 0 .0f ) ? h < n_head_log2 ? powf (m0, h + 1 ) : powf (m1, 2 * (h - n_head_log2) + 1 ) : 1 .0f ;
252+ if (max_bias > 0 ) {
253+ GGML_LOG_WARN (" OpenVINO backend does not support SOFT_MAX with max_bias > 0\n " );
254+ return true ;
255+ }
256+ }
259257
260- if (slope != 1 .0f ) {
261- GGML_LOG_WARN (" OpenVINO backend does not support SOFT_MAX with slope != 1.0f\n " );
258+ if (op->op == GGML_OP_FLASH_ATTN_EXT) {
259+ if (op->src [4 ] != nullptr ) {
260+ GGML_LOG_WARN (" OpenVINO backend does not support FLASH_ATTN_EXT with sinks\n " );
261+ return true ;
262+ }
263+ float scale = 1 .0f ;
264+ float max_bias = 0 .0f ;
265+ float logit_softcap = 0 .0f ;
266+ const auto * op_params = op->op_params ;
267+ memcpy (&scale, (const float *) op_params + 0 , sizeof (float ));
268+ memcpy (&max_bias, (const float *) op_params + 1 , sizeof (float ));
269+ memcpy (&logit_softcap, (const float *) op_params + 2 , sizeof (float ));
270+ if (max_bias > 0 ) {
271+ GGML_LOG_WARN (" OpenVINO backend does not support FLASH_ATTN_EXT with max_bias > 0\n " );
272+ return true ;
273+ }
274+ if (logit_softcap != 0 ) {
275+ GGML_LOG_WARN (" OpenVINO backend does not support FLASH_ATTN_EXT with logit_softcap != 0\n " );
262276 return true ;
263277 }
264278 }
@@ -305,12 +319,8 @@ static bool is_op_unsupported_case(const ggml_tensor* op) {
305319 return true ;
306320 }
307321 float freq_scale;
308- memcpy (&freq_scale, op_params + 6 , sizeof (float ));
309- if (freq_scale != 0 .0f && freq_scale != 1 .0f ) {
310- GGML_LOG_WARN (" OpenVINO backend does not support ROPE with freq_scale %f != 1.0f\n " , freq_scale);
311- return true ;
312- }
313322 float ext_factor;
323+ memcpy (&freq_scale, op_params + 6 , sizeof (float ));
314324 memcpy (&ext_factor, op_params + 7 , sizeof (float ));
315325 if (ext_factor != 0 .0f ) {
316326 GGML_LOG_WARN (" OpenVINO backend does not support ROPE with ext_factor %f != 0.0f\n " , ext_factor);
@@ -332,8 +342,17 @@ static bool is_op_unsupported_case(const ggml_tensor* op) {
332342static bool ggml_backend_openvino_device_supports_op (ggml_backend_dev_t dev, const ggml_tensor* op) {
333343 GGML_ASSERT (dev->reg != nullptr );
334344
335- static const std::set<ggml_type> supported_types{
336- GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_I64, GGML_TYPE_I32};
345+ static std::set<ggml_type> supported_types{GGML_TYPE_F32,
346+ GGML_TYPE_F16,
347+ GGML_TYPE_BF16,
348+ GGML_TYPE_I64,
349+ GGML_TYPE_I32,
350+ GGML_TYPE_Q4_0,
351+ GGML_TYPE_Q4_1,
352+ GGML_TYPE_Q4_K,
353+ GGML_TYPE_Q5_K,
354+ GGML_TYPE_Q8_0,
355+ GGML_TYPE_Q6_K};
337356
338357 static const std::set<ggml_op> supported_ops{GGML_OP_NONE,
339358 GGML_OP_ADD,
@@ -348,7 +367,8 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
348367 GGML_OP_ROPE,
349368 GGML_OP_RMS_NORM,
350369 GGML_OP_SCALE,
351- GGML_OP_SOFT_MAX,
370+ // softmax has not been updated because it is replaced by flash_attn_ext
371+ // GGML_OP_SOFT_MAX,
352372 GGML_OP_SET_ROWS,
353373 GGML_OP_FLASH_ATTN_EXT,
354374 GGML_OP_CPY};
@@ -357,6 +377,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
357377 };
358378 static const std::set<ggml_glu_op> supported_glu_ops{
359379 GGML_GLU_OP_SWIGLU,
380+ GGML_GLU_OP_GEGLU,
360381 };
361382
362383 switch (op->op ) {
@@ -394,14 +415,22 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
394415 return false ;
395416 }
396417 for (int i = 0 ; i < GGML_MAX_SRC; i++) {
397- if (supported_types.find (op->type ) == supported_types.end ()) {
398- GGML_LOG_WARN (" OpenVINO backend does not support tensor type %s\n " , ggml_type_name (op->type ));
418+ auto * src = op->src [i];
419+ if (src == nullptr ) {
420+ break ;
421+ }
422+ if (supported_types.find (src->type ) == supported_types.end ()) {
423+ GGML_LOG_WARN (" OpenVINO backend does not support tensor type %s\n " , ggml_type_name (src->type ));
399424 return false ;
400425 }
401- if (op-> src [i] != nullptr && op-> src [i] ->ne [3 ] != 1 ) {
426+ if (src->ne [3 ] != 1 ) {
402427 GGML_LOG_WARN (" OpenVINO backend does not support tensors with ne[3] != 1\n " );
403428 return false ;
404429 }
430+ if (ggml_is_quantized (src->type ) && src->ne [2 ] != 1 ) {
431+ GGML_LOG_WARN (" OpenVINO backend does not support 3D quantized tensors\n " );
432+ return false ;
433+ }
405434 }
406435
407436 if (is_op_unsupported_case (op)) {
0 commit comments