@@ -249,17 +249,30 @@ static bool is_op_unsupported_case(const ggml_tensor* op) {
249249 const auto * op_params = op->op_params ;
250250 memcpy (&scale, (const float *) op_params + 0 , sizeof (float ));
251251 memcpy (&max_bias, (const float *) op_params + 1 , sizeof (float ));
252- const uint32_t h = op->src [0 ]->ne [2 ];
253- const uint32_t n_head = op->src [0 ]->ne [0 ];
254- const uint32_t n_head_log2 = 1u << (uint32_t ) floor (log2 (n_head));
255-
256- const float m0 = powf (2 .0f , -(max_bias) / n_head_log2);
257- const float m1 = powf (2 .0f , -(max_bias / 2 .0f ) / n_head_log2);
258- const float slope =
259- (max_bias > 0 .0f ) ? h < n_head_log2 ? powf (m0, h + 1 ) : powf (m1, 2 * (h - n_head_log2) + 1 ) : 1 .0f ;
252+ if (max_bias > 0 ) {
253+ GGML_LOG_WARN (" OpenVINO backend does not support SOFT_MAX with max_bias > 0\n " );
254+ return true ;
255+ }
256+ }
260257
261- if (slope != 1 .0f ) {
262- GGML_LOG_WARN (" OpenVINO backend does not support SOFT_MAX with slope != 1.0f\n " );
258+ if (op->op == GGML_OP_FLASH_ATTN_EXT) {
259+ if (op->src [4 ] != nullptr ) {
260+ GGML_LOG_WARN (" OpenVINO backend does not support FLASH_ATTN_EXT with sinks\n " );
261+ return true ;
262+ }
263+ float scale = 1 .0f ;
264+ float max_bias = 0 .0f ;
265+ float logit_softcap = 0 .0f ;
266+ const auto * op_params = op->op_params ;
267+ memcpy (&scale, (const float *) op_params + 0 , sizeof (float ));
268+ memcpy (&max_bias, (const float *) op_params + 1 , sizeof (float ));
269+ memcpy (&logit_softcap, (const float *) op_params + 2 , sizeof (float ));
270+ if (max_bias > 0 ) {
271+ GGML_LOG_WARN (" OpenVINO backend does not support FLASH_ATTN_EXT with max_bias > 0\n " );
272+ return true ;
273+ }
274+ if (logit_softcap != 0 ) {
275+ GGML_LOG_WARN (" OpenVINO backend does not support FLASH_ATTN_EXT with logit_softcap != 0\n " );
263276 return true ;
264277 }
265278 }
@@ -357,7 +370,8 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
357370 GGML_OP_ROPE,
358371 GGML_OP_RMS_NORM,
359372 GGML_OP_SCALE,
360- GGML_OP_SOFT_MAX,
373+ // SOFT_MAX is left disabled: it was not updated because it has been replaced by flash_attn_ext
374+ // GGML_OP_SOFT_MAX,
361375 GGML_OP_SET_ROWS,
362376 GGML_OP_FLASH_ATTN_EXT,
363377 GGML_OP_CPY};
@@ -366,6 +380,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
366380 };
367381 static const std::set<ggml_glu_op> supported_glu_ops{
368382 GGML_GLU_OP_SWIGLU,
383+ GGML_GLU_OP_GEGLU,
369384 };
370385
371386 switch (op->op ) {
0 commit comments