Skip to content

Commit fa237a1

Browse files
committed
Apply EliminateZP only for NPU
1 parent 3346a33 commit fa237a1

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ namespace ggml {
1919
namespace pass {
2020

2121
FuseToSDPA::FuseToSDPA() {
22+
// Not maintained since FLASH_ATTN_EXT has replaced this pattern
2223
const auto m_k = ov::pass::pattern::any_input();
2324
const auto m_q = ov::pass::pattern::any_input();
2425
const auto m_qk = ov::pass::pattern::wrap_type<ov::op::v0::MatMul>({m_q, m_k});

ggml/src/ggml-openvino/openvino/translate_session.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
#include "ggml-openvino/openvino/utils.hpp"
2828
#include "input_model.hpp"
2929
#include "pass/eliminate_zp.hpp"
30-
#include "pass/fuse_to_sdpa.hpp"
3130
#include "pass/mark_decompression_convert_constant_folding.hpp"
3231

3332
namespace ov {
@@ -220,8 +219,9 @@ std::shared_ptr<Model> TranslateSession::apply_transformations(std::shared_ptr<M
220219
manager.register_pass<ov::pass::MakeStateful>(kv_param_res_pairs);
221220
}
222221

223-
manager.register_pass<pass::EliminateZeroPoints>();
224-
manager.register_pass<pass::FuseToSDPA>();
222+
if (ggml_model_decoder->is_static()) {
223+
manager.register_pass<pass::EliminateZeroPoints>();
224+
}
225225
manager.run_passes(model);
226226
}
227227
return model;

0 commit comments

Comments
 (0)