Skip to content

Commit 97d1ddd

Browse files
committed
env variable GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION added
1 parent f6d7802 commit 97d1ddd

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

ggml/src/ggml-openvino/utils.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -80,11 +80,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
8080

8181
bool is_static = device == "NPU" ? true : false;
8282
ov::AnyMap config;
83-
if (device == "GPU") {
84-
config = {
85-
{"GPU_ENABLE_SDPA_OPTIMIZATION", "0"}
86-
};
87-
}
8883

8984
if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
9085
std::string filename = "cgraph.txt";
@@ -186,6 +181,13 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
186181
ov::serialize(model, timestamped_filename);
187182
}
188183

184+
auto* disable_sdpa_optimization = getenv("GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION");
185+
if (disable_sdpa_optimization && std::string(disable_sdpa_optimization) != "0") {
186+
config = {
187+
{"GPU_ENABLE_SDPA_OPTIMIZATION", "0"}
188+
};
189+
}
190+
189191
auto compiled_model = core.compile_model(model, device, config);
190192
compile_end_time = ggml_time_us();
191193
infer_request_cache[cgraph] = std::make_shared<ov::InferRequest>(compiled_model.create_infer_request());

0 commit comments

Comments
 (0)