File tree Expand file tree Collapse file tree 1 file changed +7
-5
lines changed Expand file tree Collapse file tree 1 file changed +7
-5
lines changed Original file line number Diff line number Diff line change @@ -80,11 +80,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
8080
8181 bool is_static = device == " NPU" ? true : false ;
8282 ov::AnyMap config;
83- if (device == " GPU" ) {
84- config = {
85- {" GPU_ENABLE_SDPA_OPTIMIZATION" , " 0" }
86- };
87- }
8883
8984 if (getenv (" GGML_OPENVINO_DUMP_CGRAPH" )) {
9085 std::string filename = " cgraph.txt" ;
@@ -186,6 +181,13 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
186181 ov::serialize (model, timestamped_filename);
187182 }
188183
184+ auto * disable_sdpa_optimization = getenv (" GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION" );
185+ if (disable_sdpa_optimization && std::string (disable_sdpa_optimization) != " 0" ) {
186+ config = {
187+ {" GPU_ENABLE_SDPA_OPTIMIZATION" , " 0" }
188+ };
189+ }
190+
189191 auto compiled_model = core.compile_model (model, device, config);
190192 compile_end_time = ggml_time_us ();
191193 infer_request_cache[cgraph] = std::make_shared<ov::InferRequest>(compiled_model.create_infer_request ());
You can’t perform that action at this time.
0 commit comments