@@ -79,7 +79,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, ggml_cgraph *
 
     ov::AnyMap config;
     if (device == "GPU") {
-        auto * disable_sdpa_optimization = getenv("GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION");
+        auto * disable_sdpa_optimization = getenv("GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION");
         if (disable_sdpa_optimization && std::string(disable_sdpa_optimization) != "0") {
             config = {
                 {"GPU_ENABLE_SDPA_OPTIMIZATION", "0"}
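For context (not part of the patch): a minimal sketch of how an env-var-driven ov::AnyMap like the one above is typically handed to the OpenVINO GPU plugin. The property key is the one shown in the diff; `core`, `model`, and the helper name are placeholders for objects the backend already holds.

#include <cstdlib>
#include <memory>
#include <string>
#include <openvino/openvino.hpp>

// Hypothetical helper, for illustration only: build the config map from the
// environment and compile the model for the GPU device with it.
ov::CompiledModel compile_for_gpu(ov::Core & core, const std::shared_ptr<ov::Model> & model) {
    ov::AnyMap config;
    const char * disable_sdpa = std::getenv("GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION");
    if (disable_sdpa && std::string(disable_sdpa) != "0") {
        // Any non-"0" value turns off the plugin's fused SDPA path, mirroring the diff above.
        config = {{"GPU_ENABLE_SDPA_OPTIMIZATION", "0"}};
    }
    return core.compile_model(model, "GPU", config);
}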
@@ -104,17 +104,17 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, ggml_cgraph *
     }
 
     static std::mutex cache_mutex;
-    static std::unordered_map<ggml_cgraph*, std::shared_ptr<ov::InferRequest>> infer_request_cache;
-    static std::unordered_map<ggml_cgraph*, std::vector<std::string>> ov_input_names_cache;
-    static std::unordered_map<ggml_cgraph*, std::vector<std::string>> ov_output_names_cache;
+    static std::unordered_map<ggml_cgraph *, std::shared_ptr<ov::InferRequest>> infer_request_cache;
+    static std::unordered_map<ggml_cgraph *, std::vector<std::string>> ov_input_names_cache;
+    static std::unordered_map<ggml_cgraph *, std::vector<std::string>> ov_output_names_cache;
     // For NPU
-    static std::unordered_map<ggml_cgraph*, std::shared_ptr<ov::InferRequest>> decode_infer_request_cache;
+    static std::unordered_map<ggml_cgraph *, std::shared_ptr<ov::InferRequest>> decode_infer_request_cache;
 
     auto kv_tensors = get_kv_tensors(cgraph);
     std::shared_ptr<GgmlOvDecoder> ggml_decoder;
     std::shared_ptr<ov::InferRequest> infer_request;
 
-    const ggml_tensor* inp_pos = get_inp_pos_tensor(cgraph);
+    const ggml_tensor * inp_pos = get_inp_pos_tensor(cgraph);
     bool is_first_token = get_is_first_token(inp_pos);
 
     int64_t decoder_end_time;
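A hedged sketch (illustrative only, not the backend's actual helper) of the lookup-or-create pattern these static, mutex-guarded per-graph caches imply; `build_infer_request` is a hypothetical factory standing in for the real graph-conversion and compile path.

#include <memory>
#include <mutex>
#include <unordered_map>
#include <openvino/openvino.hpp>

struct ggml_cgraph;  // from ggml.h

// Hypothetical factory: converts the ggml graph and compiles it, returning an infer request.
std::shared_ptr<ov::InferRequest> build_infer_request(struct ggml_cgraph * cgraph);

std::shared_ptr<ov::InferRequest> get_or_create_infer_request(struct ggml_cgraph * cgraph) {
    static std::mutex cache_mutex;
    static std::unordered_map<struct ggml_cgraph *, std::shared_ptr<ov::InferRequest>> cache;

    std::lock_guard<std::mutex> lock(cache_mutex);
    auto it = cache.find(cgraph);
    if (it == cache.end()) {
        // First request for this graph: build once, then reuse on later calls.
        it = cache.emplace(cgraph, build_infer_request(cgraph)).first;
    }
    return it->second;
}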
@@ -210,7 +210,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, ggml_cgraph *
 
     if (!is_static) {
         auto states = infer_request->query_state();
-        int32_t kv_len = *(int32_t *) inp_pos->data;
+        int32_t kv_len = *(int32_t *) inp_pos->data;
         int32_t kv_len_in_state = states[0].get_state().get_shape()[1];
 
         // outdated if:
@@ -222,22 +222,21 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, ggml_cgraph *
             auto state_name = states[0].get_name();
             state_name = state_name.substr(0, state_name.size() / 2);
             auto state_shape = state_tensor.get_shape();
-            auto * ggml_tensor = kv_tensors[state_name];
+            auto * ggml_tensor = kv_tensors[state_name];
             auto offset = (kv_len - 1) * state_shape[2] * state_shape[3] * ggml_type_size(ggml_tensor->type);
             auto size = state_shape[2] * state_shape[3] * ggml_type_size(ggml_tensor->type);
             state_outdated =
-                std::memcmp((char *) ggml_tensor->data + offset, (char *) state_tensor.data() + offset, size) != 0;
+                std::memcmp((char *) ggml_tensor->data + offset, (char *) state_tensor.data() + offset, size) != 0;
         }
 
         if (state_outdated) {
             GGML_LOG_DEBUG(
                 "GGML OpenVINO Backend: updating kv cache states from ggml tensors (kv_len: %d, kv_len_in_state: %d)\n",
-                kv_len,
-                kv_len_in_state);
-            for (auto & state : states) {
+                kv_len, kv_len_in_state);
+            for (auto & state : states) {
                 auto state_name = state.get_name();
                 state_name = state_name.substr(0, state_name.size() / 2);
-                auto * ggml_tensor = kv_tensors[state_name];
+                auto * ggml_tensor = kv_tensors[state_name];
                 auto state_shape = state.get_state().get_shape();
                 state_shape[1] = kv_len;
                 ov::Tensor state_tensor(state.get_state().get_element_type(), state_shape, ggml_tensor->data);
@@ -277,15 +276,16 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, ggml_cgraph *
         }
     }
 
-    for (auto & state : infer_request->query_state()) {
+    for (auto & state : infer_request->query_state()) {
         auto state_name = state.get_name();
         state_name = state_name.substr(0, state_name.size() / 2);
         auto state_tensor = state.get_state();
         auto state_shape = state_tensor.get_shape();
-        auto * ggml_tensor = kv_tensors[state_name];
+        auto * ggml_tensor = kv_tensors[state_name];
         auto size = state_shape[2] * state_shape[3] * inp_pos->ne[0] * ggml_type_size(ggml_tensor->type);
-        auto offset = state_shape[2] * state_shape[3] * (*(int32_t *) inp_pos->data) * ggml_type_size(ggml_tensor->type);
-        std::memcpy((char *) ggml_tensor->data + offset, (char *) state_tensor.data() + offset, size);
+        auto offset =
+            state_shape[2] * state_shape[3] * (*(int32_t *) inp_pos->data) * ggml_type_size(ggml_tensor->type);
+        std::memcpy((char *) ggml_tensor->data + offset, (char *) state_tensor.data() + offset, size);
     }
 
     auto end_time = ggml_time_us();
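A minimal standalone sketch (illustrative names, not the backend's API) of the byte arithmetic used in the loop above and in the earlier staleness probe: with the state laid out as [1, seq, dim2, dim3], each token row occupies dim2 * dim3 elements, so a run of tokens maps to an offset/size pair that can be memcmp'd or memcpy'd directly.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Byte offset of token row `token_idx`; elem_size plays the role of ggml_type_size(type).
size_t kv_row_offset(int32_t token_idx, size_t dim2, size_t dim3, size_t elem_size) {
    return (size_t) token_idx * dim2 * dim3 * elem_size;
}

// Byte size of `n_tokens` consecutive rows.
size_t kv_rows_size(int64_t n_tokens, size_t dim2, size_t dim3, size_t elem_size) {
    return (size_t) n_tokens * dim2 * dim3 * elem_size;
}

// True if the most recently written row (kv_len - 1) differs between the two copies,
// mirroring the memcmp-based "state_outdated" check shown earlier in the diff.
bool last_row_differs(const char * ggml_data, const char * state_data,
                      int32_t kv_len, size_t dim2, size_t dim3, size_t elem_size) {
    size_t off = kv_row_offset(kv_len - 1, dim2, dim3, elem_size);
    size_t sz  = kv_rows_size(1, dim2, dim3, elem_size);
    return std::memcmp(ggml_data + off, state_data + off, sz) != 0;
}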
@@ -547,7 +547,7 @@ void set_zero_diagonal(std::vector<float> & matrix, size_t dim) {
     }
 }
 
-const ggml_tensor* get_inp_pos_tensor(ggml_cgraph* cgraph) {
+const ggml_tensor * get_inp_pos_tensor(ggml_cgraph * cgraph) {
     for (int i = 0; i < cgraph->n_nodes; ++i) {
         auto * op = cgraph->nodes[i];
         for (int j = 0; j < GGML_MAX_SRC; ++j) {
@@ -564,21 +564,21 @@ const ggml_tensor* get_inp_pos_tensor(ggml_cgraph* cgraph) {
     throw std::runtime_error("get_inp_pos_tensor: inp_pos not found in cgraph");
 }
 
-bool get_is_first_token(const ggml_tensor* inp_pos) {
-    return *(int32_t *) inp_pos->data == 0;
+bool get_is_first_token(const ggml_tensor * inp_pos) {
+    return *(int32_t *) inp_pos->data == 0;
 }
 
-std::unordered_map<std::string, ggml_tensor*> get_kv_tensors(struct ggml_cgraph * cgraph) {
-    static std::unordered_map<struct ggml_cgraph *, std::unordered_map<std::string, ggml_tensor*>> kv_tensors_cache;
+std::unordered_map<std::string, ggml_tensor *> get_kv_tensors(struct ggml_cgraph * cgraph) {
+    static std::unordered_map<struct ggml_cgraph *, std::unordered_map<std::string, ggml_tensor *>> kv_tensors_cache;
 
     auto it = kv_tensors_cache.find(cgraph);
     if (it != kv_tensors_cache.end()) {
         return it->second;
     }
 
-    std::unordered_map<std::string, ggml_tensor*> kv_tensors;
+    std::unordered_map<std::string, ggml_tensor *> kv_tensors;
     for (int i = 0; i < cgraph->n_nodes; ++i) {
-        auto * op = cgraph->nodes[i];
+        auto * op = cgraph->nodes[i];
         if (op->op == GGML_OP_SET_ROWS) {
             assert(std::string(op->src[2]->name).find("cache_") == 0);
             kv_tensors[std::string(op->src[2]->name)] = op->src[2];