@@ -176,15 +176,15 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
             break;
         }
         case GGML_OP_CPY: {
-            if (ggml_is_contiguous(node)) {
+            if (std::string(node->src[1]->name).find("cache_k") == 0) {
                 // Write K to cache_k
                 m_op_case = 1;
             } else {
                 // Write V to cache_v
                 m_op_case = 2;
             }
             break;
-        }
+        }
         case GGML_OP_PERMUTE: {
             if (node->src[0]->view_src == nullptr) {
                 // Permute Qcur
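The CPY change above stops inferring the destination from contiguity and instead keys off the destination tensor's name. A minimal standalone sketch of that prefix test (the helper name is hypothetical; it assumes llama.cpp's convention of naming KV-cache tensors `cache_k_*` / `cache_v_*`):

```cpp
#include <string>

// Hypothetical helper showing the same check: std::string::find(...) == 0
// is true only when the pattern matches at index 0, i.e. a prefix test.
int cpy_op_case(const char* dst_name) {
    if (std::string(dst_name).find("cache_k") == 0) {
        return 1;  // op case 1: write K to cache_k
    }
    return 2;      // op case 2: anything else is treated as a cache_v write
}
```

With C++20 the same test can be written as `std::string_view(dst_name).starts_with("cache_k")`, which avoids the temporary `std::string`.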
@@ -198,23 +198,21 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
             }
             break;
         }
-        case GGML_OP_GET_ROWS:
-        {
-            if (node->src[1]->op == GGML_OP_VIEW) {
-                m_op_case = 2;
-            } else {
-                m_op_case = 1;
-            }
-            break;
+        case GGML_OP_GET_ROWS: {
+            if (node->src[1]->op == GGML_OP_VIEW) {
+                m_op_case = 2;
+            } else {
+                m_op_case = 1;
             }
-        case GGML_OP_ROPE:
-        {
-            if (node->src[0]->op == GGML_OP_VIEW) {
-                m_op_case = 2;
-            } else {
-                m_op_case = 1;
-            }
+            break;
+        }
+        case GGML_OP_ROPE: {
+            if (node->src[0]->op == GGML_OP_VIEW) {
+                m_op_case = 2;
+            } else {
+                m_op_case = 1;
             }
+        }
         default:
             break;
     }
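The GET_ROWS and ROPE cases follow the same pattern: op case 2 when the relevant source was produced by a VIEW (a read through the KV cache), op case 1 otherwise. A compressed sketch of that predicate, assuming only the `ggml_tensor` definition from ggml.h (the helper name is illustrative):

```cpp
#include "ggml.h"

// GET_ROWS keys off src[1] and ROPE off src[0], but the predicate is
// identical: a VIEW source selects op case 2, anything else op case 1.
static int view_op_case(const ggml_tensor* src) {
    return src->op == GGML_OP_VIEW ? 2 : 1;
}
```

Note that the rewritten ROPE case ends without its own `break;`, so control falls through to `default:`, whose `break;` exits the switch; the old code relied on the same fallthrough, so behavior is unchanged.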
@@ -405,17 +403,16 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor)
             weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_f16);
             break;
         }
-        case GGML_TYPE_BF16:
-        {
-            const auto* ptr = reinterpret_cast<const uint16_t*>(tensor->data);
-            std::vector<ov::bfloat16> data_bf16;
-            data_bf16.reserve(ne_total);
-            for (int i = 0; i < ne_total; ++i) {
-                data_bf16.push_back(ov::bfloat16::from_bits(ptr[i]));
-            }
-            weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_bf16);
-            break;
+        case GGML_TYPE_BF16: {
+            const auto* ptr = reinterpret_cast<const uint16_t*>(tensor->data);
+            std::vector<ov::bfloat16> data_bf16;
+            data_bf16.reserve(ne_total);
+            for (int i = 0; i < ne_total; ++i) {
+                data_bf16.push_back(ov::bfloat16::from_bits(ptr[i]));
             }
+            weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_bf16);
+            break;
+        }
         default:
             throw std::invalid_argument("Unsupported tensor type");
     }
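The new BF16 branch copies raw 16-bit patterns into `ov::bfloat16` values via `from_bits`, which reinterprets the bits rather than converting numerically. A self-contained sketch of the same conversion (the function name and parameters are illustrative, not the decoder's API):

```cpp
#include <cstdint>
#include <memory>
#include <vector>

#include "openvino/core/type/bfloat16.hpp"
#include "openvino/op/constant.hpp"

// Build an OpenVINO bf16 Constant from a buffer of raw bfloat16 bit patterns,
// the layout ggml uses for GGML_TYPE_BF16 tensors.
std::shared_ptr<ov::Node> bf16_constant(const void* data, size_t ne_total, const ov::Shape& shape) {
    const auto* ptr = reinterpret_cast<const uint16_t*>(data);
    std::vector<ov::bfloat16> data_bf16;
    data_bf16.reserve(ne_total);
    for (size_t i = 0; i < ne_total; ++i) {
        // from_bits() reinterprets the 16-bit pattern; no numeric conversion.
        data_bf16.push_back(ov::bfloat16::from_bits(ptr[i]));
    }
    return std::make_shared<ov::op::v0::Constant>(ov::element::bf16, shape, data_bf16);
}
```

This constructor copies the data; since the bits are already in bf16 layout, one way to avoid the copy for large weights is constructing the Constant from an `ov::Tensor` that aliases the original buffer, at the cost of managing that buffer's lifetime.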
@@ -614,8 +611,8 @@ int32_t* GgmlOvDecoder::get_output_op_params(const std::string& name) const {
 
 void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>)> node_visitor) const {
     for (const auto& node : m_nodes) {
-        auto decoder = std::make_shared<GgmlOvDecoder>(node, m_cgraph, m_is_static, m_is_first_token, m_context_size,
-                                                       m_num_heads, m_num_heads_kv, m_head_size);
+        auto decoder = std::make_shared<GgmlOvDecoder>(
+            node, m_cgraph, m_is_static, m_is_first_token, m_context_size, m_num_heads, m_num_heads_kv, m_head_size);
         node_visitor(decoder);
     }
 }
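`visit_subgraph` wraps every ggml node in a fresh per-node decoder and hands it to the callback, a conventional visitor over the compute graph. A minimal self-contained illustration of the pattern (the `Node`/`Graph` types here are stand-ins, not the real decoder API):

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <vector>

struct Node {
    int id;
};

struct Graph {
    std::vector<Node> nodes;

    // Same shape as visit_subgraph: re-wrap each element, then invoke the visitor.
    void visit(const std::function<void(std::shared_ptr<Node>)>& visitor) const {
        for (const auto& node : nodes) {
            visitor(std::make_shared<Node>(node));
        }
    }
};

int main() {
    Graph graph{{{1}, {2}, {3}}};
    graph.visit([](std::shared_ptr<Node> n) { std::cout << "visiting node " << n->id << '\n'; });
    return 0;
}
```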
@@ -667,12 +664,12 @@ const std::string& GgmlOvDecoder::get_op_type() const {
     };
 
     switch (m_node->op) {
-    case GGML_OP_UNARY:
-        return unary_ops.at(ggml_get_unary_op(m_node));
-    case GGML_OP_GLU:
-        return glu_ops.at(ggml_get_glu_op(m_node));
-    default:
-        return ops.at(m_node->op);
+        case GGML_OP_UNARY:
+            return unary_ops.at(ggml_get_unary_op(m_node));
+        case GGML_OP_GLU:
+            return glu_ops.at(ggml_get_glu_op(m_node));
+        default:
+            return ops.at(m_node->op);
     }
     static const std::string unknown_op = "UNKNOWN_GGML_OP";
     return unknown_op;
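Since the `default:` branch returns `ops.at(m_node->op)`, the `unknown_op` fallback after the switch is unreachable, and an op missing from the tables surfaces as `std::out_of_range` thrown by `.at()`. A small sketch of that behavior (the enum and table contents are illustrative):

```cpp
#include <iostream>
#include <map>
#include <string>

enum class Op { Add, Mul, Rope };  // stand-in for ggml's op enum

const std::string& op_name(Op op) {
    static const std::map<Op, std::string> ops = {
        {Op::Add, "GGML_OP_ADD"},
        {Op::Mul, "GGML_OP_MUL"},
    };
    // .at() throws std::out_of_range for ops absent from the table, so any
    // fallback placed after this call never executes.
    return ops.at(op);
}

int main() {
    std::cout << op_name(Op::Add) << '\n';  // prints GGML_OP_ADD
    try {
        op_name(Op::Rope);                  // not in the table
    } catch (const std::out_of_range&) {
        std::cout << "unmapped op throws out_of_range\n";
    }
    return 0;
}
```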