@@ -5,6 +5,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
 #include <execution>
@@ -15,6 +16,8 @@
 #include <openvino/core/dimension.hpp>
 #include <openvino/core/node.hpp>
 #include <openvino/core/partial_shape.hpp>
+#include <openvino/core/type/bfloat16.hpp>
+#include <openvino/core/type/element_type.hpp>
 #include <openvino/core/type/float16.hpp>
 #include <openvino/op/constant.hpp>
 #include <openvino/op/parameter.hpp>
@@ -71,9 +74,19 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap
     }
 }
 
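+// Decode an entire cgraph at once: record every node and register its inputs
+// and outputs via set_input_output() in naive mode (used by the op test path).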
+GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph) {
+    m_cgraph = cgraph;
+    for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
+        auto* cur_node = cgraph->nodes[node_n];
+        m_nodes.push_back(cur_node);
+        set_input_output(cur_node, true);
+    }
+}
+
-// Called in GgmlOvDecoder constructor. Two cases: 1. constructing a decoder for the whole graph;
-// 2. constructing a decoder for a node.
-void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
+// Called in GgmlOvDecoder constructor. Three cases: 1. constructing a decoder for the whole graph;
+// 2. constructing a decoder for a node;
+// 3. constructing a decoder for the whole graph naively (op test case)
+void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
     std::string node_name;
     if (node->op == GGML_OP_CPY) {
         // CPY updates the input tensor in place. For later ov op that uses the
@@ -98,8 +111,14 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
         m_inputs[src_name] = src;
         m_op_node_name.emplace_back(src_name, ggml_op_name(node->op));
 
-        // If called for the whole graph, create constant nodes for weights and param nodes for inputs
-        if (!m_node && !src->view_src) {
+        // Add model inputs and weight constants, if called for the whole graph
+        if (naive) {
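+            // Naive mode: every source tensor becomes an ov::Parameter model input.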
+            auto param_node = std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), get_graph_input_shape(src));
+            param_node->set_friendly_name(src_name);
+            param_node->output(0).get_tensor().set_names({src_name});
+            m_model_inputs[src_name] = param_node;
+
+        } else if (!m_node && !src->view_src) {
             ggml_backend_buffer* buffer = src->buffer;
 
             if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || src->flags & GGML_TENSOR_FLAG_INPUT) {
@@ -118,7 +137,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
         }
     }
 
-    if (!m_node) {
+    // Add model outputs, if called for the whole graph
+    if (naive) {
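+        // Naive mode: record every node as a model output.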
+        m_model_output_names.push_back(node->name);
+    } else if (!m_node) {
         static std::set<std::string> debug_output_names = {};
         // Workaround: the final tensor "result_output" does not have GGML_TENSOR_FLAG_OUTPUT flag set in cgraph
         if (node->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || node->flags & GGML_TENSOR_FLAG_OUTPUT ||
@@ -164,17 +186,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
             m_op_case = 2;
         }
         break;
-    }
-    case GGML_OP_MUL_MAT: {
-        if (node->src[0]->view_src == nullptr) {
-            m_op_case = 1;
-        } else if (std::string(node->src[0]->name).find("cache_k") == 0) {
-            m_op_case = 2;
-        } else if (std::string(node->src[0]->name).find("cache_v") == 0) {
-            m_op_case = 3;
-        }
-        break;
     }
     case GGML_OP_PERMUTE: {
         if (node->src[0]->view_src == nullptr) {
             // Permute Qcur
@@ -188,6 +200,23 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
         }
         break;
     }
+    case GGML_OP_GET_ROWS:
+    {
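+        // Case 2: the row-index tensor comes from a VIEW; case 1: plain lookup.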
+        if (node->src[1]->op == GGML_OP_VIEW) {
+            m_op_case = 2;
+        } else {
+            m_op_case = 1;
+        }
+        break;
+    }
+    case GGML_OP_ROPE:
+    {
+        if (node->src[0]->op == GGML_OP_VIEW) {
+            m_op_case = 2;
+        } else {
+            m_op_case = 1;
+        }
+        break;
+    }
     default:
         break;
     }
@@ -237,6 +266,9 @@ ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor* src) co
         input_shape = ov::PartialShape{m_context_size, m_num_heads_kv, m_head_size};
     } else if (std::string(src->name).find("cache_v") == 0) {
         input_shape = ov::PartialShape{m_num_heads_kv, m_head_size, m_context_size};
+    } else if (src->op == GGML_OP_VIEW) {
+        // Added to make test-backend-ops work: a VIEW input uses its source tensor's shape.
+        input_shape = ov::PartialShape{get_shape(src->view_src)};
     } else {
         input_shape = ov::PartialShape{get_shape(src)};
     }
@@ -373,6 +405,17 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor)
         weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_f16);
         break;
     }
+    case GGML_TYPE_BF16:
+    {
+        const auto* ptr = reinterpret_cast<const uint16_t*>(tensor->data);
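+        // ov::bfloat16::from_bits() rebuilds each value from its raw bit pattern (bit-exact copy).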
+        std::vector<ov::bfloat16> data_bf16;
+        data_bf16.reserve(ne_total);
+        for (int64_t i = 0; i < ne_total; ++i) {
+            data_bf16.push_back(ov::bfloat16::from_bits(ptr[i]));
+        }
+        weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_bf16);
+        break;
+    }
     default:
         throw std::invalid_argument("Unsupported tensor type");
     }
@@ -496,6 +539,9 @@ ov::element::Type GgmlOvDecoder::get_ov_type(const ggml_tensor* tensor) {
     case GGML_TYPE_F16:
         type = ov::element::f16;
         break;
+    case GGML_TYPE_BF16:
+        type = ov::element::bf16;
+        break;
     case GGML_TYPE_I64:
         type = ov::element::i64;
         break;
@@ -576,6 +622,7 @@ void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecode
 
 const std::string& GgmlOvDecoder::get_op_type() const {
     static const std::map<ggml_op, std::string> ops = {
+        {GGML_OP_NONE, "GGML_OP_NONE"},
         {GGML_OP_ACC, "GGML_OP_ACC"},
         {GGML_OP_ADD, "GGML_OP_ADD"},
         {GGML_OP_ADD1, "GGML_OP_ADD1"},