Commit cc07086

Fix Phi3 ROPE; Add test-backend-ops
1 parent: 1dd723d
18 files changed: +550 -202 lines

ggml/src/ggml-openvino/.clang-format

Lines changed: 4 additions & 22 deletions
@@ -5,6 +5,10 @@ AlignConsecutiveDeclarations: false
 ReferenceAlignment: Left
 PointerAlignment: Left
 Cpp11BracedListStyle: true
+AccessModifierOffset: -4
+BinPackArguments: false
+BinPackParameters: false
+BreakBeforeBraces: Attach
 
 Language: Cpp
 AlignAfterOpenBracket: Align
@@ -27,29 +31,7 @@ AllowShortIfStatementsOnASingleLine: Never
 AllowShortLambdasOnASingleLine: Inline
 AllowShortLoopsOnASingleLine: false
 AlwaysBreakBeforeMultilineStrings: true
-BinPackArguments: true
-BinPackParameters: true # OnePerLine
 BitFieldColonSpacing: Both
-BreakBeforeBraces: Custom # Attach
-BraceWrapping:
-  AfterCaseLabel: true
-  AfterClass: false
-  AfterControlStatement: false
-  AfterEnum: false
-  AfterFunction: false
-  AfterNamespace: false
-  AfterObjCDeclaration: false
-  AfterStruct: false
-  AfterUnion: false
-  AfterExternBlock: false
-  BeforeCatch: false
-  BeforeElse: false
-  BeforeLambdaBody: false
-  BeforeWhile: false
-  IndentBraces: false
-  SplitEmptyFunction: false
-  SplitEmptyRecord: false
-  SplitEmptyNamespace: false
 # BreakAdjacentStringLiterals: true
 BreakAfterAttributes: Never
 BreakBeforeBinaryOperators: None
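Editorial note, not part of the commit: the custom BraceWrapping block is replaced by BreakBeforeBraces: Attach, and argument/parameter bin-packing is turned off. Under the new options, the opening brace stays on the declaration line, and a parameter list that no longer fits on one line is broken one parameter per line. A rough sketch follows; the function name and parameters are illustrative only.

    // Sketch of the layout the updated .clang-format would produce:
    // attached brace, one parameter per line once the list overflows.
    void convert_graph(const std::map<std::string, ggml_tensor*>& inputs,
                       bool is_static,
                       int context_size,
                       int num_heads,
                       int num_heads_kv,
                       int head_size) {
        // function body elided
    }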

ggml/src/ggml-openvino/ggml-decoder.cpp

Lines changed: 62 additions & 15 deletions
@@ -5,6 +5,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
 #include <execution>
@@ -15,6 +16,8 @@
 #include <openvino/core/dimension.hpp>
 #include <openvino/core/node.hpp>
 #include <openvino/core/partial_shape.hpp>
+#include <openvino/core/type/bfloat16.hpp>
+#include <openvino/core/type/element_type.hpp>
 #include <openvino/core/type/float16.hpp>
 #include <openvino/op/constant.hpp>
 #include <openvino/op/parameter.hpp>
@@ -71,9 +74,19 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap
     }
 }
 
+GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph) {
+    m_cgraph = cgraph;
+    for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
+        auto* cur_node = cgraph->nodes[node_n];
+        m_nodes.push_back(cur_node);
+        set_input_output(cur_node, true);
+    }
+}
+
 // Called in GgmlOvDecoder constructor. Two cases: 1. constructing a decoder for the whole graph;
-// 2. constructing a decoder for a node.
-void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
+// 2. constructing a decoder for a node;
+// 3. constructing a decoder for the whole graph naively (op test case)
+void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
     std::string node_name;
     if (node->op == GGML_OP_CPY) {
         // CPY updates the input tensor in place. For later ov op that uses the
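Editorial sketch: the new constructor above walks every node of the cgraph and calls set_input_output in naive mode, which is what a single-op test graph needs. A hypothetical way it might be driven is shown below; convert_test_graph and translate_node are assumed names for illustration only, while GgmlOvDecoder and visit_subgraph come from this file.

    // Hypothetical sketch: drive the naive whole-graph decoder over a small
    // cgraph, such as one built by a test-backend-ops case.
    #include <memory>
    #include "ggml-decoder.h"

    static void convert_test_graph(struct ggml_cgraph* cgraph) {
        auto graph_decoder = std::make_shared<GgmlOvDecoder>(cgraph);  // naive constructor added above
        graph_decoder->visit_subgraph([](const auto& node_decoder) {
            translate_node(node_decoder);  // assumed hook: translate one ggml op into OpenVINO ops
        });
    }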
@@ -98,8 +111,14 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
         m_inputs[src_name] = src;
         m_op_node_name.emplace_back(src_name, ggml_op_name(node->op));
 
-        // If called for the whole graph, create constant nodes for weights and param nodes for inputs
-        if (!m_node && !src->view_src) {
+        // Add model inputs and weights constants, if called for the whole graph
+        if (naive) {
+            auto param_node = std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), get_graph_input_shape(src));
+            param_node->set_friendly_name(src_name);
+            param_node->output(0).get_tensor().set_names({src_name});
+            m_model_inputs[src_name] = param_node;
+
+        } else if (!m_node && !src->view_src) {
             ggml_backend_buffer* buffer = src->buffer;
 
             if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || src->flags & GGML_TENSOR_FLAG_INPUT) {
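The naive branch above registers each graph input as an ov::op::v0::Parameter keyed by the ggml tensor name. The same pattern in isolation, as an editorial sketch (make_graph_input is a hypothetical helper, not part of this commit):

    // Build an OpenVINO Parameter for a graph input and tag it with the
    // tensor's name so data can be bound to it by name at inference time.
    #include <memory>
    #include <string>
    #include <openvino/core/partial_shape.hpp>
    #include <openvino/core/type/element_type.hpp>
    #include <openvino/op/parameter.hpp>

    std::shared_ptr<ov::op::v0::Parameter> make_graph_input(
            const std::string& name, ov::element::Type type, const ov::PartialShape& shape) {
        auto param = std::make_shared<ov::op::v0::Parameter>(type, shape);
        param->set_friendly_name(name);
        param->output(0).get_tensor().set_names({name});
        return param;
    }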
@@ -118,7 +137,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
         }
     }
 
-    if (!m_node) {
+    // Add model outputs, if called for the whole graph
+    if (naive) {
+        m_model_output_names.push_back(node->name);
+    } else if (!m_node) {
         static std::set<std::string> debug_output_names = {};
         // Workaround: the final tensor "result_output" does not have GGML_TENSOR_FLAG_OUTPUT flag set in cgraph
         if (node->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || node->flags & GGML_TENSOR_FLAG_OUTPUT ||
@@ -164,17 +186,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
             m_op_case = 2;
         }
         break;
-    }
-    case GGML_OP_MUL_MAT: {
-        if (node->src[0]->view_src == nullptr) {
-            m_op_case = 1;
-        } else if (std::string(node->src[0]->name).find("cache_k") == 0) {
-            m_op_case = 2;
-        } else if (std::string(node->src[0]->name).find("cache_v") == 0) {
-            m_op_case = 3;
         }
-        break;
-    }
     case GGML_OP_PERMUTE: {
         if (node->src[0]->view_src == nullptr) {
             // Permute Qcur
@@ -188,6 +200,23 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node) {
         }
         break;
     }
+    case GGML_OP_GET_ROWS:
+    {
+        if (node->src[1]->op == GGML_OP_VIEW) {
+            m_op_case = 2;
+        } else {
+            m_op_case = 1;
+        }
+        break;
+    }
+    case GGML_OP_ROPE:
+    {
+        if (node->src[0]->op == GGML_OP_VIEW) {
+            m_op_case = 2;
+        } else {
+            m_op_case = 1;
+        }
+    }
     default:
         break;
     }
@@ -237,6 +266,9 @@ ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor* src) co
         input_shape = ov::PartialShape{m_context_size, m_num_heads_kv, m_head_size};
     } else if (std::string(src->name).find("cache_v") == 0) {
         input_shape = ov::PartialShape{m_num_heads_kv, m_head_size, m_context_size};
+    } else if (src->op == GGML_OP_VIEW) {
+        // This case is added to make test-backend-ops work
+        input_shape = ov::PartialShape{get_shape(src->view_src)};
     } else {
         input_shape = ov::PartialShape{get_shape(src)};
     }
@@ -359,6 +391,17 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor)
         weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_f16);
         break;
     }
+    case GGML_TYPE_BF16:
+    {
+        const auto* ptr = reinterpret_cast<const uint16_t*>(tensor->data);
+        std::vector<ov::bfloat16> data_bf16;
+        data_bf16.reserve(ne_total);
+        for (int i = 0; i < ne_total; ++i) {
+            data_bf16.push_back(ov::bfloat16::from_bits(ptr[i]));
+        }
+        weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_bf16);
+        break;
+    }
     default:
         throw std::invalid_argument("Unsupported tensor type");
     }
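The GGML_TYPE_BF16 branch above wraps the raw 16-bit weight data via ov::bfloat16::from_bits. As background (editorial example, independent of this codebase): a bfloat16 value is exactly the top 16 bits of the corresponding IEEE-754 float32, so interpreting the stored bit pattern this way is lossless.

    // Standalone illustration: recover the float32 that a bfloat16 bit
    // pattern represents by shifting it into the high half of a uint32_t.
    #include <cstdint>
    #include <cstring>
    #include <iostream>

    static float bf16_bits_to_float(uint16_t bits) {
        uint32_t widened = static_cast<uint32_t>(bits) << 16;  // low mantissa bits become zero
        float out;
        std::memcpy(&out, &widened, sizeof(out));
        return out;
    }

    int main() {
        std::cout << bf16_bits_to_float(0x3FC0) << "\n";  // 0x3FC0 is the bfloat16 pattern for 1.5; prints 1.5
        return 0;
    }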
@@ -482,6 +525,9 @@ ov::element::Type GgmlOvDecoder::get_ov_type(const ggml_tensor* tensor) {
     case GGML_TYPE_F16:
         type = ov::element::f16;
         break;
+    case GGML_TYPE_BF16:
+        type = ov::element::bf16;
+        break;
     case GGML_TYPE_I64:
         type = ov::element::i64;
         break;
@@ -562,6 +608,7 @@ void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecode
 
 const std::string& GgmlOvDecoder::get_op_type() const {
     static const std::map<ggml_op, std::string> ops = {
+        {GGML_OP_NONE, "GGML_OP_NONE" },
         {GGML_OP_ACC, "GGML_OP_ACC" },
         {GGML_OP_ADD, "GGML_OP_ADD" },
         {GGML_OP_ADD1, "GGML_OP_ADD1" },

ggml/src/ggml-openvino/ggml-decoder.h

Lines changed: 6 additions & 4 deletions
@@ -15,6 +15,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
     GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgraph, bool is_static, bool is_first_token,
                   int context_size, int num_heads, int num_heads_kv, int head_size);
 
+    // Naive decoder
+    GgmlOvDecoder(struct ggml_cgraph* cgraph);
     virtual ov::Any get_attribute(const std::string& name) const override {
         return nullptr;
         GGML_UNUSED(name);
@@ -111,7 +113,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
     void clear_model_weights() { m_model_weights.clear(); }
 
 private:
-    void set_input_output(ggml_tensor* node);
+    void set_input_output(ggml_tensor* node, bool naive = false);
     void add_extra_inputs();
     static void dump_cgraph(const struct ggml_cgraph* cgraph, std::string& filename);
     static std::vector<size_t> get_shape(const ggml_tensor* tensor);
@@ -124,13 +126,13 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
     static std::shared_ptr<ov::Node> create_weight_node(ggml_tensor* tensor);
     void add_weight_const_parallel(std::map<std::string, std::shared_ptr<ov::Node>>& model_weights);
 
-    struct ggml_cgraph* m_cgraph;
+    struct ggml_cgraph* m_cgraph = nullptr;
+    ggml_tensor* m_node = nullptr;
+    std::vector<ggml_tensor*> m_nodes;
     std::map<std::string, ggml_tensor*> m_inputs;
     std::vector<std::string> m_input_names;
     std::map<std::string, ggml_tensor*> m_outputs;
    std::vector<std::string> m_output_names;
-    ggml_tensor* m_node;
-    std::vector<ggml_tensor*> m_nodes;
     std::string m_op_name;
     mutable std::string m_name;
     int m_op_case;
