Skip to content

Commit 2f77e57

Browse files
committed
Support op SET_ROWS
1 parent 38a5658 commit 2f77e57

File tree

8 files changed

+93
-7
lines changed

8 files changed

+93
-7
lines changed

ggml/src/ggml-openvino/ggml-decoder.cpp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph) {
9090
// 3. constructing a decoder for the whole graph naively (op test case)
9191
void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
9292
std::string node_name;
93-
if (node->op == GGML_OP_CPY) {
93+
if (node->op == GGML_OP_CPY || node->op == GGML_OP_SET_ROWS) {
9494
// CPY updates the input tensor in place. For later ov op that uses the
9595
// input tensor of CPY, we need to make sure they get the updated tensor
9696
// by putting the src tensor name in the tensor_map in
@@ -151,9 +151,11 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
151151
if (node->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY) {
152152
assert(name.find("cache_k") == 0 || name.find("cache_v") == 0);
153153
}
154-
auto it = std::find(m_model_output_names.begin(), m_model_output_names.end(), name);
155-
if (it == m_model_output_names.end()) {
154+
if (auto it = std::find(m_model_output_names.begin(), m_model_output_names.end(), name);
155+
it == m_model_output_names.end()) {
156156
m_model_output_names.push_back(name);
157+
}
158+
if (auto it = std::find(m_kv_names.begin(), m_kv_names.end(), name); it == m_kv_names.end()) {
157159
m_kv_names.push_back(name);
158160
}
159161
}
@@ -166,6 +168,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
166168
m_op_case = 1;
167169
} else if (node->src[0]->ne[0] * node->src[0]->ne[1] == node->ne[0]) {
168170
m_op_case = 2;
171+
} else if (node->src[0]->ne[0] * node->src[0]->ne[1] == node->ne[1]) {
172+
m_op_case = 3;
169173
}
170174
break;
171175
}
@@ -270,6 +274,8 @@ ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor* src) co
270274
input_shape = ov::PartialShape{m_context_size, m_num_heads_kv, m_head_size};
271275
} else if (name.find("cache_v") == 0) {
272276
input_shape = ov::PartialShape{m_num_heads_kv, m_head_size, m_context_size};
277+
} else if (get_tensor_used_op(src)->op == GGML_OP_SET_ROWS) {
278+
input_shape = ov::PartialShape{1, 1, -1};
273279
} else if (src->op == GGML_OP_VIEW) {
274280
// This case is added to make test-backend-ops work
275281
input_shape = ov::PartialShape{get_shape(src->view_src)};
@@ -283,6 +289,8 @@ void GgmlOvDecoder::add_extra_inputs() {
283289
// Extra inputs:
284290
// 1. `past_token_len`, used to create indices for updating kv cache. Usually equal to inp_pos[0], except for
285291
// llama-perplexity.
292+
// Update: SET_ROWS replaces CPY for updating kv cache. The indices creation is not needed anymore. See:
293+
// https://github.com/ggml-org/llama.cpp/pull/14285
286294
// 2. `attention_size`, used in matmul's in the attention block. The shape of those matmul's are 32 aligned,
287295
// see llama_kv_cache_unified::get_n_kv and llama_kv_cache_unified::get_padding.
288296
// Not used for NPU
@@ -305,6 +313,10 @@ void GgmlOvDecoder::add_extra_inputs() {
305313
(int64_t) (node->src[1]->op_params[0] / node->src[1]->nb[0] / m_head_size / m_num_heads_kv);
306314
break;
307315
}
316+
if (node->op == GGML_OP_SET_ROWS && std::string(node->name).find("cache_k") == 0) {
317+
assert(node->src[1]->type == GGML_TYPE_I64);
318+
past_token_len = *(int64_t*) (node->src[1]->data);
319+
}
308320
}
309321

310322
if (past_token_len == -1) {
@@ -342,6 +354,18 @@ void GgmlOvDecoder::add_extra_inputs() {
342354
}
343355
}
344356

357+
// Returns the first node of m_cgraph (in node order) that lists `tensor` as one
// of its sources. Only the first consumer is reported, even if several nodes
// reference the same tensor.
// Throws std::runtime_error when no node in the graph uses `tensor` as a source.
const ggml_tensor* GgmlOvDecoder::get_tensor_used_op(const ggml_tensor* tensor) const {
    const int node_count = m_cgraph->n_nodes;
    for (int node_idx = 0; node_idx < node_count; ++node_idx) {
        const ggml_tensor* candidate = m_cgraph->nodes[node_idx];
        // Walk every source slot of the candidate node (GGML_MAX_SRC entries).
        for (const ggml_tensor* operand : candidate->src) {
            if (operand != tensor) {
                continue;
            }
            return candidate;
        }
    }
    throw std::runtime_error("Tensor not found in cgraph");
}
368+
345369
std::map<std::string, std::string> GgmlOvDecoder::get_kv_param_res_names() const {
346370
std::map<std::string, std::string> kv_param_res_names;
347371
for (const auto& name : m_kv_names) {
@@ -618,7 +642,8 @@ const std::string& GgmlOvDecoder::get_op_type() const {
618642
{GGML_OP_SOFT_MAX, "GGML_OP_SOFT_MAX" },
619643
{GGML_OP_SUB, "GGML_OP_SUB" },
620644
{GGML_OP_TRANSPOSE, "GGML_OP_TRANSPOSE"},
621-
{GGML_OP_VIEW, "GGML_OP_VIEW" }
645+
{GGML_OP_VIEW, "GGML_OP_VIEW" },
646+
{GGML_OP_SET_ROWS, "GGML_OP_SET_ROWS" },
622647
};
623648
static const std::map<ggml_unary_op, std::string> unary_ops = {
624649
{GGML_UNARY_OP_ABS, "GGML_UNARY_OP_ABS" },

ggml/src/ggml-openvino/ggml-decoder.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
117117

118118
static std::shared_ptr<ov::Node> create_weight_node(ggml_tensor* tensor);
119119
static std::map<std::string, std::shared_ptr<ov::Node>> create_weight_nodes(struct ggml_cgraph* cgraph);
120+
121+
const ggml_tensor* get_tensor_used_op(const ggml_tensor* tensor) const;
122+
120123
void clear_model_weights() { m_model_weights.clear(); }
121124

122125
private:

ggml/src/ggml-openvino/ggml-openvino.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
331331
static const std::set<ggml_op> supported_ops{GGML_OP_NONE, GGML_OP_ADD, GGML_OP_MUL, GGML_OP_MUL_MAT,
332332
GGML_OP_VIEW, GGML_OP_CONT, GGML_OP_CPY, GGML_OP_RESHAPE,
333333
GGML_OP_PERMUTE, GGML_OP_TRANSPOSE, GGML_OP_GET_ROWS, GGML_OP_ROPE,
334-
GGML_OP_RMS_NORM, GGML_OP_SCALE, GGML_OP_SOFT_MAX};
334+
GGML_OP_RMS_NORM, GGML_OP_SCALE, GGML_OP_SOFT_MAX, GGML_OP_SET_ROWS};
335335
static const std::set<ggml_unary_op> supported_unary_ops{
336336
GGML_UNARY_OP_SILU,
337337
};

ggml/src/ggml-openvino/openvino/node_context.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class NodeContext : public frontend::NodeContext {
4646
return m_decoder->get_input_stride(m_input_names[index]);
4747
}
4848

49+
// Returns a copy of the first entry of m_output_names.
// NOTE(review): index 0 is read unconditionally — presumably every node context
// carries at least one output name; confirm against the code that fills m_output_names.
std::string get_output_name() const { return m_output_names[0]; }
50+
4951
PartialShape get_output_shape(size_t index) const {
5052
return m_decoder->get_output_shape(m_output_names[index]);
5153
}

ggml/src/ggml-openvino/openvino/op/reshape.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ OutputVector translate_reshape(const NodeContext& context) {
2323
}
2424

2525
int op_case = context.get_op_case();
26-
FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported RESHAPE case");
26+
FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2 || op_case == 3, "Unsupported RESHAPE case");
2727

2828
auto output_shape = context.get_output_shape(0).to_shape();
2929
std::shared_ptr<ov::Node> new_shape_node;
@@ -32,11 +32,14 @@ OutputVector translate_reshape(const NodeContext& context) {
3232
ov::op::v0::Constant::create(ov::element::i64,
3333
{3},
3434
std::vector<int64_t>{-1, (int64_t)output_shape[1], (int64_t)output_shape[2]});
35-
} else {
35+
} else if (op_case == 2) {
3636
new_shape_node =
3737
ov::op::v0::Constant::create(ov::element::i64,
3838
{3},
3939
std::vector<int64_t>{(int64_t)output_shape[0], -1, (int64_t)output_shape[2]});
40+
} else {
41+
new_shape_node =
42+
ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{(int64_t) output_shape[0], -1, 1});
4043
}
4144
auto res = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), new_shape_node, false);
4245
return rename_outputs_with_suffix({res}, context.get_name());
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#include <cstdint>
2+
#include <memory>
3+
#include <openvino/core/node.hpp>
4+
#include <openvino/core/node_output.hpp>
5+
#include <openvino/frontend/exception.hpp>
6+
#include <openvino/op/constant.hpp>
7+
#include <openvino/op/convert.hpp>
8+
#include <openvino/op/gather.hpp>
9+
#include <openvino/op/reshape.hpp>
10+
#include <openvino/op/scatter_update.hpp>
11+
#include <openvino/op/shape_of.hpp>
12+
#include <openvino/op/slice.hpp>
13+
#include <openvino/op/squeeze.hpp>
14+
15+
#include "../node_context.hpp"
16+
#include "../op_table.hpp"
17+
#include "../utils.hpp"
18+
19+
namespace ov {
20+
namespace frontend {
21+
namespace ggml {
22+
namespace op {
23+
24+
// Translates GGML_OP_SET_ROWS into an OpenVINO sub-graph.
//
// Inputs (exactly two are required):
//   0: the rows to write (converted to the output element type before use)
//   1: the row indices
// The destination tensor is fetched by name, using the op's own output name.
//
// The destination is viewed as a 2-D tensor [dst_shape[1], dst_shape[2]], the
// rows are scattered into it along axis 0 with ScatterUpdate, and the result is
// reshaped back to the destination's runtime shape.
//
// Fails the conversion check unless the output shape has rank >= 3 and a
// leading dimension of 1. `to_shape()` additionally requires the output shape
// to be static.
OutputVector translate_set_rows(const NodeContext& context) {
    num_inputs_check(context, 2, 2);

    auto data = context.get_input(0);
    auto indices = context.get_input(1);
    auto dst = context.get_input(context.get_output_name());
    auto dst_shape = context.get_output_shape(0).to_shape();
    // dst_shape[1] and dst_shape[2] are read below, so the rank must be validated
    // before indexing; otherwise a lower-rank shape would be read out of bounds.
    FRONT_END_OP_CONVERSION_CHECK(dst_shape.size() >= 3 && dst_shape[0] == 1, "Unsupported shape in SET_ROWS");

    // Shared constant: used both as the squeeze axis for `data` and as the scatter axis.
    auto zero = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {0});

    // Drop the leading unit dimension of the destination so rows live on axis 0.
    auto dst_reshaped = std::make_shared<ov::op::v1::Reshape>(
        dst,
        ov::op::v0::Constant::create(ov::element::i64, {2}, {(int64_t) dst_shape[1], (int64_t) dst_shape[2]}),
        false);
    // Remove the two leading axes of the indices tensor.
    auto indices_reshaped =
        std::make_shared<ov::op::v0::Squeeze>(indices, ov::op::v0::Constant::create(ov::element::i64, {2}, {0, 1}));
    // Match the element type of the destination before scattering.
    auto data_converted = std::make_shared<ov::op::v0::Convert>(data, context.get_output_type(0));
    auto data_reshaped = std::make_shared<ov::op::v0::Squeeze>(data_converted, zero);
    auto updated = std::make_shared<ov::op::v3::ScatterUpdate>(dst_reshaped, indices_reshaped, data_reshaped, zero);
    // Restore the original destination shape, taken at runtime from `dst` itself.
    auto res = std::make_shared<ov::op::v1::Reshape>(updated, std::make_shared<ov::op::v0::ShapeOf>(dst), false);
    return rename_outputs_with_suffix({res}, context.get_name());
}
47+
48+
} // namespace op
49+
} // namespace ggml
50+
} // namespace frontend
51+
} // namespace ov

ggml/src/ggml-openvino/openvino/op_table.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ std::unordered_map<std::string, CreatorFunction> get_supported_ops() {
3535
{"GGML_UNARY_OP_SILU", op::translate_unary_silu },
3636
{"GGML_OP_VIEW", op::translate_view },
3737
{"GGML_GLU_OP_SWIGLU", op::translate_glu_swiglu },
38+
{"GGML_OP_SET_ROWS", op::translate_set_rows },
3839
};
3940
}
4041

ggml/src/ggml-openvino/openvino/op_table.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ GGML_OP_CONVERTER(translate_soft_max);
2626
GGML_OP_CONVERTER(translate_transpose);
2727
GGML_OP_CONVERTER(translate_view);
2828
GGML_OP_CONVERTER(translate_glu_swiglu);
29+
GGML_OP_CONVERTER(translate_set_rows);
2930

3031
} // namespace op
3132

0 commit comments

Comments
 (0)