Skip to content

Commit 1dd723d

Browse files
committed
Fix CPY due to cgraph change
1 parent 585e18b commit 1dd723d

File tree

2 files changed: 6 additions and 1 deletion.

ggml/src/ggml-openvino/openvino/op/cpy.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,11 @@ OutputVector translate_cpy(const NodeContext& context) {
3939

4040
if (op_case == 1) {
4141
// Write K to cache_k
42+
int64_t head_size = context.get_head_size();
43+
int64_t num_heads_kv = context.get_num_heads_kv();
44+
auto src0_reshape_shape =
45+
ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{-1, num_heads_kv, head_size});
46+
src0 = std::make_shared<ov::op::v1::Reshape>(src0, src0_reshape_shape, false);
4247
auto indices = context.get_input("update_indices_k");
4348
auto updated = std::make_shared<ov::op::v3::ScatterNDUpdate>(src1, indices, src0);
4449
res = std::make_shared<ov::op::v1::Reshape>(updated, std::make_shared<ov::op::v0::ShapeOf>(src1), false);

src/llama-graph.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1175,7 +1175,7 @@ static std::unique_ptr<llm_graph_input_attn_kv_unified> build_attn_inp_kv_unifie
11751175
inp->self_v_idxs = mctx_cur->build_input_v_idxs(ctx0, ubatch);
11761176

11771177
inp->self_kq_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens/n_stream, GGML_KQ_MASK_PAD), 1, n_stream);
1178-
cb(inp->self_kq_mask, "KQ_mask", -1);
1178+
ggml_set_name(inp->self_kq_mask, "KQ_mask");
11791179
ggml_set_input(inp->self_kq_mask);
11801180

11811181
inp->self_kq_mask_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask, GGML_TYPE_F16) : inp->self_kq_mask;

0 commit comments

Comments
 (0)