Skip to content

Commit cd073e9

Browse files
authored
Merge pull request #5 from ravi9/fp32_matmul
matmul cpu optimization
2 parents a8fa0e5 + fdd8a38 commit cd073e9

File tree

7 files changed

+65
-27
lines changed

7 files changed

+65
-27
lines changed

ggml/src/ggml-openvino/ggml-decoder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
212212
} else {
213213
m_op_case = 1;
214214
}
215+
break;
215216
}
216217
default:
217218
break;

ggml/src/ggml-openvino/ggml-decoder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
139139
std::vector<std::string> m_output_names;
140140
std::string m_op_name;
141141
mutable std::string m_name;
142-
int m_op_case;
142+
int m_op_case = 0;
143143
std::vector<std::pair<std::string, std::string>> m_op_node_name;
144144
std::map<std::string, std::shared_ptr<ov::Node>> m_model_inputs;
145145
std::map<std::string, std::shared_ptr<ov::Node>> m_model_extra_inputs;

ggml/src/ggml-openvino/openvino/op/mulmat.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <openvino/op/slice.hpp>
1313
#include <openvino/op/transpose.hpp>
1414
#include <openvino/op/unsqueeze.hpp>
15+
#include <openvino/op/util/op_types.hpp>
1516
#include <vector>
1617

1718
#include "../node_context.hpp"
@@ -28,7 +29,15 @@ OutputVector translate_mulmat(const NodeContext& context) {
2829

2930
ov::Output<Node> res;
3031
ov::Output<ov::Node> B = context.get_input(0);
31-
ov::Output<ov::Node> A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
32+
ov::Output<ov::Node> A = context.get_input(1);
33+
34+
bool convert_out_type = false;
35+
if (ov::op::util::is_constant(B.get_node()) && context.get_input_type(0) != context.get_input_type(1)) {
36+
B = std::make_shared<ov::op::v0::Convert>(context.get_input(0), context.get_input_type(1));
37+
} else if (context.get_input_type(0) != context.get_input_type(1)) {
38+
A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
39+
convert_out_type = true;
40+
}
3241

3342
auto B_shape = context.get_input_shape(0).to_shape();
3443
auto A_shape = context.get_input_shape(1).to_shape();
@@ -62,8 +71,12 @@ OutputVector translate_mulmat(const NodeContext& context) {
6271
A = Z;
6372
}
6473

65-
auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
66-
res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
74+
if (convert_out_type) {
75+
auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
76+
res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
77+
} else {
78+
res = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
79+
}
6780

6881
return rename_outputs_with_suffix({res}, context.get_name());
6982
}

ggml/src/ggml-openvino/openvino/op/soft_max.cpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,7 @@ OutputVector translate_soft_max(const NodeContext& context) {
5353

5454
auto mask_node = context.get_input(1);
5555

56-
std::shared_ptr<ov::Node> token_len = get_dimensions(input_node, {1});
57-
// Try using Q-cur to retrieve the token length, so that the translation of SOFT_MAX
58-
// does not depend on the result of the QK MatMul, so that QK matmul + softmax + qkv matmul
59-
// can be fused into SDPA.
60-
if (input_node->get_type_info() == ov::op::v0::Convert::get_type_info_static()) {
61-
auto qk = input_node->get_input_node_shared_ptr(0);
62-
if (qk->get_type_info() == ov::op::v0::MatMul::get_type_info_static()) {
63-
token_len = get_dimensions(qk->get_input_node_shared_ptr(0), {1});
64-
}
65-
}
56+
auto token_len = context.get_input("token_len");
6657
auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
6758
auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
6859
std::shared_ptr<ov::Node> mask_node_sliced =

ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.hpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#pragma once
2+
3+
#include "mark_decompression_convert_constant_folding.hpp"
4+
#include "openvino/pass/matcher_pass.hpp"
5+
#include "openvino/core/visibility.hpp"
6+
7+
#ifdef OPENVINO_STATIC_LIBRARY
8+
# define TRANSFORMATIONS_API
9+
#else
10+
# ifdef IMPLEMENT_OPENVINO_API
11+
# define TRANSFORMATIONS_API OPENVINO_CORE_EXPORTS
12+
# else
13+
# define TRANSFORMATIONS_API OPENVINO_CORE_IMPORTS
14+
# endif // IMPLEMENT_OPENVINO_API
15+
#endif // OPENVINO_STATIC_LIBRARY
16+
17+
namespace ov {
18+
namespace pass {
19+
20+
class TRANSFORMATIONS_API MarkCompressedFloatConstants;
21+
22+
} // namespace pass
23+
} // namespace ov
24+
25+
class ov::pass::MarkCompressedFloatConstants : public MatcherPass {
26+
public:
27+
OPENVINO_MATCHER_PASS_RTTI("MarkCompressedFloatConstants");
28+
MarkCompressedFloatConstants();
29+
};

ggml/src/ggml-openvino/openvino/translate_session.cpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "ggml-openvino/openvino/utils.hpp"
2828
#include "input_model.hpp"
2929
#include "pass/fuse_to_sdpa.hpp"
30+
#include "pass/mark_decompression_convert_constant_folding.hpp"
3031

3132
namespace ov {
3233
namespace frontend {
@@ -253,21 +254,24 @@ std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputMo
253254
return resulting_model;
254255
}
255256

256-
void TranslateSession::apply_transformations(const std::shared_ptr<Model>& model) {
257+
std::shared_ptr<Model> TranslateSession::apply_transformations(std::shared_ptr<Model> model) {
257258
auto ggml_model_decoder = std::dynamic_pointer_cast<InputModel>(m_input_model)->get_model_decoder();
259+
{
260+
ov::pass::Manager manager;
261+
manager.set_per_pass_validation(true);
262+
manager.register_pass<ov::pass::MarkCompressedFloatConstants>();
263+
manager.register_pass<ov::pass::ConstantFolding>();
264+
265+
if (!ggml_model_decoder->is_static()) {
266+
const auto kv_param_res_names = ggml_model_decoder->get_kv_param_res_names();
267+
const auto kv_param_res_pairs = get_kv_param_res_pairs(model, kv_param_res_names);
268+
manager.register_pass<ov::pass::MakeStateful>(kv_param_res_pairs);
269+
}
258270

259-
ov::pass::Manager manager;
260-
manager.set_per_pass_validation(true);
261-
manager.register_pass<ov::pass::ConstantFolding>();
262-
263-
if (!ggml_model_decoder->is_static()) {
264-
const auto kv_param_res_names = ggml_model_decoder->get_kv_param_res_names();
265-
const auto kv_param_res_pairs = get_kv_param_res_pairs(model, kv_param_res_names);
266-
manager.register_pass<ov::pass::MakeStateful>(kv_param_res_pairs);
271+
manager.register_pass<pass::FuseToSDPA>();
272+
manager.run_passes(model);
267273
}
268-
269-
manager.register_pass<pass::FuseToSDPA>();
270-
manager.run_passes(model);
274+
return model;
271275
}
272276

273277
} // namespace ggml

ggml/src/ggml-openvino/openvino/translate_session.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class TranslateSession {
1616
std::shared_ptr<Model> translate_graph(const frontend::InputModel::Ptr& input_model);
1717

1818
private:
19-
void apply_transformations(const std::shared_ptr<Model>& model);
19+
std::shared_ptr<Model> apply_transformations(std::shared_ptr<Model> model);
2020
const frontend::InputModel::Ptr m_input_model;
2121
const std::unordered_map<std::string, CreatorFunction>& m_translator_map;
2222
std::shared_ptr<Model> m_ov_model;

0 commit comments

Comments
 (0)