Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
} else {
m_op_case = 1;
}
break;
}
default:
break;
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-openvino/ggml-decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
std::vector<std::string> m_output_names;
std::string m_op_name;
mutable std::string m_name;
int m_op_case;
int m_op_case = 0;
std::vector<std::pair<std::string, std::string>> m_op_node_name;
std::map<std::string, std::shared_ptr<ov::Node>> m_model_inputs;
std::map<std::string, std::shared_ptr<ov::Node>> m_model_extra_inputs;
Expand Down
19 changes: 16 additions & 3 deletions ggml/src/ggml-openvino/openvino/op/mulmat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <openvino/op/slice.hpp>
#include <openvino/op/transpose.hpp>
#include <openvino/op/unsqueeze.hpp>
#include <openvino/op/util/op_types.hpp>
#include <vector>

#include "../node_context.hpp"
Expand All @@ -28,7 +29,15 @@ OutputVector translate_mulmat(const NodeContext& context) {

ov::Output<Node> res;
ov::Output<ov::Node> B = context.get_input(0);
ov::Output<ov::Node> A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
ov::Output<ov::Node> A = context.get_input(1);

bool convert_out_type = false;
if (ov::op::util::is_constant(B.get_node()) && context.get_input_type(0) != context.get_input_type(1)) {
B = std::make_shared<ov::op::v0::Convert>(context.get_input(0), context.get_input_type(1));
} else if (context.get_input_type(0) != context.get_input_type(1)) {
A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
convert_out_type = true;
}

auto B_shape = context.get_input_shape(0).to_shape();
auto A_shape = context.get_input_shape(1).to_shape();
Expand Down Expand Up @@ -62,8 +71,12 @@ OutputVector translate_mulmat(const NodeContext& context) {
A = Z;
}

auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
if (convert_out_type) {
auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
} else {
res = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
}

return rename_outputs_with_suffix({res}, context.get_name());
}
Expand Down
11 changes: 1 addition & 10 deletions ggml/src/ggml-openvino/openvino/op/soft_max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,7 @@ OutputVector translate_soft_max(const NodeContext& context) {

auto mask_node = context.get_input(1);

std::shared_ptr<ov::Node> token_len = get_dimensions(input_node, {1});
// Try using Q-cur to retrieve the token length, so that the translation of SOFT_MAX
// does not depend on the result of the QK MatMul, so that QK matmul + softmax + qkv matmul
// can be fused into SDPA.
if (input_node->get_type_info() == ov::op::v0::Convert::get_type_info_static()) {
auto qk = input_node->get_input_node_shared_ptr(0);
if (qk->get_type_info() == ov::op::v0::MatMul::get_type_info_static()) {
token_len = get_dimensions(qk->get_input_node_shared_ptr(0), {1});
}
}
auto token_len = context.get_input("token_len");
auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
std::shared_ptr<ov::Node> mask_node_sliced =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#pragma once

#include "mark_decompression_convert_constant_folding.hpp"
#include "openvino/pass/matcher_pass.hpp"
#include "openvino/core/visibility.hpp"

// TRANSFORMATIONS_API is the decoration placed on the pass class declared in
// this header. It expands to:
//   - nothing,               when OPENVINO_STATIC_LIBRARY is defined;
//   - OPENVINO_CORE_EXPORTS, when IMPLEMENT_OPENVINO_API is defined;
//   - OPENVINO_CORE_IMPORTS, in every other case.
// NOTE(review): the OPENVINO_CORE_* macros are presumably supplied by
// "openvino/core/visibility.hpp" included above - confirm.
// NOTE(review): a translation unit that defines the class itself without
// IMPLEMENT_OPENVINO_API ends up on the IMPORTS branch; verify that this is
// what the build of this backend intends.
#ifdef OPENVINO_STATIC_LIBRARY
# define TRANSFORMATIONS_API
#else
# ifdef IMPLEMENT_OPENVINO_API
# define TRANSFORMATIONS_API OPENVINO_CORE_EXPORTS
# else
# define TRANSFORMATIONS_API OPENVINO_CORE_IMPORTS
# endif // IMPLEMENT_OPENVINO_API
#endif // OPENVINO_STATIC_LIBRARY

namespace ov {
namespace pass {

class TRANSFORMATIONS_API MarkCompressedFloatConstants;

} // namespace pass
} // namespace ov

class ov::pass::MarkCompressedFloatConstants : public MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("MarkCompressedFloatConstants");
MarkCompressedFloatConstants();
};
28 changes: 16 additions & 12 deletions ggml/src/ggml-openvino/openvino/translate_session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "ggml-openvino/openvino/utils.hpp"
#include "input_model.hpp"
#include "pass/fuse_to_sdpa.hpp"
#include "pass/mark_decompression_convert_constant_folding.hpp"

namespace ov {
namespace frontend {
Expand Down Expand Up @@ -253,21 +254,24 @@ std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputMo
return resulting_model;
}

void TranslateSession::apply_transformations(const std::shared_ptr<Model>& model) {
std::shared_ptr<Model> TranslateSession::apply_transformations(std::shared_ptr<Model> model) {
auto ggml_model_decoder = std::dynamic_pointer_cast<InputModel>(m_input_model)->get_model_decoder();
{
ov::pass::Manager manager;
manager.set_per_pass_validation(true);
manager.register_pass<ov::pass::MarkCompressedFloatConstants>();
manager.register_pass<ov::pass::ConstantFolding>();

if (!ggml_model_decoder->is_static()) {
const auto kv_param_res_names = ggml_model_decoder->get_kv_param_res_names();
const auto kv_param_res_pairs = get_kv_param_res_pairs(model, kv_param_res_names);
manager.register_pass<ov::pass::MakeStateful>(kv_param_res_pairs);
}

ov::pass::Manager manager;
manager.set_per_pass_validation(true);
manager.register_pass<ov::pass::ConstantFolding>();

if (!ggml_model_decoder->is_static()) {
const auto kv_param_res_names = ggml_model_decoder->get_kv_param_res_names();
const auto kv_param_res_pairs = get_kv_param_res_pairs(model, kv_param_res_names);
manager.register_pass<ov::pass::MakeStateful>(kv_param_res_pairs);
manager.register_pass<pass::FuseToSDPA>();
manager.run_passes(model);
}

manager.register_pass<pass::FuseToSDPA>();
manager.run_passes(model);
return model;
}

} // namespace ggml
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-openvino/openvino/translate_session.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class TranslateSession {
std::shared_ptr<Model> translate_graph(const frontend::InputModel::Ptr& input_model);

private:
void apply_transformations(const std::shared_ptr<Model>& model);
std::shared_ptr<Model> apply_transformations(std::shared_ptr<Model> model);
const frontend::InputModel::Ptr m_input_model;
const std::unordered_map<std::string, CreatorFunction>& m_translator_map;
std::shared_ptr<Model> m_ov_model;
Expand Down
Loading