Skip to content

Commit 585e18b

Browse files
committed
Reduce memory: free ov weights node after graph conversion
1 parent f74a0d3 commit 585e18b

File tree

3 files changed: 8 additions and 14 deletions.

3 files changed

+8
-14
lines changed

ggml/src/ggml-openvino/ggml-decoder.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,36 +42,30 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap
4242
m_op_name(m_node ? std::string(m_node->name) : "NONE_OP"),
4343
m_is_static(is_static),
4444
m_is_first_token(is_first_token) {
45-
// TODO avoid static
46-
static std::map<std::string, std::shared_ptr<ov::Node>> model_weights;
4745
if (m_node) {
4846
set_input_output(m_node);
4947
} else {
50-
static bool printed = false;
51-
if (!printed && getenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS")) {
48+
if (is_first_token && getenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS")) {
5249
print_tensor_address_map(cgraph);
53-
printed = true;
5450
}
5551

5652
if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
57-
std::string filename = "cgraph.txt";
53+
auto timestamp = (long long) ggml_time_us();
54+
std::string filename = "cgraph_" + std::to_string(timestamp) + ".txt";
5855
dump_cgraph(cgraph, filename);
5956
}
6057

6158
set_llm_params();
6259

63-
static bool weight_created = false;
64-
if (!weight_created) {
65-
add_weight_const_parallel(model_weights);
66-
weight_created = true;
60+
if (is_first_token) {
61+
add_weight_const_parallel(m_model_weights);
6762
}
6863

6964
for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
7065
auto* cur_node = cgraph->nodes[node_n];
7166
m_nodes.push_back(cur_node);
7267
set_input_output(cur_node);
7368
}
74-
m_model_weights = model_weights;
7569

7670
add_extra_inputs();
7771
}

ggml/src/ggml-openvino/ggml-decoder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
108108

109109
ov::PartialShape get_graph_input_shape(const ggml_tensor* src) const;
110110

111+
// Empties m_model_weights, dropping every entry this decoder holds in it.
// In this commit it is called from utils.cpp right after FrontEnd::convert(),
// per the commit title to free the OpenVINO weight nodes once the graph has
// been converted.
// NOTE(review): any reader of m_model_weights after this call sees an empty
// container - confirm nothing queries the weights post-conversion.
void clear_model_weights() { m_model_weights.clear(); }
112+
111113
private:
112114
void set_input_output(ggml_tensor* node);
113115
void add_extra_inputs();

ggml/src/ggml-openvino/utils.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@
99
#include <memory>
1010
#include <openvino/core/any.hpp>
1111
#include <openvino/core/graph_util.hpp>
12-
#include <openvino/core/partial_shape.hpp>
1312
#include <openvino/core/type/float16.hpp>
1413
#include <openvino/frontend/manager.hpp>
15-
#include <openvino/op/parameter.hpp>
1614
#include <openvino/openvino.hpp>
1715
#include <openvino/runtime/compiled_model.hpp>
1816
#include <openvino/runtime/infer_request.hpp>
@@ -89,7 +87,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
8987
if (cache_dir && !is_static) {
9088
core.set_property(ov::cache_dir(cache_dir));
9189
}
92-
// core.set_property(ov::enable_profiling(true));
9390

9491
static std::unordered_map<struct ggml_cgraph*, std::shared_ptr<ov::InferRequest>> infer_request_cache;
9592
static std::unordered_map<struct ggml_cgraph*, std::vector<std::string>> ov_input_names_cache;
@@ -157,6 +154,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
157154

158155
auto input_model = std::make_shared<ov::frontend::ggml::InputModel>(ggml_decoder);
159156
model = ov::frontend::ggml::FrontEnd::convert(input_model);
157+
ggml_decoder->clear_model_weights();
160158
conversion_end_time = ggml_time_us();
161159

162160
auto compiled_model = core.compile_model(model, device, config);

0 commit comments

Comments
 (0)