From bfc235df123ecd73083d243325ead57f94174104 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Wed, 9 Jul 2025 20:35:28 +0200
Subject: [PATCH] llama : remove llm_graph_input_one

---
 src/llama-graph.cpp |  7 -------
 src/llama-graph.h   | 11 -----------
 src/llama-model.cpp | 12 +-----------
 3 files changed, 1 insertion(+), 29 deletions(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 7f0e8c67f1325..292bbaadbd7f4 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -354,13 +354,6 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
     }
 }
 
-void llm_graph_input_one::set_input(const llama_ubatch * ubatch) {
-    GGML_UNUSED(ubatch);
-    GGML_ASSERT(one && ggml_nelements(one) == 1);
-    float f_one = 1.0f;
-    ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));
-}
-
 //
 // llm_graph_context
 //
diff --git a/src/llama-graph.h b/src/llama-graph.h
index 7bdf656768a0c..e6978466d7a9a 100644
--- a/src/llama-graph.h
+++ b/src/llama-graph.h
@@ -352,17 +352,6 @@ class llm_graph_input_mem_hybrid : public llm_graph_input_i {
     const llama_memory_hybrid_context * mctx;
 };
 
-// TODO: remove this when ggml_scale_add is implemented
-class llm_graph_input_one : public llm_graph_input_i {
-public:
-    llm_graph_input_one() {}
-    virtual ~llm_graph_input_one() = default;
-
-    void set_input(const llama_ubatch * ubatch) override;
-
-    ggml_tensor * one = nullptr; // F32
-};
-
 //
 // llm_graph_result
 //
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 4468c837f1c28..4148823d72959 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -9382,8 +9382,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
     const int n_layer_sparsity = 10; // number of layers using activation sparsity
     const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
 
-    ggml_tensor * one; // containing single element 1.0f
-
     llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf)
         : llm_graph_context(params),
           model(model),
@@ -9395,14 +9393,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         ggml_tensor * cur;
         ggml_tensor * inpL;
 
-        // TODO: remove this when ggml_scale_add is implemented
-        one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        {
-            auto inp = std::make_unique<llm_graph_input_one>();
-            inp->one = one;
-            res->add_input(std::move(inp));
-        }
-
         inpL = build_inp_embd(model.tok_embd);
 
         // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
@@ -9792,7 +9782,7 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
             cb(innovation, "innovation", il);
 
             ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_correct_coef, modalities); // [n_altup, n_tokens]
-            all_coefs = ggml_add(ctx0, all_coefs, one);
+            all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f); // + 1.0
             cb(all_coefs, "all_coefs", il);
             all_coefs = ggml_cont(ctx0, ggml_transpose(ctx0, all_coefs)); // [n_tokens, n_altup]
             all_coefs = ggml_reshape_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup]
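
Note on the replacement: the removed llm_graph_input_one machinery existed only to add a constant 1.0f to all_coefs via a broadcast ggml_add with a single-element input tensor. ggml_scale_bias(ctx, a, s, b) computes a*s + b elementwise, so calling it with s = 1.0f and b = 1.0f folds that "+ 1.0" into a single op and removes the need for the extra graph input. Below is a minimal, standalone CPU-only sketch (not part of the patch) illustrating that equivalence; it assumes a ggml build recent enough to provide ggml_scale_bias and the ggml-cpu.h header, and uses small made-up tensor sizes purely for illustration.

// sketch: broadcast-add of a 1-element "one" tensor vs ggml_scale_bias(x, 1.0f, 1.0f)
#include "ggml.h"
#include "ggml-cpu.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // stand-in for all_coefs: 4 arbitrary values
    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    for (int i = 0; i < 4; ++i) {
        ((float *) x->data)[i] = (float) i;
    }

    // old path: broadcast-add a single-element tensor holding 1.0f
    struct ggml_tensor * one = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    ((float *) one->data)[0] = 1.0f;
    struct ggml_tensor * y_add = ggml_add(ctx, x, one);

    // new path: fused x*1.0f + 1.0f
    struct ggml_tensor * y_sb = ggml_scale_bias(ctx, x, 1.0f, 1.0f);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y_add);
    ggml_build_forward_expand(gf, y_sb);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    // both columns print i + 1.0
    for (int i = 0; i < 4; ++i) {
        printf("%f %f\n", ((float *) y_add->data)[i], ((float *) y_sb->data)[i]);
    }

    ggml_free(ctx);
    return 0;
}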