tests : add rms_norm + mul + add test

ggerganov · ggerganov · commit 012fb7194296 · 2025-07-18T09:20:03.000+03:00
ggml-ci
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
@@ -138,13 +138,14 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
 
     if (ctx->mtl_device_ref_count == 0) {
         if (ctx->debug_fusion > 0) {
+            fprintf(stderr, "%s: fusion stats:\n", __func__);
             for (int i = 0; i < GGML_OP_COUNT; i++) {
                 if (ctx->fuse_cnt[i] == 0) {
                     continue;
                 }
 
                 // note: cannot use ggml_log here
-                fprintf(stderr, "%s: %s: %" PRIu64 "\n", __func__, ggml_op_name((enum ggml_op) i), ctx->fuse_cnt[i]);
+                fprintf(stderr, "%s: - %s: %" PRIu64 "\n", __func__, ggml_op_name((enum ggml_op) i), ctx->fuse_cnt[i]);
             }
         }
 
@@ -2212,8 +2213,6 @@ static int ggml_metal_encode_node(
                     }
                 }
 
-                //GGML_LOG_INFO("%s: XXXXXXXXXXXXXXXXXXX n_fuse = %d\n", __func__, n_fuse);
-
                 if (ggml_nelements(src1) == ne10 && ggml_is_contiguous(src1) && ne00 % 4 == 0 && ne10 % 4 == 0) {
                     GGML_ASSERT(ggml_is_contiguous(src0));
 
@@ -4335,8 +4334,6 @@ static int ggml_metal_encode_node(
                     }
                 }
 
-                //GGML_LOG_INFO("%s: RRRRRRRRRRRRRRRRRRRRRRRRRRRRR n_fuse = %d\n", __func__, n_fuse);
-
                 if (n_fuse > 1) {
                     id_dst = ggml_metal_get_buffer(nodes[n_fuse - 1], &offs_dst);
                 }
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
@@ -2636,15 +2636,15 @@ struct test_rms_norm_back : public test_case {
     }
 };
 
-// GGML_OP_RMS_NORM + GGML_OP_MUL
-struct test_rms_norm_mul : public test_case {
+// GGML_OP_RMS_NORM + GGML_OP_MUL + GGML_OP_ADD
+struct test_rms_norm_mul_add : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const float eps;
 
     std::string op_desc(ggml_tensor * t) override {
         GGML_UNUSED(t);
-        return "RMS_NORM_MUL";
+        return "RMS_NORM_MUL_ADD";
     }
 
     bool run_whole_graph() override { return true; }
@@ -2653,22 +2653,25 @@ struct test_rms_norm_mul : public test_case {
         return VARS_TO_STR3(type, ne, eps);
     }
 
-    test_rms_norm_mul(ggml_type type = GGML_TYPE_F32,
+    test_rms_norm_mul_add(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {64, 5, 4, 3},
             float eps = 1e-6f)
         : type(type), ne(ne), eps(eps) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_tensor * c = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_param(a);
         ggml_set_name(a, "a");
         ggml_set_param(b);
         ggml_set_name(b, "b");
+        ggml_set_param(c);
+        ggml_set_name(c, "c");
 
-        // Use a and b early, so we don't end up with an OP_NONE between rms_norm and mul
-        a = ggml_add(ctx, a, b);
-        ggml_tensor * out = ggml_mul(ctx, ggml_rms_norm(ctx, a, eps), b);
+        // Use a, b and c early, so we don't end up with an OP_NONE between rms_norm, mul and add
+        a = ggml_add(ctx, ggml_add(ctx, a, b), c);
+        ggml_tensor * out = ggml_add(ctx, ggml_mul(ctx, ggml_rms_norm(ctx, a, eps), b), c);
         ggml_set_name(out, "out");
 
         return out;
@@ -5188,7 +5191,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_l2_norm      (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
     }
     for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f}) {
-        test_cases.emplace_back(new test_rms_norm_mul(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+        test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
     }
 
     test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, {64, 5, 4, 3}, 1e-12f));

Original file line number	Diff line number	Diff line change
`@@ -138,13 +138,14 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte`
`138`	`138`
`139`	`139`	`if (ctx->mtl_device_ref_count == 0) {`
`140`	`140`	`if (ctx->debug_fusion > 0) {`
	`141`	`+ fprintf(stderr, "%s: fusion stats:\n", __func__);`
`141`	`142`	`for (int i = 0; i < GGML_OP_COUNT; i++) {`
`142`	`143`	`if (ctx->fuse_cnt[i] == 0) {`
`143`	`144`	`continue;`
`144`	`145`	`}`
`145`	`146`
`146`	`147`	`// note: cannot use ggml_log here`
`147`		`- fprintf(stderr, "%s: %s: %" PRIu64 "\n", __func__, ggml_op_name((enum ggml_op) i), ctx->fuse_cnt[i]);`
	`148`	`+ fprintf(stderr, "%s: - %s: %" PRIu64 "\n", __func__, ggml_op_name((enum ggml_op) i), ctx->fuse_cnt[i]);`
`148`	`149`	`}`
`149`	`150`	`}`
`150`	`151`
`@@ -2212,8 +2213,6 @@ static int ggml_metal_encode_node(`
`2212`	`2213`	`}`
`2213`	`2214`	`}`
`2214`	`2215`
`2215`		`- //GGML_LOG_INFO("%s: XXXXXXXXXXXXXXXXXXX n_fuse = %d\n", __func__, n_fuse);`
`2216`		`-`
`2217`	`2216`	`if (ggml_nelements(src1) == ne10 && ggml_is_contiguous(src1) && ne00 % 4 == 0 && ne10 % 4 == 0) {`
`2218`	`2217`	`GGML_ASSERT(ggml_is_contiguous(src0));`
`2219`	`2218`
`@@ -4335,8 +4334,6 @@ static int ggml_metal_encode_node(`
`4335`	`4334`	`}`
`4336`	`4335`	`}`
`4337`	`4336`
`4338`		`- //GGML_LOG_INFO("%s: RRRRRRRRRRRRRRRRRRRRRRRRRRRRR n_fuse = %d\n", __func__, n_fuse);`
`4339`		`-`
`4340`	`4337`	`if (n_fuse > 1) {`
`4341`	`4338`	`id_dst = ggml_metal_get_buffer(nodes[n_fuse - 1], &offs_dst);`
`4342`	`4339`	`}`