From cb43b7c75fbcfaf197f1acad54c62145c6029974 Mon Sep 17 00:00:00 2001
From: Max Buckley <maxwbuckley@gmail.com>
Date: Wed, 20 May 2026 16:18:17 +0200
Subject: [PATCH 1/5] [CoreML EP] Support bool Cast in ML Program

Two changes to the ML Program Cast builder:

1. Accept BOOL as a source and target dtype in HasSupportedInputsImpl. The
   ML Program `cast` op already handles bool, and AddToModelBuilderImpl
   already maps `to == BOOL`; only the input/output type gate omitted it.
   This lets int64<->bool<->float casts (transformer attention-mask graphs)
   stay on CoreML.

2. Move the "no preceding node" check after the ML Program early-return. It
   was legacy gating for the NeuralNetwork ArgMax-only path (which
   dereferences InputEdgesBegin()); on the ML Program path a Cast fed
   directly by a graph input is fine, and rejecting it forced needless CPU
   fallback.

Tests (coreml_basic_test.cc):
- CastBoolRoundTrip_MLProgram: an int64->bool->float cast chain runs fully
  on CoreML and matches the CPU reference. The bool tensor is internal (a
  CoreML partition cannot have bool I/O) and the first Cast is graph-input
  fed.
- CastNonArgMaxNeuralNetworkNotSupported: the same chain falls back to CPU
  on the NeuralNetwork format, guarding the IsOpSupportedImpl reordering.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../coreml/builders/impl/cast_op_builder.cc   | 18 +++--
 .../providers/coreml/coreml_basic_test.cc     | 69 +++++++++++++++++++
 2 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc
index e0665f5c2a5ec..890d2b12db917 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc
@@ -77,15 +77,19 @@ Status CastOpBuilder::AddToModelBuilderImpl([[maybe_unused]] ModelBuilder& model
 
 bool CastOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                       const logging::Logger& logger) const {
+  if (input_params.create_mlprogram) {
+    // The ML Program 'cast' op stands alone, so a Cast fed directly by a graph
+    // input (no preceding node) is fine here.
+    return true;
+  }
+
+  // The NeuralNetwork path only supports a Cast that consumes an ArgMax, so it
+  // needs a preceding node to inspect (InputEdgesBegin() must be dereferenceable).
   if (node.GetInputEdgesCount() == 0) {
     LOGS(logger, VERBOSE) << "Cast has no preceding nodes.";
     return false;
   }
 
-  if (input_params.create_mlprogram) {
-    return true;
-  }
-
   const auto& prec_node = node.InputEdgesBegin()->GetNode();
 
   /*Cast node is only aimed for supporting argmax and we are only handling the case where an argmax
@@ -135,11 +139,13 @@ bool CastOpBuilder::HasSupportedInputsImpl(const Node& node, [[maybe_unused]] co
     if ((input_type == ONNX_NAMESPACE::TensorProto_DataType_INT32 ||
          input_type == ONNX_NAMESPACE::TensorProto_DataType_INT64 ||
          input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
-         input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) &&
+         input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 ||
+         input_type == ONNX_NAMESPACE::TensorProto_DataType_BOOL) &&
         (output_type == ONNX_NAMESPACE::TensorProto_DataType_INT32 ||
          output_type == ONNX_NAMESPACE::TensorProto_DataType_INT64 ||
          output_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
-         output_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16)) {
+         output_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 ||
+         output_type == ONNX_NAMESPACE::TensorProto_DataType_BOOL)) {
       return true;
     } else {
       LOGS(logger, VERBOSE) << "[" << node.OpType()
diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index b6e1545d6f319..01e82cf7fdf30 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -1911,6 +1911,75 @@ TEST(CoreMLExecutionProviderTest, Split11SingleOutputNotSupported) {
   TestModelLoad(model_span, MakeCoreMLExecutionProvider("MLProgram"), ExpectedEPNodeAssignment::None);
 }
 
+namespace {
+// int64 -> Cast(bool) -> Cast(float) round-trip. The bool tensor stays
+// internal to the CoreML partition (a partition cannot have bool I/O), and
+// the first Cast is fed directly by a graph input -- so this exercises both
+// the new bool dtype support and acceptance of a Cast with no preceding node.
+std::string MakeCastBoolModelData() {
+  onnxruntime::Model model("cast_bool_test", false, DefaultLoggingManager().DefaultLogger());
+  auto& graph = model.MainGraph();
+
+  auto make_type = [](int32_t elem_type) {
+    ONNX_NAMESPACE::TypeProto t;
+    t.mutable_tensor_type()->set_elem_type(elem_type);
+    for (int64_t d : {1, 4}) t.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(d);
+    return t;
+  };
+  const auto int64_type = make_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
+  const auto bool_type = make_type(ONNX_NAMESPACE::TensorProto_DataType_BOOL);
+  const auto float_type = make_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+
+  auto& x = graph.GetOrCreateNodeArg("X", &int64_type);
+  auto& b = graph.GetOrCreateNodeArg("B", &bool_type);
+  auto& y = graph.GetOrCreateNodeArg("Y", &float_type);
+
+  auto& to_bool = graph.AddNode("cast_to_bool", "Cast", "int64 -> bool", {&x}, {&b});
+  to_bool.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_BOOL));
+  auto& to_float = graph.AddNode("cast_to_float", "Cast", "bool -> float", {&b}, {&y});
+  to_float.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT));
+
+  ORT_THROW_IF_ERROR(graph.Resolve());
+  std::string model_data;
+  model.ToProto().SerializeToString(&model_data);
+  return model_data;
+}
+}  // namespace
+
+// ML Program Cast supports bool as both a source and a target dtype.
+TEST(CoreMLExecutionProviderTest, CastBoolRoundTrip_MLProgram) {
+  const std::string model_data = MakeCastBoolModelData();
+  gsl::span<const std::byte> model_span{reinterpret_cast<const std::byte*>(model_data.data()),
+                                        model_data.size()};
+
+#if defined(__APPLE__)
+  std::vector<int64_t> dims = {1, 4};
+  std::vector<int64_t> values = {0, 5, 0, -3};  // -> bool {F,T,F,T} -> float {0,1,0,1}
+  OrtValue x_val;
+  CreateMLValue<int64_t>(CPUAllocator::DefaultInstance(), dims, values, &x_val);
+  NameMLValMap feeds;
+  feeds.insert(std::make_pair("X", x_val));
+
+  EPVerificationParams params{};
+  params.ep_node_assignment = ExpectedEPNodeAssignment::All;
+  RunAndVerifyOutputsWithEP(model_span, CurrentTestName(),
+                            MakeCoreMLExecutionProvider("MLProgram"), feeds, params);
+#else
+  TestModelLoad(model_span, MakeCoreMLExecutionProvider("MLProgram"), ExpectedEPNodeAssignment::All);
+#endif
+}
+
+// On the NeuralNetwork format the Cast builder only supports a Cast that
+// consumes an ArgMax, so these graph-input / Cast-fed Casts must fall back to
+// CPU. Guards the IsOpSupportedImpl reordering that moved the preceding-node
+// check into the NeuralNetwork branch.
+TEST(CoreMLExecutionProviderTest, CastNonArgMaxNeuralNetworkNotSupported) {
+  const std::string model_data = MakeCastBoolModelData();
+  gsl::span<const std::byte> model_span{reinterpret_cast<const std::byte*>(model_data.data()),
+                                        model_data.size()};
+  TestModelLoad(model_span, MakeCoreMLExecutionProvider(), ExpectedEPNodeAssignment::None);
+}
+
 #endif  // !(ORT_MINIMAL_BUILD)
 }  // namespace test
 }  // namespace onnxruntime

From a1240877ff3dd010c9a8b4dbadf6cafbb857f8fc Mon Sep 17 00:00:00 2001
From: Max Buckley <maxwbuckley@gmail.com>
Date: Thu, 21 May 2026 09:34:33 +0200
Subject: [PATCH 2/5] [CoreML EP] Drop the standalone bool-Cast round-trip test

CastBoolRoundTrip_MLProgram exercised int64 -> Cast(bool) -> Cast(float).
CoreML's compiler fuses the two back-to-back `cast` ops and drops the bool
clamp (cast(cast(x,bool),fp32) collapses to cast(x,fp32)), so the round-trip
produces the raw input value instead of 0/1 -- the test can't be numerically
verified standalone.

The bool-Cast support itself is correct: it is exercised end to end by the
dependent PRs, where a non-Cast op sits between the int<->bool casts so no
fusion occurs -- Cast->And->Cast (Where/And PR) and Cast->GatherND->Cast
(GatherND PR), both numerically verified against the CPU EP.

CastNonArgMaxNeuralNetworkNotSupported (the NeuralNetwork-format negative
test) is kept; it guards the IsOpSupportedImpl reordering.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../providers/coreml/coreml_basic_test.cc     | 33 ++++---------------
 1 file changed, 6 insertions(+), 27 deletions(-)

diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index 01e82cf7fdf30..35c33edb7ece6 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -1912,10 +1912,12 @@ TEST(CoreMLExecutionProviderTest, Split11SingleOutputNotSupported) {
 }
 
 namespace {
-// int64 -> Cast(bool) -> Cast(float) round-trip. The bool tensor stays
-// internal to the CoreML partition (a partition cannot have bool I/O), and
-// the first Cast is fed directly by a graph input -- so this exercises both
-// the new bool dtype support and acceptance of a Cast with no preceding node.
+// int64 -> Cast(bool) -> Cast(float); the first Cast is fed directly by a
+// graph input (no preceding node). Used by the NeuralNetwork negative test
+// below. Positive bool-Cast coverage lives in the dependent Where/And and
+// GatherND PRs, where a non-Cast op sits between the int<->bool casts -- a
+// standalone bool round-trip can't be numerically verified here because
+// CoreML fuses back-to-back cast ops (dropping the bool clamp).
 std::string MakeCastBoolModelData() {
   onnxruntime::Model model("cast_bool_test", false, DefaultLoggingManager().DefaultLogger());
   auto& graph = model.MainGraph();
@@ -1946,29 +1948,6 @@ std::string MakeCastBoolModelData() {
 }
 }  // namespace
 
-// ML Program Cast supports bool as both a source and a target dtype.
-TEST(CoreMLExecutionProviderTest, CastBoolRoundTrip_MLProgram) {
-  const std::string model_data = MakeCastBoolModelData();
-  gsl::span<const std::byte> model_span{reinterpret_cast<const std::byte*>(model_data.data()),
-                                        model_data.size()};
-
-#if defined(__APPLE__)
-  std::vector<int64_t> dims = {1, 4};
-  std::vector<int64_t> values = {0, 5, 0, -3};  // -> bool {F,T,F,T} -> float {0,1,0,1}
-  OrtValue x_val;
-  CreateMLValue<int64_t>(CPUAllocator::DefaultInstance(), dims, values, &x_val);
-  NameMLValMap feeds;
-  feeds.insert(std::make_pair("X", x_val));
-
-  EPVerificationParams params{};
-  params.ep_node_assignment = ExpectedEPNodeAssignment::All;
-  RunAndVerifyOutputsWithEP(model_span, CurrentTestName(),
-                            MakeCoreMLExecutionProvider("MLProgram"), feeds, params);
-#else
-  TestModelLoad(model_span, MakeCoreMLExecutionProvider("MLProgram"), ExpectedEPNodeAssignment::All);
-#endif
-}
-
 // On the NeuralNetwork format the Cast builder only supports a Cast that
 // consumes an ArgMax, so these graph-input / Cast-fed Casts must fall back to
 // CPU. Guards the IsOpSupportedImpl reordering that moved the preceding-node

From 202825b36fd3ff657f2cd9a69d1302aa570f7f91 Mon Sep 17 00:00:00 2001
From: Max Buckley <maxwbuckley@gmail.com>
Date: Wed, 27 May 2026 15:11:50 +0100
Subject: [PATCH 3/5] Add CastBoolMLProgramPartition load-time test

yuslepukhin asked on PR #28595 for a positive ML-Program-side test that
confirms the partitioner claims the bool Cast nodes, even though we can't
numerically verify the round-trip (CoreML fuses back-to-back cast ops and
drops the bool clamp, so the output is the raw input value rather than 0/1
and a value-checking test cannot match the CPU reference).

Adds CastBoolMLProgramPartition, a sibling to
CastNonArgMaxNeuralNetworkNotSupported: same MakeCastBoolModelData()
graph, but with TestModelLoad + MakeCoreMLExecutionProvider("MLProgram")
and ExpectedEPNodeAssignment::All. Together the two tests guard:
  - HasSupportedInputsImpl now accepts bool (positive),
  - the "no preceding node" rejection now only applies to NeuralNetwork
    (negative).

Positive numerical coverage continues to live in the dependent #28597
(Where/And) and #28598 (GatherND) PRs, where a non-Cast op sits between
the int<->bool casts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../test/providers/coreml/coreml_basic_test.cc     | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index eca4ece912495..1d8576aa0f7df 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -2408,6 +2408,20 @@ TEST(CoreMLExecutionProviderTest, CastNonArgMaxNeuralNetworkNotSupported) {
   TestModelLoad(model_span, MakeCoreMLExecutionProvider(), ExpectedEPNodeAssignment::None);
 }
 
+// Load-time partition check on the ML Program path: confirms the EP claims
+// both bool Casts (the relaxed "no preceding node" branch + the bool dtype
+// gate added in HasSupportedInputsImpl). Numerical verification isn't
+// possible here because CoreML fuses back-to-back cast ops and drops the
+// bool clamp; the positive numerical coverage lives in the dependent
+// Where/And (#28597) and GatherND (#28598) PRs, where a non-Cast op sits
+// between the int<->bool casts.
+TEST(CoreMLExecutionProviderTest, CastBoolMLProgramPartition) {
+  const std::string model_data = MakeCastBoolModelData();
+  gsl::span<const std::byte> model_span{reinterpret_cast<const std::byte*>(model_data.data()),
+                                        model_data.size()};
+  TestModelLoad(model_span, MakeCoreMLExecutionProvider("MLProgram"), ExpectedEPNodeAssignment::All);
+}
+
 TEST(CoreMLExecutionProviderTest, GatherScalarIndicesAxis1) {
   // ai.onnx:Gather with rank-0 (scalar) 'indices'. ONNX output rank =
   // data_rank + indices_rank - 1 = 2. The CoreML builder internally promotes

From 27383f7a6bfc75931a15703c768f329d6ab7818d Mon Sep 17 00:00:00 2001
From: Max Buckley <maxwbuckley@gmail.com>
Date: Fri, 29 May 2026 21:25:16 +0100
Subject: [PATCH 4/5] Fix CastBoolMLProgramPartition: append non-trivial op so
 partition survives

The test built an int64->Cast(bool)->Cast(float) graph and asserted
ExpectedEPNodeAssignment::All on the ML Program path, but Cast is marked
IsTrivial and GetCapability drops any partition made up entirely of trivial
ops. The all-Cast partition was therefore dropped (0 nodes on CoreML), failing
the assertion on the arm64 CoreML runner.

Append a non-trivial Sqrt to the graph (via a new append_nontrivial flag on
MakeCastBoolModelData) so the partition is retained, letting the test assert
that both bool Casts are actually claimed by the EP. The NeuralNetwork negative
test keeps using the pure all-Cast graph (default flag) and is unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../providers/coreml/coreml_basic_test.cc     | 40 +++++++++++++------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index 1d8576aa0f7df..7db419592bf8f 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -2361,13 +2361,20 @@ TEST(CoreMLExecutionProviderTest, Split11SingleOutputNotSupported) {
 }
 
 namespace {
-// int64 -> Cast(bool) -> Cast(float); the first Cast is fed directly by a
-// graph input (no preceding node). Used by the NeuralNetwork negative test
-// below. Positive bool-Cast coverage lives in the dependent Where/And and
-// GatherND PRs, where a non-Cast op sits between the int<->bool casts -- a
-// standalone bool round-trip can't be numerically verified here because
-// CoreML fuses back-to-back cast ops (dropping the bool clamp).
-std::string MakeCastBoolModelData() {
+// int64 -> Cast(bool) -> Cast(float) [-> Sqrt]; the first Cast is fed directly
+// by a graph input (no preceding node).
+//
+// With append_nontrivial=false this is the all-Cast graph used by the
+// NeuralNetwork negative test below. With append_nontrivial=true a non-trivial
+// Sqrt is appended so the ML Program partition survives the all-trivial drop in
+// CoreMLExecutionProvider::GetCapability (Cast is marked IsTrivial, and a
+// partition made up only of trivial ops is dropped because it can't amortise
+// the CPU<->CoreML marshalling cost). That lets the partition test below assert
+// the bool Casts are actually claimed. A standalone bool round-trip still can't
+// be verified numerically here because CoreML fuses back-to-back cast ops
+// (dropping the bool clamp); positive numerical coverage lives in the dependent
+// Where/And (#28597) and GatherND (#28598) PRs.
+std::string MakeCastBoolModelData(bool append_nontrivial = false) {
   onnxruntime::Model model("cast_bool_test", false, DefaultLoggingManager().DefaultLogger());
   auto& graph = model.MainGraph();
 
@@ -2390,6 +2397,11 @@ std::string MakeCastBoolModelData() {
   auto& to_float = graph.AddNode("cast_to_float", "Cast", "bool -> float", {&b}, {&y});
   to_float.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT));
 
+  if (append_nontrivial) {
+    auto& z = graph.GetOrCreateNodeArg("Z", &float_type);
+    graph.AddNode("sqrt", "Sqrt", "float -> float", {&y}, {&z});
+  }
+
   ORT_THROW_IF_ERROR(graph.Resolve());
   std::string model_data;
   model.ToProto().SerializeToString(&model_data);
@@ -2410,13 +2422,15 @@ TEST(CoreMLExecutionProviderTest, CastNonArgMaxNeuralNetworkNotSupported) {
 
 // Load-time partition check on the ML Program path: confirms the EP claims
 // both bool Casts (the relaxed "no preceding node" branch + the bool dtype
-// gate added in HasSupportedInputsImpl). Numerical verification isn't
-// possible here because CoreML fuses back-to-back cast ops and drops the
-// bool clamp; the positive numerical coverage lives in the dependent
-// Where/And (#28597) and GatherND (#28598) PRs, where a non-Cast op sits
-// between the int<->bool casts.
+// gate added in HasSupportedInputsImpl). A non-trivial Sqrt is appended so the
+// partition isn't dropped as all-trivial (see MakeCastBoolModelData); all three
+// nodes -- both Casts and the Sqrt -- must land on CoreML. Numerical
+// verification isn't possible here because CoreML fuses back-to-back cast ops
+// and drops the bool clamp; the positive numerical coverage lives in the
+// dependent Where/And (#28597) and GatherND (#28598) PRs, where a non-Cast op
+// sits between the int<->bool casts.
 TEST(CoreMLExecutionProviderTest, CastBoolMLProgramPartition) {
-  const std::string model_data = MakeCastBoolModelData();
+  const std::string model_data = MakeCastBoolModelData(/*append_nontrivial=*/true);
   gsl::span<const std::byte> model_span{reinterpret_cast<const std::byte*>(model_data.data()),
                                         model_data.size()};
   TestModelLoad(model_span, MakeCoreMLExecutionProvider("MLProgram"), ExpectedEPNodeAssignment::All);

From 0d556b3f362eaff50cb1fe097c26ba4fe44df7ca Mon Sep 17 00:00:00 2001
From: Max Buckley <maxwbuckley@gmail.com>
Date: Sat, 30 May 2026 11:10:09 +0100
Subject: [PATCH 5/5] [CoreML EP] Trim cast-bool test comments

Drop the cross-PR references and internal-implementation context from the
MakeCastBoolModelData / CastBoolMLProgramPartition comments; keep just the
self-contained explanation of the append_nontrivial flag and the partition
assertion.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../providers/coreml/coreml_basic_test.cc     | 28 ++++++-------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index b0fb66129349f..77f43b60dd6f8 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -2364,16 +2364,11 @@ namespace {
 // int64 -> Cast(bool) -> Cast(float) [-> Sqrt]; the first Cast is fed directly
 // by a graph input (no preceding node).
 //
-// With append_nontrivial=false this is the all-Cast graph used by the
-// NeuralNetwork negative test below. With append_nontrivial=true a non-trivial
-// Sqrt is appended so the ML Program partition survives the all-trivial drop in
-// CoreMLExecutionProvider::GetCapability (Cast is marked IsTrivial, and a
-// partition made up only of trivial ops is dropped because it can't amortise
-// the CPU<->CoreML marshalling cost). That lets the partition test below assert
-// the bool Casts are actually claimed. A standalone bool round-trip still can't
-// be verified numerically here because CoreML fuses back-to-back cast ops
-// (dropping the bool clamp); positive numerical coverage lives in the dependent
-// Where/And (#28597) and GatherND (#28598) PRs.
+// append_nontrivial=false gives the all-Cast graph used by the NeuralNetwork
+// negative test below. append_nontrivial=true appends a Sqrt: a CoreML partition
+// made up only of trivial ops (Cast is marked trivial) is dropped, so the extra
+// non-trivial op keeps the partition and lets the test below assert the bool
+// Casts are claimed.
 std::string MakeCastBoolModelData(bool append_nontrivial = false) {
   onnxruntime::Model model("cast_bool_test", false, DefaultLoggingManager().DefaultLogger());
   auto& graph = model.MainGraph();
@@ -2444,15 +2439,10 @@ TEST(CoreMLExecutionProviderTest, CastNonArgMaxNeuralNetworkNotSupported) {
   TestModelLoad(model_span, MakeCoreMLExecutionProvider(), ExpectedEPNodeAssignment::None);
 }
 
-// Load-time partition check on the ML Program path: confirms the EP claims
-// both bool Casts (the relaxed "no preceding node" branch + the bool dtype
-// gate added in HasSupportedInputsImpl). A non-trivial Sqrt is appended so the
-// partition isn't dropped as all-trivial (see MakeCastBoolModelData); all three
-// nodes -- both Casts and the Sqrt -- must land on CoreML. Numerical
-// verification isn't possible here because CoreML fuses back-to-back cast ops
-// and drops the bool clamp; the positive numerical coverage lives in the
-// dependent Where/And (#28597) and GatherND (#28598) PRs, where a non-Cast op
-// sits between the int<->bool casts.
+// Load-time partition check on the ML Program path: confirms the EP claims both
+// bool Casts. A non-trivial Sqrt is appended so the partition isn't dropped as
+// all-trivial (see MakeCastBoolModelData); all three nodes -- both Casts and the
+// Sqrt -- must land on CoreML.
 TEST(CoreMLExecutionProviderTest, CastBoolMLProgramPartition) {
   const std::string model_data = MakeCastBoolModelData(/*append_nontrivial=*/true);
   gsl::span<const std::byte> model_span{reinterpret_cast<const std::byte*>(model_data.data()),