
Commit d068839

Add some detection of non-sequential neural networks
1 parent 069eb8b commit d068839

File tree

src/gurobi_ml/onnx/onnx_model.py
tests/test_onnx/test_onnx_exceptions.py

2 files changed, +188 -0 lines changed

src/gurobi_ml/onnx/onnx_model.py

Lines changed: 66 additions & 0 deletions
@@ -81,6 +81,69 @@ def __init__(self, gp_model, predictor, input_vars, output_vars=None, **kwargs):
 
         super().__init__(gp_model, predictor, input_vars, output_vars, **kwargs)
 
+    def _validate_sequential_architecture(self, graph, init_map):
+        """Validate that the graph has a sequential architecture.
+
+        Raises NoModel if the graph contains:
+        - Skip connections (same intermediate value used by multiple nodes)
+        - Residual connections (Add nodes combining non-bias values)
+        - Non-sequential topology
+        """
+        # Build usage map: which nodes use each tensor
+        tensor_usage = {}
+        for node in graph.node:
+            for inp in node.input:
+                if inp not in tensor_usage:
+                    tensor_usage[inp] = []
+                tensor_usage[inp].append(node.name)
+
+        # Check 1: Input should only be used by one node (first layer)
+        for graph_input in graph.input:
+            input_name = graph_input.name
+            if input_name in tensor_usage and len(tensor_usage[input_name]) > 1:
+                raise NoModel(
+                    graph,
+                    f"Non-sequential architecture detected: input '{input_name}' is used by multiple nodes {tensor_usage[input_name]}. "
+                    "Skip connections and residual architectures are not supported.",
+                )
+
+        # Check 2: Each intermediate node output should be used by at most one node
+        # (except for the final output which may not be used by any node)
+        for node in graph.node:
+            for output in node.output:
+                if output in tensor_usage and len(tensor_usage[output]) > 1:
+                    raise NoModel(
+                        graph,
+                        f"Non-sequential architecture detected: node '{node.name}' output '{output}' is used by multiple nodes {tensor_usage[output]}. "
+                        "Skip connections and residual architectures are not supported.",
+                    )
+
+        # Check 3: Add nodes should only be used for bias addition (MatMul+Add pattern)
+        # Not for combining two computed branches (residual connections)
+        for node in graph.node:
+            if node.op_type == "Add":
+                # An Add is valid if one of its inputs is an initializer (bias)
+                # and the other is from a MatMul
+                inputs = list(node.input)
+                if len(inputs) != 2:
+                    continue
+
+                # Check if this is a MatMul+Add pattern (one input from MatMul, one is initializer)
+                is_bias_add = False
+                for inp in inputs:
+                    if inp in init_map:
+                        # One input is a constant (bias)
+                        is_bias_add = True
+                        break
+
+                if not is_bias_add:
+                    # Both inputs are computed values - this is a residual connection
+                    raise NoModel(
+                        graph,
+                        f"Non-sequential architecture detected: Add node '{node.name}' combines two computed values {inputs}. "
+                        "Residual connections are not supported.",
+                    )
+
     def _parse_mlp(self, model: onnx.ModelProto) -> list[_ONNXLayer]:
         """Parse a limited subset of ONNX graphs representing MLPs.
 
@@ -106,6 +169,9 @@ def _get_attr(node, name, default=None):
                     return float(a.f)
             return default
 
+        # Validate that the graph is sequential (no skip connections or residual adds)
+        self._validate_sequential_architecture(graph, init_map)
+
         # Build a map from output name to node for easier traversal
         output_to_node = {}
         for node in graph.node:
tests/test_onnx/test_onnx_exceptions.py

Lines changed: 122 additions & 0 deletions
@@ -27,3 +27,125 @@ def test_unsupported_op(self):
         x = m.addMVar(example.shape, lb=0.0, ub=1.0, name="x")
         with self.assertRaises(NoModel):
             add_predictor_constr(m, model, x)
+
+    def test_skip_connection_rejected(self):
+        # Build a model with skip connection: input used by multiple nodes
+        n_in, n_hidden, n_out = 4, 8, 2
+
+        W1 = np.random.randn(n_in, n_hidden).astype(np.float32)
+        b1 = np.random.randn(n_hidden).astype(np.float32)
+        W2 = np.random.randn(n_hidden, n_out).astype(np.float32)
+        b2 = np.random.randn(n_out).astype(np.float32)
+        W_skip = np.random.randn(n_in, n_out).astype(np.float32)
+        b_skip = np.random.randn(n_out).astype(np.float32)
+
+        X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, n_in])
+        Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None, n_out])
+
+        init_W1 = helper.make_tensor(
+            "W1", TensorProto.FLOAT, W1.T.shape, W1.T.flatten()
+        )
+        init_b1 = helper.make_tensor("b1", TensorProto.FLOAT, b1.shape, b1)
+        init_W2 = helper.make_tensor(
+            "W2", TensorProto.FLOAT, W2.T.shape, W2.T.flatten()
+        )
+        init_b2 = helper.make_tensor("b2", TensorProto.FLOAT, b2.shape, b2)
+        init_W_skip = helper.make_tensor(
+            "W_skip", TensorProto.FLOAT, W_skip.T.shape, W_skip.T.flatten()
+        )
+        init_b_skip = helper.make_tensor(
+            "b_skip", TensorProto.FLOAT, b_skip.shape, b_skip
+        )
+
+        # Main path
+        gemm1 = helper.make_node("Gemm", ["X", "W1", "b1"], ["H1"], transB=1)
+        relu1 = helper.make_node("Relu", ["H1"], ["A1"])
+        gemm2 = helper.make_node("Gemm", ["A1", "W2", "b2"], ["branch1"], transB=1)
+
+        # Skip connection path - uses X again!
+        gemm_skip = helper.make_node(
+            "Gemm", ["X", "W_skip", "b_skip"], ["branch2"], transB=1
+        )
+
+        # Combine branches (residual add)
+        add = helper.make_node("Add", ["branch1", "branch2"], ["Y"])
+
+        graph = helper.make_graph(
+            [gemm1, relu1, gemm2, gemm_skip, add],
+            "SkipConnectionMLP",
+            [X],
+            [Y],
+            [init_W1, init_b1, init_W2, init_b2, init_W_skip, init_b_skip],
+        )
+
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 18)])
+        model.ir_version = 9
+        onnx.checker.check_model(model)
+
+        m = gp.Model()
+        x = m.addMVar((n_in,), lb=-1.0, ub=1.0, name="x")
+        with self.assertRaises(NoModel) as cm:
+            add_predictor_constr(m, model, x)
+
+        # Verify the error message mentions skip connections
+        self.assertIn("skip connection", str(cm.exception).lower())
+
+    def test_residual_connection_rejected(self):
+        # Build a model with residual connection: intermediate value used by multiple nodes
+        n_in, n_hidden, n_out = 4, 8, 2
+
+        W1 = np.random.randn(n_in, n_hidden).astype(np.float32)
+        b1 = np.random.randn(n_hidden).astype(np.float32)
+        W2a = np.random.randn(n_hidden, n_out).astype(np.float32)
+        b2a = np.random.randn(n_out).astype(np.float32)
+        W2b = np.random.randn(n_hidden, n_out).astype(np.float32)
+        b2b = np.random.randn(n_out).astype(np.float32)
+
+        X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, n_in])
+        Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None, n_out])
+
+        init_W1 = helper.make_tensor(
+            "W1", TensorProto.FLOAT, W1.T.shape, W1.T.flatten()
+        )
+        init_b1 = helper.make_tensor("b1", TensorProto.FLOAT, b1.shape, b1)
+        init_W2a = helper.make_tensor(
+            "W2a", TensorProto.FLOAT, W2a.T.shape, W2a.T.flatten()
+        )
+        init_b2a = helper.make_tensor("b2a", TensorProto.FLOAT, b2a.shape, b2a)
+        init_W2b = helper.make_tensor(
+            "W2b", TensorProto.FLOAT, W2b.T.shape, W2b.T.flatten()
+        )
+        init_b2b = helper.make_tensor("b2b", TensorProto.FLOAT, b2b.shape, b2b)
+
+        # Shared layer
+        gemm1 = helper.make_node("Gemm", ["X", "W1", "b1"], ["H1"], transB=1)
+        relu1 = helper.make_node("Relu", ["H1"], ["A1"])
+
+        # Branch 1 - uses A1
+        gemm2a = helper.make_node("Gemm", ["A1", "W2a", "b2a"], ["branch1"], transB=1)
+
+        # Branch 2 - also uses A1!
+        gemm2b = helper.make_node("Gemm", ["A1", "W2b", "b2b"], ["branch2"], transB=1)
+
+        # Combine branches
+        add = helper.make_node("Add", ["branch1", "branch2"], ["Y"])
+
+        graph = helper.make_graph(
+            [gemm1, relu1, gemm2a, gemm2b, add],
+            "ResidualMLP",
+            [X],
+            [Y],
+            [init_W1, init_b1, init_W2a, init_b2a, init_W2b, init_b2b],
+        )
+
+        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 18)])
+        model.ir_version = 9
+        onnx.checker.check_model(model)
+
+        m = gp.Model()
+        x = m.addMVar((n_in,), lb=-1.0, ub=1.0, name="x")
+        with self.assertRaises(NoModel) as cm:
+            add_predictor_constr(m, model, x)
+
+        # Verify the error message mentions the architecture issue
+        self.assertIn("non-sequential", str(cm.exception).lower())
