
Commit 9fd874b

Code reviews
1 parent 9108fdc commit 9fd874b


4 files changed: +40 -56 lines changed

docs/source/user/start.rst

Lines changed: 2 additions & 3 deletions
@@ -159,9 +159,8 @@ in the :doc:`../auto_examples/index` section.
 Variable shapes: For tabular models (scikit-learn, tree ensembles, dense
 neural nets), inputs are typically 2D MVars with shape ``(batch, features)``
 and outputs are 1D or 2D (the package orients a 1D output based on the
-batch size). For convolutional neural networks (Keras/PyTorch), inputs can be
-4D MVars with shape ``(batch, H, W, C)`` (channels-last). A 3D input of shape
-``(H, W, C)`` is automatically interpreted as a single-batch input.
+batch size). For convolutional neural networks (Keras/PyTorch), use 4D MVars
+with shape ``(batch, H, W, C)`` (channels-last).


 .. rubric:: Footnotes
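
For reference, a minimal sketch of the usage the revised paragraph prescribes. The tiny Keras model, its layer sizes, and the variable bounds are illustrative assumptions; `add_predictor_constr` and `Model.addMVar` are the usual gurobi_ml/gurobipy entry points, and CNN support itself is what this branch adds:

import gurobipy as gp
from tensorflow import keras
from gurobi_ml import add_predictor_constr

# Illustrative, untrained channels-last CNN (only the shapes matter here).
cnn = keras.Sequential(
    [
        keras.Input(shape=(28, 28, 1)),
        keras.layers.Conv2D(4, 3, activation="relu"),
        keras.layers.MaxPooling2D(2),
        keras.layers.Flatten(),
        keras.layers.Dense(10),
    ]
)

m = gp.Model()
# 4D NHWC input: batch of 1, 28x28 pixels, 1 channel.
x = m.addMVar((1, 28, 28, 1), lb=0.0, ub=1.0, name="x")
# A 3D (28, 28, 1) MVar is no longer auto-batched; the batch axis must be explicit.
pred_constr = add_predictor_constr(m, cnn, x)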

src/gurobi_ml/modeling/_var_utils.py

Lines changed: 8 additions & 13 deletions
@@ -308,15 +308,12 @@ def validate_input_vars(model, gp_vars, accepted_dim=(1, 2)):
             return (mv.reshape(1, -1), None, None)
         if mv.ndim in accepted_dim:
             return (mv, None, None)
-        # Try to add a leading batch dimension if that makes it valid
-        if (mv.ndim + 1) in accepted_dim:
-            if mv.ndim == 1:
-                return (mv.reshape(1, -1), None, None)
-            if mv.ndim == 3:
-                return (mv.reshape((1,) + mv.shape), None, None)
+        # Only allow legacy 1D -> 2D promotion; do not auto-batch 3D to 4D.
+        if (mv.ndim + 1) in accepted_dim and mv.ndim == 1:
+            return (mv.reshape(1, -1), None, None)
         raise ParameterError(
-            "Variables should be an MVar of dimension {} and is dimension {}".format(
-                " or ".join([f"{d}" for d in accepted_dim]), mv.ndim
+            "Variables should be an MVar of dimension {}".format(
+                " or ".join([f"{d}" for d in accepted_dim])
             )
         )

@@ -349,11 +346,9 @@ def validate_input_vars(model, gp_vars, accepted_dim=(1, 2)):
             return (mv.reshape(1, -1), None, None)
         if mv.ndim in accepted_dim:
             return (mv, None, None)
-        if (mv.ndim + 1) in accepted_dim:
-            if mv.ndim == 1:
-                return (mv.reshape(1, -1), None, None)
-            if mv.ndim == 3:
-                return (mv.reshape((1,) + mv.shape), None, None)
+        # Only allow legacy 1D -> 2D promotion; do not auto-batch 3D to 4D.
+        if (mv.ndim + 1) in accepted_dim and mv.ndim == 1:
+            return (mv.reshape(1, -1), None, None)
         raise ParameterError(
             "Input variables have dimension {} but expected {}".format(
                 mv.ndim, ", ".join(map(str, accepted_dim))
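
Concretely, only the 1D case is still promoted. A short illustration of the shape rule (the calls below are illustrative; `validate_input_vars` is an internal helper and `accepted_dim=(1, 2, 4)` is what the torch embedding now passes):

import gurobipy as gp

m = gp.Model()
x1 = m.addMVar(8)               # 1D: still promoted to shape (1, 8)
x2 = m.addMVar((5, 8))          # 2D: accepted as-is
x3 = m.addMVar((28, 28, 1))     # 3D: now rejected with ParameterError (no auto-batching)
x4 = m.addMVar((1, 28, 28, 1))  # 4D NHWC: accepted for CNN inputs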

src/gurobi_ml/modeling/neuralnet/layers.py

Lines changed: 19 additions & 29 deletions
@@ -227,12 +227,6 @@ def _create_output_vars(self, input_vars):
             int(output_shape_1),
             self.channels,
         )
-        print(
-            f"Conv2D layer with input shape {input_vars.shape} gives output shape {output_shape}"
-        )
-        print(
-            f" kernel size {self.kernel_size}, stride {self.strides}, padding {self.padding}"
-        )
         rval = self.gp_model.addMVar(output_shape, lb=-gp.GRB.INFINITY, name="act")
         self.gp_model.update()
         return rval
@@ -242,7 +236,8 @@ def _mip_model(self, **kwargs):
         model = self.gp_model
         model.update()

-        (_, height, width, _) = self.input.shape
+        (_, height, width, in_c) = self.input.shape
+        out_n, out_h, out_w, out_c = self.output.shape
         mixing = self.gp_model.addMVar(
             self.output.shape,
             lb=-gp.GRB.INFINITY,
@@ -254,25 +249,24 @@

         assert self.padding == "valid"

-        # Here comes the complicated loop...
-        # I am sure there is a better way but this is a pedestrian version
-        kernel_w, kernel_h = self.kernel_size
-        stride_h, stride_w = self.strides
-        for k in range(self.channels):
-            for out_i, i in enumerate(range(0, height - kernel_h + 1, stride_h)):
-                if i + kernel_h > height:
+        kh, kw = self.kernel_size
+        sh, sw = self.strides
+        # Pre-flatten kernel to (kh*kw*in_c, out_c) for efficient batched matmul
+        coefs_flat = self.coefs.reshape(int(kh * kw * in_c), int(out_c))
+
+        for oi in range(int(out_h)):
+            i = oi * sh
+            if i + kh > height:
+                continue
+            for oj in range(int(out_w)):
+                j = oj * sw
+                if j + kw > width:
                     continue
-                for out_j, j in enumerate(range(0, width - kernel_w + 1, stride_w)):
-                    if j + kernel_w > width:
-                        continue
-                    self.gp_model.addConstr(
-                        mixing[:, out_i, out_j, k]
-                        == (
-                            self.input[:, i : i + kernel_h, j : j + kernel_w, :]
-                            * self.coefs[:, :, :, k]
-                        ).sum()
-                        + self.intercept[k]
-                    )
+                # Extract patch (batch, kh, kw, in_c) and flatten to (batch, kh*kw*in_c)
+                patch = self.input[:, i : i + kh, j : j + kw, :]
+                patch2d = patch.reshape(int(out_n), int(kh * kw * in_c))
+                expr = patch2d @ coefs_flat + self.intercept
+                self.gp_model.addConstr(mixing[:, oi, oj, :] == expr)

         if "activation" in kwargs:
             activation = kwargs["activation"]
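
A quick numpy check (not part of the commit) that the flattened-kernel matmul is algebraically identical to the old per-channel elementwise sum, assuming the channels-last kernel layout `(kh, kw, in_c, out_c)`:

import numpy as np

batch, kh, kw, in_c, out_c = 2, 3, 3, 5, 4
patch = np.random.rand(batch, kh, kw, in_c)   # one receptive field per batch item
coefs = np.random.rand(kh, kw, in_c, out_c)   # channels-last Conv2D kernel

# Old formulation: for each output channel k, sum the elementwise product.
old = np.stack(
    [(patch * coefs[:, :, :, k]).sum(axis=(1, 2, 3)) for k in range(out_c)], axis=1
)
# New formulation: flatten patch and kernel once, one matmul covers all channels.
new = patch.reshape(batch, kh * kw * in_c) @ coefs.reshape(kh * kw * in_c, out_c)
assert np.allclose(old, new)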
@@ -313,7 +307,6 @@ def __init__(self, gp_model, output_vars, input_vars, **kwargs):
     def _create_output_vars(self, input_vars):
         assert len(input_vars.shape) >= 2
         output_shape = (input_vars.shape[0], int(np.prod(input_vars.shape[1:])))
-        print(f"Flattening {input_vars.shape} into {output_shape}")
         rval = self.gp_model.addMVar(output_shape, lb=-gp.GRB.INFINITY, name="act")
         self.gp_model.update()
         return rval
@@ -370,9 +363,6 @@ def _create_output_vars(self, input_vars):
         )
         rval = self.gp_model.addMVar(output_shape, lb=-gp.GRB.INFINITY, name="act")
         self.gp_model.update()
-        print(
-            f"MaxPool2D layer with input shape {input_vars.shape} gives output shape {output_shape}"
-        )
         return rval

     def _mip_model(self, **kwargs):

src/gurobi_ml/torch/sequential.py

Lines changed: 11 additions & 11 deletions
@@ -121,10 +121,10 @@ def __init__(self, gp_model, predictor, input_vars, output_vars=None, **kwargs):
             if isinstance(step, nn.Softmax):
                 raise NoModel(predictor, "Softmax activation is not supported")
             raise NoModel(predictor, f"Unsupported layer {type(step).__name__}")
-        # Accept both tabular (1D/2D) and spatial (3D/4D NHWC) inputs at the top level.
-        # validate_input_vars will add a batch dimension for 1D/3D inputs as needed.
+        # Accept tabular (1D/2D) and spatial (4D NHWC) inputs at the top level.
+        # 3D inputs are not auto-batched; users should pass 4D for CNNs.
         super().__init__(
-            gp_model, predictor, input_vars, output_vars, accepted_dim=(1, 2, 3, 4)
+            gp_model, predictor, input_vars, output_vars, accepted_dim=(1, 2, 4)
         )

     def _mip_model(self, **kwargs):
@@ -169,14 +169,14 @@ def _mip_model(self, **kwargs):
                 )
                 N = H * W * C
                 if layer_weight.shape[0] == N:
-                    pt_index_for_mip = [0] * N
-                    for h in range(H):
-                        for w in range(W):
-                            for c in range(C):
-                                k_mip = h * (W * C) + w * C + c
-                                j_pt = c * (H * W) + h * W + w
-                                pt_index_for_mip[k_mip] = j_pt
-                    layer_weight = layer_weight[np.array(pt_index_for_mip), :]
+                    # Build a vectorized mapping from MIP NHWC row-major order
+                    # (h,w,c) to PyTorch's NCHW flatten order (c,h,w).
+                    # idx has shape (C,H,W) with values 0..N-1 in PyTorch order.
+                    idx = np.arange(N).reshape(C, H, W)
+                    # Transpose to (H,W,C) and ravel to get, for each MIP row k,
+                    # the corresponding PyTorch row j.
+                    pt_index_for_mip = idx.transpose(1, 2, 0).ravel()
+                    layer_weight = layer_weight[pt_index_for_mip, :]
                 pre_flat_spatial_shape = None
                 layer = self._add_dense_layer(
                     _input,
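
A quick numpy check (not part of the commit) that the vectorized mapping reproduces the old triple loop over (h, w, c):

import numpy as np

H, W, C = 4, 5, 3
N = H * W * C

# Old triple-loop mapping from MIP row k (NHWC order) to PyTorch row j (NCHW order).
loop_idx = [0] * N
for h in range(H):
    for w in range(W):
        for c in range(C):
            loop_idx[h * (W * C) + w * C + c] = c * (H * W) + h * W + w

# New vectorized mapping.
vec_idx = np.arange(N).reshape(C, H, W).transpose(1, 2, 0).ravel()
assert np.array_equal(vec_idx, np.array(loop_idx))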
