wip

yf225 · yf225 · commit 37daf60d2730 · 2025-11-13T20:07:50.000-08:00
diff --git a/helion/_compiler/indexing_strategy.py b/helion/_compiler/indexing_strategy.py
@@ -614,15 +614,17 @@ def compute_shape(
                 else:
                     output_size.append(1)
                 k_index += 1
-            elif isinstance(k, torch.Tensor) and (
-                k.ndim == 1 or (len(index) == 1 and tensor.ndim == 1)
-            ):
+            elif isinstance(k, torch.Tensor):
+                # Handle tensor indexing (both 1D and multi-dimensional)
                 input_size.popleft()
+                # Add all dimensions of the indexing tensor to output
                 output_size.extend(k.size())
                 k_index += 1
             else:
                 raise exc.InvalidIndexingType(k)
-        assert len(input_size) == 0, "invalid subscript"
+        # Advanced indexing might not consume all dimensions
+        # Add any remaining dimensions from the input
+        output_size.extend(input_size)
         return output_size
 
     @staticmethod
diff --git a/helion/_compiler/type_propagation.py b/helion/_compiler/type_propagation.py
@@ -456,6 +456,7 @@ def _device_indexing_size(self, key: TypeInfo) -> list[int | torch.SymInt]:
         inputs_consumed = 0
         output_sizes = []
         env = CompileEnvironment.current()
+
         for k in keys:
             if isinstance(k, LiteralType):
                 if isinstance(k.value, (int, torch.SymInt)):
@@ -501,19 +502,33 @@ def _device_indexing_size(self, key: TypeInfo) -> list[int | torch.SymInt]:
                 raise exc.DataDependentOutputShapeNotSupported(
                     op_desc="Boolean mask indexing (tensor[boolean_mask])"
                 )
-            elif isinstance(k, TensorType) and k.fake_value.ndim == 1:
+            elif isinstance(k, TensorType):
+                # Handle tensor indexing (both 1D and multi-dimensional)
+                # Each tensor index consumes one input dimension and appends all of its
+                # own dimensions to the output; indices are not broadcast together here
                 inputs_consumed += 1
-                output_sizes.append(k.fake_value.size(0))
+                # Add all dimensions of the tensor for multi-dimensional indexing
+                for dim in range(k.fake_value.ndim):
+                    output_sizes.append(k.fake_value.size(dim))
             elif k.contains_type(TileIndexType):
                 raise exc.OverpackedTile(k)
             else:
                 raise exc.InvalidIndexingType(k)
-        if inputs_consumed != self.fake_value.ndim:
+        # Advanced indexing with tensors can consume fewer dimensions than the tensor has
+        # Only check for consuming too many dimensions
+        if inputs_consumed > self.fake_value.ndim:
             raise exc.RankMismatch(
                 self.fake_value.ndim,
                 inputs_consumed,
-                f"tensor shape: {tuple(self.fake_value.shape)}",
+                f"tensor shape: {tuple(self.fake_value.shape)}, consumed {inputs_consumed} dimensions",
             )
+
+        # Add any remaining dimensions from the original tensor
+        # This handles cases like tensor[idx] where tensor is multi-dimensional
+        # and idx is a tensor that only indexes the first dimension
+        for dim in range(inputs_consumed, self.fake_value.ndim):
+            output_sizes.append(self.fake_value.size(dim))
+
         return output_sizes
 
     def propagate_setitem(