Commit 5c95436

Apply DimensionInputInfo to SAM sample
1 parent 43b1de6 commit 5c95436

3 files changed, +3 -10 lines

tripy/examples/segment-anything-model-v2/sam2/build_sam.py

Lines changed: 1 addition & 2 deletions
@@ -81,8 +81,7 @@ def get_component_configs(model, cfg):
                 (seq_len, mem_attention_batch, 64),
                 getattr(tp, model_precision),
             ),
-            # TODO (#594): Remove this hack once we are able to pass in DimensionSizes directly:
-            tp.InputInfo(((4, 16, 64),), tp.int32),
+            tp.DimensionInputInfo(value_bounds=(4, 16, 64)),
         ],
         "skip_dtype_convert": [],
     },
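For context, a minimal sketch (not part of this commit) of how a DimensionInputInfo entry is consumed at compile time; the tp.compile call, its args keyword, and the import alias are assumptions based on how the sample already uses Tripy:

import tripy as tp  # assumed alias, matching the sample's existing usage

def takes_a_size(x: tp.Tensor, n: tp.DimensionSize) -> tp.Tensor:
    # The body is immaterial for this sketch; the point is that `n` arrives as a
    # DimensionSize rather than as a 1-D int32 tensor whose length encodes the value.
    return tp.reshape(x, (n, 64))

compiled = tp.compile(
    takes_a_size,
    args=[
        tp.InputInfo(((4, 16, 64), 64), tp.float16),      # tensor input with a dynamic leading dim
        tp.DimensionInputInfo(value_bounds=(4, 16, 64)),  # min/opt/max bounds for the size argument
    ],
)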

tripy/examples/segment-anything-model-v2/sam2/modeling/memory_attention.py

Lines changed: 1 addition & 3 deletions
@@ -186,10 +186,8 @@ def forward(
         memory: tp.Tensor,  # cross-attention inputs
         curr_pos: Optional[tp.Tensor] = None,  # pos_enc for self-attention inputs
         memory_pos: Optional[tp.Tensor] = None,  # pos_enc for cross-attention inputs
-        num_obj_ptr_tokens: Optional[tp.Tensor] = None,  # number of object pointer *tokens*
+        num_obj_ptr_tokens: Optional[tp.DimensionSize] = None,  # number of object pointer *tokens*
     ):
-        # TODO (#594): Remove this hack once we are able to pass in DimensionSizes directly:
-        num_obj_ptr_tokens = num_obj_ptr_tokens.shape[0]
         output = curr
         if self.pos_enc_at_input and curr_pos is not None:
             output = output + 0.1 * curr_pos
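A minimal, illustrative sketch of what the signature change means inside the function (the real forward() goes on to run the attention layers; names here are placeholders):

import tripy as tp  # assumed alias
from typing import Optional

def forward_sketch(curr: tp.Tensor, num_obj_ptr_tokens: Optional[tp.DimensionSize] = None) -> tp.Tensor:
    # Previously the count was smuggled in as a 1-D int32 tensor and recovered with
    #     num_obj_ptr_tokens = num_obj_ptr_tokens.shape[0]
    # With the new signature it is already a DimensionSize and can be used directly
    # wherever the attention layers need the object-pointer token count.
    return curr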

tripy/examples/segment-anything-model-v2/sam2/modeling/sam2_base.py

Lines changed: 1 addition & 5 deletions
@@ -242,8 +242,6 @@ def _build_sam_heads(self):
         else:
             self.obj_ptr_tpos_proj = torch.nn.Identity()
 
-        self.fake_object_ptrs = torch.ones((1,), dtype=torch.int32, device="cuda")
-
     def _forward_sam_heads(
         self,
         backbone_features,
@@ -667,14 +665,12 @@ def _prepare_memory_conditioned_features(
         memory = torch.cat(to_cat_memory, dim=0)
         memory_pos_embed = torch.cat(to_cat_memory_pos_embed, dim=0)
         if isinstance(self.memory_attention, tp.Module) or isinstance(self.memory_attention, tp.Executable):
-            if self.fake_object_ptrs.shape != (num_obj_ptr_tokens,):
-                self.fake_object_ptrs = torch.ones((num_obj_ptr_tokens,), dtype=torch.int32, device="cuda")
             pix_feat_with_mem = self.memory_attention(
                 curr=tp.Tensor(current_vision_feats[0].half().contiguous()),
                 memory=tp.Tensor(memory.half().contiguous()),
                 curr_pos=tp.Tensor(current_vision_pos_embeds[0].half().contiguous()),
                 memory_pos=tp.Tensor(memory_pos_embed.half().contiguous()),
-                num_obj_ptr_tokens=tp.Tensor(self.fake_object_ptrs),
+                num_obj_ptr_tokens=tp.DimensionSize(num_obj_ptr_tokens),
             )
         else:
             pix_feat_with_mem = self.memory_attention(
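At the call site, the change boils down to wrapping a plain Python integer instead of allocating a placeholder CUDA tensor. A minimal sketch of that boundary (import aliases are assumptions matching the sample; the old-style lines mirror the code removed above):

import torch
import tripy as tp  # assumed alias

num_obj_ptr_tokens = 16  # plain Python int computed on the PyTorch side

# Old hack: allocate a CUDA tensor of that length solely so the compiled module
# could recover the count from the tensor's shape inside forward().
fake_object_ptrs = torch.ones((num_obj_ptr_tokens,), dtype=torch.int32, device="cuda")
old_style_arg = tp.Tensor(fake_object_ptrs)

# New approach: wrap the integer directly; no placeholder tensor is needed.
new_style_arg = tp.DimensionSize(num_obj_ptr_tokens)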
