
Commit d548210

Add util function testing._is_single_token_bitmask (#294)
This PR adds a function _is_single_token_bitmask to xgrammar.testing that checks whether a bitmask allows exactly one token, returning that token's id if so.
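A minimal usage sketch of the new helper (both functions come from this diff; the vocabulary size and token id below are made up for illustration):

    import torch
    from xgrammar.testing import _bool_mask_to_bitmask, _is_single_token_bitmask

    vocab_size = 1024
    bool_mask = torch.zeros(1, vocab_size, dtype=torch.bool)
    bool_mask[0, 42] = True                     # allow exactly one token
    bitmask = _bool_mask_to_bitmask(bool_mask)  # pack bools into 32-bit blocks
    assert _is_single_token_bitmask(bitmask, vocab_size) == (True, 42)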
Parent: 536b372 · Commit: d548210

8 files changed: +110 additions, −4 deletions

cpp/grammar_matcher.cc

Lines changed: 10 additions & 0 deletions
@@ -66,6 +66,16 @@ void _DebugGetMaskedTokensFromBitmask(
   }
 }
 
+std::pair<bool, int> _IsSingleTokenBitmask(const DLTensor& bitmask, int vocab_size, int index) {
+  int32_t* data_ptr = CheckAndGetBitmaskPtr(bitmask, vocab_size, index);
+  DynamicBitset bitset(vocab_size, reinterpret_cast<uint32_t*>(data_ptr));
+  if (bitset.Count() == 1) {
+    return std::make_pair(true, bitset.FindFirstOne());
+  } else {
+    return std::make_pair(false, -1);
+  }
+}
+
 void ApplyTokenBitmaskInplaceCPU(
     DLTensor* logits,
     const DLTensor& bitmask,
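The bitmask packs one bit per token into 32-bit blocks, with bit i set when token i is allowed; the function checks that exactly one bit is set and reports its position. A pure-Python model of that logic, illustrative only (the real code goes through DynamicBitset, and padding bits past vocab_size must be ignored since _bool_mask_to_bitmask pads with ones):

    def is_single_token_bitmask(blocks, vocab_size):
        # Mask each 32-bit block; bits at positions >= vocab_size are padding.
        masked = []
        for i, block in enumerate(blocks):
            block &= 0xFFFFFFFF
            if (i + 1) * 32 > vocab_size:
                block &= (1 << max(vocab_size - i * 32, 0)) - 1
            masked.append(block)
        # cf. DynamicBitset::Count(): number of set (allowed) bits.
        if sum(bin(block).count("1") for block in masked) != 1:
            return (False, -1)
        # cf. DynamicBitset::FindFirstOne(): position of the lowest set bit.
        for i, block in enumerate(masked):
            if block:
                return (True, i * 32 + (block & -block).bit_length() - 1)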

cpp/nanobind/nanobind.cc

Lines changed: 2 additions & 1 deletion
@@ -210,7 +210,8 @@ NB_MODULE(xgrammar_bindings, m) {
       )
       .def("_regex_to_ebnf", &RegexToEBNF)
       .def("_ebnf_to_grammar_no_normalization", &_EBNFToGrammarNoNormalization)
-      .def("_get_masked_tokens_from_bitmask", &Matcher_DebugGetMaskedTokensFromBitmask)
+      .def("_get_masked_tokens_from_bitmask", &Testing_DebugGetMaskedTokensFromBitmask)
+      .def("_is_single_token_bitmask", &Testing_IsSingleTokenBitmask)
       .def("_get_allow_empty_rule_ids", &GetAllowEmptyRuleIds)
       .def(
           "_generate_range_regex",

cpp/nanobind/python_methods.cc

Lines changed: 19 additions & 1 deletion
@@ -60,7 +60,7 @@ bool GrammarMatcher_FillNextTokenBitmask(
   return matcher.FillNextTokenBitmask(&bitmask_dltensor, index, debug_print);
 }
 
-std::vector<int> Matcher_DebugGetMaskedTokensFromBitmask(
+std::vector<int> Testing_DebugGetMaskedTokensFromBitmask(
     intptr_t token_bitmask_ptr, std::vector<int64_t> shape, int32_t vocab_size, int32_t index
 ) {
   XGRAMMAR_CHECK(shape.size() == 1 || shape.size() == 2) << "token_bitmask tensor must be 1D or 2D";
@@ -80,6 +80,24 @@ std::vector<int> Matcher_DebugGetMaskedTokensFromBitmask(
   return result;
 }
 
+std::pair<bool, int> Testing_IsSingleTokenBitmask(
+    intptr_t token_bitmask_ptr, std::vector<int64_t> shape, int32_t vocab_size, int32_t index
+) {
+  XGRAMMAR_CHECK(shape.size() == 1 || shape.size() == 2) << "token_bitmask tensor must be 1D or 2D";
+
+  DLTensor bitmask_dltensor{
+      reinterpret_cast<void*>(token_bitmask_ptr),
+      DLDevice{kDLCPU, 0},
+      static_cast<int32_t>(shape.size()),
+      GetBitmaskDLType(),
+      shape.data(),
+      nullptr,
+      0
+  };
+
+  return _IsSingleTokenBitmask(bitmask_dltensor, vocab_size, index);
+}
+
 void Kernels_ApplyTokenBitmaskInplaceCPU(
     intptr_t logits_ptr,
     std::pair<int64_t, int64_t> logits_shape,

cpp/nanobind/python_methods.h

Lines changed: 5 additions & 1 deletion
@@ -35,7 +35,11 @@ bool GrammarMatcher_FillNextTokenBitmask(
     bool debug_print
 );
 
-std::vector<int> Matcher_DebugGetMaskedTokensFromBitmask(
+std::vector<int> Testing_DebugGetMaskedTokensFromBitmask(
+    intptr_t token_bitmask_ptr, std::vector<int64_t> shape, int32_t vocab_size, int32_t index
+);
+
+std::pair<bool, int> Testing_IsSingleTokenBitmask(
     intptr_t token_bitmask_ptr, std::vector<int64_t> shape, int32_t vocab_size, int32_t index
 );

cpp/support/dynamic_bitset.h

Lines changed: 24 additions & 0 deletions
@@ -134,6 +134,18 @@ class DynamicBitset {
     return *this;
   }
 
+  int FindFirstOne() const { return DoFindOneFrom(0); }
+
+  int FindNextOne(int pos) const {
+    if (pos >= size_ - 1 || size_ == 0) return -1;
+    ++pos;
+    int blk = pos / BITS_PER_BLOCK;
+    int ind = pos % BITS_PER_BLOCK;
+    uint32_t fore = data_[blk] >> ind;
+    int result = fore ? pos + LowestBit(fore) : DoFindOneFrom(blk + 1);
+    return result < size_ ? result : -1;
+  }
+
   int FindFirstZero() const { return DoFindZeroFrom(0); }
 
   int FindNextZero(int pos) const {
@@ -210,6 +222,18 @@ class DynamicBitset {
     return position * BITS_PER_BLOCK + LowestBit(~data_[position]);
   }
 
+  int DoFindOneFrom(int first_block) const {
+    int position = -1;
+    for (int i = first_block; i < buffer_size_; ++i) {
+      if (data_[i] != 0) {
+        position = i;
+        break;
+      }
+    }
+    if (position == -1) return -1;
+    return position * BITS_PER_BLOCK + LowestBit(data_[position]);
+  }
+
   // The size of the bitset.
   int size_;
   // The size of the buffer.
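FindNextOne mirrors the existing FindNextZero: shift the current block right so the bit after pos sits at the bottom, take its lowest set bit if any survive, and otherwise fall through to DoFindOneFrom on the remaining blocks. A small Python model of the same scan, assuming blocks is a list of non-negative 32-bit ints and using (x & -x).bit_length() - 1 in the role of LowestBit:

    BITS_PER_BLOCK = 32

    def lowest_bit(x):
        # Index of the lowest set bit of a nonzero int, i.e. C++ LowestBit(x).
        return (x & -x).bit_length() - 1

    def find_next_one(blocks, size, pos):
        # First set bit strictly after position pos, or -1 if there is none.
        if pos >= size - 1 or size == 0:
            return -1
        pos += 1
        blk, ind = divmod(pos, BITS_PER_BLOCK)
        fore = blocks[blk] >> ind  # remaining bits of the current block
        if fore:
            result = pos + lowest_bit(fore)
        else:
            # cf. DoFindOneFrom(blk + 1): lowest set bit of the next non-zero block.
            result = next(
                (i * BITS_PER_BLOCK + lowest_bit(b)
                 for i, b in enumerate(blocks[blk + 1:], start=blk + 1) if b),
                -1,
            )
        return result if 0 <= result < size else -1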

include/xgrammar/matcher.h

Lines changed: 2 additions & 0 deletions
@@ -26,6 +26,8 @@ void _DebugGetMaskedTokensFromBitmask(
     std::vector<int>* rejected_tokens, const DLTensor& token_bitmask, int vocab_size, int index = 0
 );
 
+std::pair<bool, int> _IsSingleTokenBitmask(const DLTensor& bitmask, int vocab_size, int index);
+
 void ApplyTokenBitmaskInplaceCPU(
     DLTensor* logits,
     const DLTensor& bitmask,

python/xgrammar/testing.py

Lines changed: 26 additions & 0 deletions
@@ -175,6 +175,32 @@ def _get_masked_tokens_from_bitmask(
     )
 
 
+def _is_single_token_bitmask(
+    bitmask: torch.Tensor, vocab_size: int, index: int = 0
+) -> Tuple[bool, int]:
+    """Check whether the bitmask allows exactly one token.
+
+    Parameters
+    ----------
+    bitmask : torch.Tensor
+        The bitmask to check. Should be on CPU.
+    vocab_size : int
+        The size of the vocabulary.
+    index : int, default: 0
+        The index of the bitmask.
+
+    Returns
+    -------
+    is_single_token : bool
+        True if the bitmask allows exactly one token, False otherwise.
+    token_id : int
+        The id of that token if the bitmask allows exactly one token, -1 otherwise.
+    """
+    return _core.testing._is_single_token_bitmask(
+        bitmask.data_ptr(), list(bitmask.shape), vocab_size, index
+    )
+
+
 def _bool_mask_to_bitmask(bool_mask: torch.Tensor) -> torch.Tensor:
     """Get the bitmask from bool mask. If the bool mask does not align with the 32-bit block
     size, it will add extra 1 paddings.

tests/python/test_token_bitmask_operations.py

Lines changed: 22 additions & 1 deletion
@@ -8,7 +8,11 @@
 import torch
 
 import xgrammar as xgr
-from xgrammar.testing import _bool_mask_to_bitmask, _get_masked_tokens_from_bitmask
+from xgrammar.testing import (
+    _bool_mask_to_bitmask,
+    _get_masked_tokens_from_bitmask,
+    _is_single_token_bitmask,
+)
 
 _is_cuda_available = torch.cuda.is_available()
 _is_mps_available = torch.backends.mps.is_available()
@@ -38,6 +42,23 @@ def test_get_masked_tokens_from_bitmask(token_mask_size: int, index: int)
     assert _get_masked_tokens_from_bitmask(bitmask, token_mask_size, index) == expected
 
 
+def test_is_single_token_bitmask():
+    batch = 2
+    batch_index = 1
+    vocab_size = 1024
+    token_id = 100
+
+    bool_mask = torch.zeros(batch, vocab_size, dtype=torch.bool)
+    bitmask = _bool_mask_to_bitmask(bool_mask)
+    assert _is_single_token_bitmask(bitmask, vocab_size, batch_index) == (False, -1)
+    bool_mask[batch_index, token_id] = True
+    bitmask = _bool_mask_to_bitmask(bool_mask)
+    assert _is_single_token_bitmask(bitmask, vocab_size, batch_index) == (True, token_id)
+    bool_mask[batch_index, token_id + 1] = True
+    bitmask = _bool_mask_to_bitmask(bool_mask)
+    assert _is_single_token_bitmask(bitmask, vocab_size, batch_index) == (False, -1)
+
+
 @pytest.mark.parametrize("device", ("cpu", "cuda"))
 def test_apply_token_bitmask_inplace(device: str):
     if device == "cuda" and not _is_cuda_available:
