huggingface · Flakes342 · Nov 20, 2025 · Nov 20, 2025 · Nov 21, 2025 · Nov 24, 2025
diff --git a/src/transformers/models/blip_2/processing_blip_2.py b/src/transformers/models/blip_2/processing_blip_2.py
@@ -67,7 +67,8 @@ def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs):
             tokenizer.add_tokens([self.image_token], special_tokens=True)
         else:
             self.image_token = tokenizer.image_token
-        self.num_query_tokens = num_query_tokens
+        # Default to 32 if missing, matching official BLIP-2 checkpoints
+        self.num_query_tokens = num_query_tokens if num_query_tokens is not None else 32
 
         super().__init__(image_processor, tokenizer)
 
@@ -107,8 +108,9 @@ def __call__(
         return_tensors = output_kwargs["text_kwargs"].pop("return_tensors", None)
         max_length = output_kwargs["text_kwargs"].pop("max_length", None)
         if max_length is not None:
-            output_kwargs["text_kwargs"]["max_length"] = max_length - self.num_query_tokens
-
+            adjusted_max_length = max_length - self.num_query_tokens
+            if adjusted_max_length > 0:
+                output_kwargs["text_kwargs"]["max_length"] = adjusted_max_length
         encoding = BatchFeature(tensor_type=return_tensors)
         if text is not None:
             if isinstance(text, str):

diff --git a/tests/models/blip_2/test_processing_blip_2.py b/tests/models/blip_2/test_processing_blip_2.py
@@ -118,3 +118,16 @@ def test_tokenizer_decode(self):
         decoded_tok = tokenizer.batch_decode(predicted_ids)
 
         self.assertListEqual(decoded_tok, decoded_processor)
+
+    def test_none_num_query_tokens_is_handled(self):
+        """`num_query_tokens=None` falls back to 32 and a text-only call still tokenizes."""
+        processor = Blip2Processor(
+            image_processor=self.get_image_processor(),
+            tokenizer=self.get_tokenizer(),
+            num_query_tokens=None,
+        )
+        # max_length=20 is below the 32 query tokens, so the adjusted text length is non-positive.
+        outputs = processor(text="hello world", max_length=20, return_tensors="np")
+        self.assertEqual(processor.num_query_tokens, 32)
+        for expected_key in ("input_ids", "attention_mask"):
+            self.assertIn(expected_key, outputs)