Commit fe4df99
mgrange1998 authored and facebook-github-bot committed

GenerationAttack and HuggingFace predictor updates
Summary: This change updates GenerationAttack and the HuggingFace predictor with the following functionality:

- Fixes `batch_size=self.batch_size` so that it is propagated downstream properly, and updates the default value to 1 to reflect the intended behavior.
- Adds a `_generate_decode_logic` helper to encapsulate calling `model.generate`.
- Extends generation so that decoding can exclude the prompt from the returned text.

Differential Revision: D87341640
1 parent 97bc5d7 commit fe4df99
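
The new flag can be exercised as in the following minimal usage sketch, pieced together from the constructor and `generate` calls in the diffs below; the model name "gpt2" and device "cpu" are illustrative assumptions, not values from this commit.

```python
# Hedged usage sketch, not the repository's own example code.
from privacy_guard.attacks.extraction.predictors.huggingface_predictor import (
    HuggingFacePredictor,
)

# include_prompt_in_generation_result=False makes decoded generations
# omit the echoed prompt (see _generate_decode_logic below).
predictor = HuggingFacePredictor(
    "gpt2",  # illustrative model name
    "cpu",   # illustrative device
    include_prompt_in_generation_result=False,
)

# GenerationAttack now forwards batch_size=self.batch_size to generate();
# passing it directly here mirrors that call (assumption: generate()
# accepts batch_size as a keyword, as the attack-side call implies).
completions = predictor.generate(["The capital of France is"], batch_size=1)
print(completions)  # continuations only, without the prompt text
```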

File tree

3 files changed: +76 −10 lines changed

privacy_guard/attacks/extraction/generation_attack.py

Lines changed: 2 additions & 1 deletion

@@ -91,7 +91,7 @@ def __init__(
         input_column: str = "prompt",
         target_column: str = "target",
         output_column: str = "prediction",
-        batch_size: int = 4,
+        batch_size: int = 1,
         **generation_kwargs: Any,
     ) -> None:
         if output_file is None and output_format is not None:
@@ -133,6 +133,7 @@ def run_attack(self) -> TextInclusionAnalysisInput:
         logger.info(f"Generating text for {len(prompts)} prompts")
         generations = self.predictor.generate(
             prompts=prompts,
+            batch_size=self.batch_size,
             **self.generation_kwargs,
         )
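
The predictor's `generate` body is not part of this diff, so the chunking below is a hedged sketch of why `batch_size` must reach it; `generate_in_batches` and `process_batch` are hypothetical names, not code from the repository.

```python
# Standalone sketch: chunk prompts into batch_size groups, mirroring what a
# batch-aware generate() would do with the now-propagated batch_size.
from typing import Any, Callable, List

def generate_in_batches(
    prompts: List[str],
    process_batch: Callable[..., List[str]],  # stand-in for _generate_process_batch
    batch_size: int = 1,  # matches the new default in GenerationAttack
    **generation_kwargs: Any,
) -> List[str]:
    results: List[str] = []
    for start in range(0, len(prompts), batch_size):
        batch = prompts[start : start + batch_size]
        results.extend(process_batch(batch, **generation_kwargs))
    return results

# Before the fix, a caller's batch_size never reached the predictor; the
# propagated value now controls how many prompts share one generate call.
echoed = generate_in_batches(["a", "b", "c"], lambda b, **kw: list(b), batch_size=2)
assert echoed == ["a", "b", "c"]
```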

privacy_guard/attacks/extraction/predictors/huggingface_predictor.py

Lines changed: 37 additions & 9 deletions

@@ -36,6 +36,7 @@ def __init__(
         device: str | None = None,
         model_kwargs: Dict[str, Any] | None = None,
         tokenizer_kwargs: Dict[str, Any] | None = None,
+        include_prompt_in_generation_result: bool = True,
         **kwargs: Any,
     ) -> None:
         self.model_name: str = model_name
@@ -51,6 +52,7 @@ def __init__(
         self.tokenizer_kwargs: Dict[str, Any] = tokenizer_kwargs or {}
         self.model: PreTrainedModel
         self.tokenizer: PreTrainedTokenizer
+        self.include_prompt_in_generation_result = include_prompt_in_generation_result
         # Model already loaded on device - now pass the kwargs
         self.model, self.tokenizer = load_model_and_tokenizer(
             model_name,
@@ -73,15 +75,16 @@ def preprocess_batch(self, batch: List[str]) -> List[str]:
             clean_batch.append(item)
         return clean_batch
 
-    def _generate_process_batch(
-        self, batch: List[str], max_new_tokens: int = 512, **generation_kwargs: Any
+    def _generate_decode_logic(
+        self,
+        inputs: Dict[str, Any],
+        max_new_tokens: int = 512,
+        **generation_kwargs: Any,
     ) -> List[str]:
-        """Process a single batch of prompts."""
-        clean_batch = self.preprocess_batch(batch)
-
-        inputs = self.tokenizer(
-            clean_batch, return_tensors="pt", padding=True, truncation=True
-        ).to(self.device)
+        """Calls the correct generate call based on the model type.
+        Supports logic for returning only the generated text, or the full sample including
+        the prompt."""
+        include_prompt_in_generation_result = self.include_prompt_in_generation_result
 
         with torch.no_grad():
             # Handle both regular models and DDP-wrapped models
@@ -94,10 +97,35 @@ def _generate_process_batch(
                 **inputs, max_new_tokens=max_new_tokens, **generation_kwargs
             )
 
-        batch_results = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
+        if include_prompt_in_generation_result:
+            batch_results = self.tokenizer.batch_decode(
+                outputs, skip_special_tokens=True
+            )
+        else:
+            trimmed_outputs = []
+            for output, input_val in zip(outputs, inputs["input_ids"]):
+                trimmed_outputs.append(output[len(input_val) :])
+
+            batch_results = self.tokenizer.batch_decode(
+                trimmed_outputs, skip_special_tokens=True
+            )
 
         return batch_results
 
+    def _generate_process_batch(
+        self, batch: List[str], max_new_tokens: int = 512, **generation_kwargs: Any
+    ) -> List[str]:
+        """Process a single batch of prompts."""
+        clean_batch = self.preprocess_batch(batch)
+
+        inputs = self.tokenizer(
+            clean_batch, return_tensors="pt", padding=True, truncation=True
+        ).to(self.device)
+
+        return self._generate_decode_logic(
+            inputs=inputs, max_new_tokens=max_new_tokens, **generation_kwargs
+        )
+
     def generate(self, prompts: List[str], **generation_kwargs: Any) -> List[str]:
         """Generate text continuations for given prompts."""
         if not prompts:
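
The `include_prompt_in_generation_result=False` path works because `model.generate` returns the input ids followed by the newly generated tokens, so slicing off the first `len(input_ids)` tokens leaves only the continuation. A self-contained sketch of the same technique outside the predictor; the gpt2 checkpoint and pad-token handling are illustrative assumptions:

```python
# Hedged sketch of prompt trimming before decode, mirroring the
# else-branch of _generate_decode_logic above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 ships without a pad token
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer(["Hello, my name is"], return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=8)

# Each output row is [prompt tokens | generated tokens]; dropping the first
# len(input_ids) tokens keeps only the continuation, exactly like
# output[len(input_val):] in the diff. (With multi-prompt batches, padding
# side affects where prompt tokens sit; the predictor trims by input length.)
trimmed = [out[len(inp):] for out, inp in zip(outputs, inputs["input_ids"])]
print(tokenizer.batch_decode(trimmed, skip_special_tokens=True))
```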

privacy_guard/attacks/extraction/predictors/tests/test_huggingface_predictor.py

Lines changed: 37 additions & 0 deletions

@@ -90,6 +90,43 @@ def test_generate(self, mock_load_model_and_tokenizer: MagicMock) -> None:
         self.assertEqual(result, ["Generated text"])
         self.mock_model.generate.assert_called_once()
 
+    @patch(
+        "privacy_guard.attacks.extraction.predictors.huggingface_predictor.load_model_and_tokenizer"
+    )
+    def test_generate_no_prompt_in_result(
+        self, mock_load_model_and_tokenizer: MagicMock
+    ) -> None:
+        """Test generate functionality."""
+        mock_load_model_and_tokenizer.return_value = (
+            self.mock_model,
+            self.mock_tokenizer,
+        )
+
+        # Mock tokenizer responses
+        mock_inputs = MagicMock()
+        mock_inputs.to.return_value = {
+            "input_ids": torch.tensor([[1, 2, 3]]),
+            "attention_mask": torch.tensor([[1, 1, 1]]),
+        }
+        self.mock_tokenizer.return_value = mock_inputs
+        self.mock_tokenizer.batch_decode.return_value = [
+            "Generated text without prompt"
+        ]
+
+        predictor = HuggingFacePredictor(
+            self.model_name, self.device, include_prompt_in_generation_result=False
+        )
+
+        # Mock the tqdm within the generate method - patch the specific import
+        with patch(
+            "privacy_guard.attacks.extraction.predictors.huggingface_predictor.tqdm"
+        ) as mock_tqdm:
+            mock_tqdm.side_effect = lambda x, **kwargs: x
+            result = predictor.generate(["Test prompt"])
+
+        self.assertEqual(result, ["Generated text without prompt"])
+        self.mock_model.generate.assert_called_once()
+
     @patch(
         "privacy_guard.attacks.extraction.predictors.huggingface_predictor.load_model_and_tokenizer"
     )
