Skip to content

Commit 9f12bf9

Browse files
mgrange1998facebook-github-bot
authored and committed
Add GPTOSSPredictor to apply correct template format (facebookresearch#82)
Summary: Pull Request resolved: facebookresearch#82 Differential Revision: D86975066
1 parent 6699947 commit 9f12bf9

File tree

2 files changed

+325
-0
lines changed

2 files changed

+325
-0
lines changed
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# pyre-strict
16+
17+
"""
18+
HuggingFace predictor implementation for GenAI extraction attacks.
19+
"""
20+
21+
from typing import Any, Dict, List
22+
23+
import torch
24+
25+
import transformers.utils.import_utils
26+
27+
from privacy_guard.attacks.extraction.predictors.huggingface_predictor import (
28+
HuggingFacePredictor,
29+
)
30+
from transformers.utils.import_utils import (
31+
_is_package_available,
32+
is_accelerate_available,
33+
)
34+
35+
36+
class GPTOSSPredictor(HuggingFacePredictor):
37+
"""
38+
Inherits from HuggingFacePredictor and updates the generation logic to match
39+
GPT OSS expectation.
40+
41+
Use this predictor for models like "gpt-oss-20b" and "gpt-oss-120b"
42+
43+
Note: HuggingFacePredictor "get_logits" and "get_logprobs" behavior is
44+
not yet tested w/ GPTOSSPredictor
45+
"""
46+
47+
def __init__(
48+
self,
49+
model_name: str,
50+
device: str | None = None,
51+
model_kwargs: Dict[str, Any] | None = None,
52+
tokenizer_kwargs: Dict[str, Any] | None = None,
53+
**kwargs: Any,
54+
) -> None:
55+
accelerate_available = self.accelerate_available_workaround()
56+
if not accelerate_available:
57+
raise ImportError(
58+
'Required library "accelerate" for GPT OSS not available'
59+
)
60+
61+
super().__init__(
62+
model_name=model_name,
63+
device=device,
64+
model_kwargs=model_kwargs,
65+
tokenizer_kwargs=tokenizer_kwargs,
66+
**kwargs,
67+
)
68+
69+
def accelerate_available_workaround(self) -> bool:
70+
"""
71+
In old transformers versions, availability for the required 'accelerate' package
72+
is checked once at import time and the result is saved for all future checks.
73+
74+
For Meta internal packaging this check returns as false at import time even when
75+
the package is available at runtime.
76+
77+
This is a workaround which updates the saved values in transformers
78+
when this class is initialized.
79+
80+
See the following link to the old transformers code pointer.
81+
https://github.com/huggingface/transformers/blob/
82+
e95441bdb586a7c3c9b4f61a41e99178c1becf54/src/transformers/utils/import_utils.py#L126
83+
"""
84+
if is_accelerate_available():
85+
return True
86+
87+
_accelerate_available, _accelerate_version = ( # pyre-ignore
88+
_is_package_available("accelerate", return_version=True)
89+
)
90+
91+
if _accelerate_available:
92+
transformers.utils.import_utils._accelerate_available = (
93+
_accelerate_available
94+
)
95+
transformers.utils.import_utils._accelerate_version = _accelerate_version
96+
97+
return is_accelerate_available()
98+
99+
return False
100+
101+
def preprocess_batch_messages(self, batch: List[str]) -> List[Dict[str, str]]:
102+
"""
103+
Prepare a batch of messages for prediction.
104+
105+
Differs than parent HuggingfacePredictor in that it returns a list of Dict
106+
instead of str, and includes "role" user field.
107+
"""
108+
clean_batch = []
109+
for item in batch:
110+
if not isinstance(item, str):
111+
raise Warning(f"Found non-string item in batch: {type(item)}")
112+
clean_batch.append(str(item) if item is not None else "")
113+
else:
114+
clean_batch.append({"role": "user", "content": item})
115+
return clean_batch
116+
117+
# Override
118+
def _generate_process_batch(
119+
self, batch: List[str], max_new_tokens: int = 512, **generation_kwargs: Any
120+
) -> List[str]:
121+
"""Process a single batch of prompts.
122+
apply_chat_template is used to apply the harmony response format, required for
123+
gpt models to work properly.
124+
"""
125+
clean_batch: List[Dict[str, str]] = self.preprocess_batch_messages(batch)
126+
127+
# Different than parent HuggingfacePredictor class
128+
inputs = self.tokenizer.apply_chat_template( # pyre-ignore
129+
clean_batch,
130+
add_generation_prompt=True,
131+
tokenize=True,
132+
return_dict=True,
133+
return_tensors="pt",
134+
).to(self.device)
135+
# Everything after is the same as parent
136+
137+
with torch.no_grad():
138+
# Handle both regular models and DDP-wrapped models
139+
# TODO: identify which of these paths is utilized for GPT OSS
140+
if hasattr(self.model, "module"):
141+
outputs = self.model.module.generate( # pyre-ignore
142+
**inputs, max_new_tokens=max_new_tokens, **generation_kwargs
143+
)
144+
else:
145+
outputs = self.model.generate( # pyre-ignore
146+
**inputs, max_new_tokens=max_new_tokens, **generation_kwargs
147+
)
148+
149+
batch_results = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
150+
151+
return batch_results
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# pyre-strict
16+
import unittest
17+
from unittest.mock import MagicMock, patch
18+
19+
import torch
20+
from privacy_guard.attacks.extraction.predictors.gpt_oss_predictor import (
21+
GPTOSSPredictor,
22+
)
23+
24+
25+
class TestGPTOSSPredictor(unittest.TestCase):
26+
def setUp(self) -> None:
27+
self.model_name = "test-model"
28+
self.device = "cpu"
29+
self.vocab_size = 50257
30+
31+
# Create simple mocks for model and tokenizer
32+
self.mock_model = MagicMock(
33+
spec=["generate", "config"]
34+
) # Only allow these attributes
35+
self.mock_model.config.vocab_size = self.vocab_size
36+
self.mock_model.generate.return_value = torch.tensor([[1, 2, 3, 4, 5]])
37+
38+
self.mock_tokenizer = MagicMock()
39+
self.mock_tokenizer.pad_token = None
40+
self.mock_tokenizer.eos_token = "<|endoftext|>"
41+
self.mock_tokenizer.pad_token_id = 0
42+
self.mock_tokenizer.batch_decode.return_value = ["Generated text"]
43+
44+
with patch.object(
45+
GPTOSSPredictor, "accelerate_available_workaround", return_value=True
46+
), patch(
47+
"privacy_guard.attacks.extraction.predictors.huggingface_predictor.load_model_and_tokenizer",
48+
return_value=(
49+
self.mock_model,
50+
self.mock_tokenizer,
51+
),
52+
):
53+
self.predictor = GPTOSSPredictor(self.model_name, self.device)
54+
55+
def test_init(self) -> None:
56+
"""Test predictor initialization."""
57+
self.assertEqual(self.predictor.model_name, self.model_name)
58+
self.assertEqual(self.predictor.device, self.device)
59+
60+
def test_generate(self) -> None:
61+
"""Test generate functionality."""
62+
63+
# Mock tokenizer responses
64+
mock_inputs = MagicMock()
65+
mock_inputs.to.return_value = {
66+
"input_ids": torch.tensor([[1, 2, 3]]),
67+
"attention_mask": torch.tensor([[1, 1, 1]]),
68+
}
69+
self.mock_tokenizer.return_value = mock_inputs
70+
self.mock_tokenizer.batch_decode.return_value = ["Generated text"]
71+
72+
# Mock the tqdm within the generate method - patch the specific import
73+
with patch(
74+
"privacy_guard.attacks.extraction.predictors.huggingface_predictor.tqdm"
75+
) as mock_tqdm:
76+
mock_tqdm.side_effect = lambda x, **kwargs: x
77+
result = self.predictor.generate(["Test prompt"])
78+
79+
self.assertEqual(result, ["Generated text"])
80+
self.mock_model.generate.assert_called_once()
81+
82+
@patch(
83+
"privacy_guard.attacks.extraction.predictors.gpt_oss_predictor.is_accelerate_available"
84+
)
85+
def test_accelerate_available_workaround_when_initially_true(
86+
self, mock_is_accelerate_available: MagicMock
87+
) -> None:
88+
"""Test accelerate_available_workaround when is_accelerate_available is True initially."""
89+
90+
# Setup: mock is_accelerate_available to return True
91+
mock_is_accelerate_available.return_value = True
92+
93+
# Execute: call the workaround method
94+
# accelerate_available_workaround is called in __init__
95+
result = self.predictor.accelerate_available_workaround()
96+
97+
# Assert: method returns True and only checks is_accelerate_available
98+
self.assertTrue(result)
99+
mock_is_accelerate_available.assert_called_once()
100+
101+
@patch(
102+
"privacy_guard.attacks.extraction.predictors.gpt_oss_predictor._is_package_available"
103+
)
104+
@patch(
105+
"privacy_guard.attacks.extraction.predictors.gpt_oss_predictor.is_accelerate_available"
106+
)
107+
def test_accelerate_available_workaround_when_package_available(
108+
self,
109+
mock_is_accelerate_available: MagicMock,
110+
mock_is_package_available: MagicMock,
111+
) -> None:
112+
"""Test when is_accelerate_available is initially false but _is_package_available returns true."""
113+
114+
# Setup: mock is_accelerate_available to return False initially, then True after workaround
115+
mock_is_accelerate_available.side_effect = [False, True]
116+
117+
# Setup: mock _is_package_available to return True and a version string
118+
mock_is_package_available.return_value = (True, "0.21.0")
119+
120+
# Execute: call the workaround method
121+
result = self.predictor.accelerate_available_workaround()
122+
123+
# Assert: method returns True after setting the accelerate availability
124+
self.assertTrue(result)
125+
self.assertEqual(mock_is_accelerate_available.call_count, 2)
126+
mock_is_package_available.assert_called_once()
127+
# mock_import_utils._is_package_available.assert_called_once_with(
128+
# "accelerate", return_version=True
129+
# )
130+
131+
@patch(
132+
"privacy_guard.attacks.extraction.predictors.gpt_oss_predictor._is_package_available"
133+
)
134+
@patch(
135+
"privacy_guard.attacks.extraction.predictors.gpt_oss_predictor.is_accelerate_available"
136+
)
137+
def test_accelerate_available_workaround_when_both_false(
138+
self,
139+
mock_is_accelerate_available: MagicMock,
140+
mock_is_package_available: MagicMock,
141+
) -> None:
142+
"""Test when both is_accelerate_available and _is_package_available are false."""
143+
144+
# Setup: mock is_accelerate_available to return False
145+
mock_is_accelerate_available.return_value = False
146+
147+
# Setup: mock _is_package_available to return False
148+
mock_is_package_available.return_value = (False, "N/A")
149+
150+
# Execute: call the workaround method
151+
result = self.predictor.accelerate_available_workaround()
152+
153+
# Assert: method returns False
154+
self.assertFalse(result)
155+
mock_is_accelerate_available.assert_called_once()
156+
mock_is_package_available.assert_called_once()
157+
# mock_import_utils._is_package_available.assert_called_once_with(
158+
# "accelerate", return_version=True
159+
# )
160+
161+
def test_init_fails_when_accelerate_not_available(
162+
self,
163+
) -> None:
164+
"""Test that instantiating GPTOSSPredictor when accelerate is not available
165+
raises exception."""
166+
with self.assertRaises(ImportError):
167+
with patch.object(
168+
GPTOSSPredictor, "accelerate_available_workaround", return_value=False
169+
):
170+
_ = GPTOSSPredictor(self.model_name, self.device)
171+
172+
173+
if __name__ == "__main__":
174+
unittest.main()

0 commit comments

Comments
 (0)