
Commit 7194d4b

[Bugfix] Refactor QuantizationMixin to use resolved config (#1912)
SUMMARY: Fixes #1906. This refactors QuantizationMixin so that it no longer updates any pydantic fields during validation. Rather than modifying those fields to make them the source of truth, it adds `resolved_config` and `resolved_targets` properties that all modifiers should use as the source of truth instead. These are resolved once, when needed, and are not serialized, which should fix the bug in #1906.

TEST PLAN: Added a `test_resolved_targets` unit test.

---------

Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 6a71591 commit 7194d4b
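
To make the mechanism concrete, below is a minimal, self-contained sketch of the pattern this commit adopts: a pydantic `PrivateAttr` cache behind a property, so resolution happens once on first access and never touches the serialized fields. The class name, field names, and resolution logic here are illustrative stand-ins, not the project's actual implementation.

from typing import Dict, List, Optional, Set

from pydantic import BaseModel, Field, PrivateAttr


class ResolvedTargetsSketch(BaseModel):
    """Toy stand-in for QuantizationMixin, showing only the caching pattern."""

    # user-facing fields are left untouched by validation
    targets: List[str] = Field(default_factory=lambda: ["Linear"])
    config_groups: Optional[Dict[str, dict]] = None

    # private cache; pydantic excludes private attrs from model_dump()/serialization
    _resolved_targets: Optional[Set[str]] = PrivateAttr(None)

    @property
    def resolved_targets(self) -> Set[str]:
        # resolved lazily, exactly once, and never written back into `targets`
        if self._resolved_targets is None:
            resolved = set(self.targets)
            for group in (self.config_groups or {}).values():
                resolved.update(group.get("targets", []))
            self._resolved_targets = resolved
        return self._resolved_targets


sketch = ResolvedTargetsSketch(config_groups={"group_0": {"targets": ["re:.*q_proj$"]}})
assert "re:.*q_proj$" in sketch.resolved_targets
assert "_resolved_targets" not in sketch.model_dump()  # cache never leaks into the recipe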

File tree: 5 files changed, +94 −31 lines changed

src/llmcompressor/modifiers/awq/base.py

Lines changed: 1 addition & 1 deletion
@@ -268,7 +268,7 @@ def on_end(self, state: State, event: Event, **kwargs):
         self.ended_ = True

         for _, module in tqdm(
-            match_named_modules(state.model, self.targets, self.ignore),
+            match_named_modules(state.model, self.resolved_targets, self.ignore),
             desc="Calibrating weights",
         ):
             update_weight_zp_scale(module)

src/llmcompressor/modifiers/quantization/gptq/base.py

Lines changed: 6 additions & 2 deletions
@@ -162,7 +162,9 @@ def on_initialize(self, state: State, **kwargs) -> bool:
         # prepare module names
         self._module_names = {
             m: name
-            for name, m in match_named_modules(state.model, self.targets, self.ignore)
+            for name, m in match_named_modules(
+                state.model, self.resolved_targets, self.ignore
+            )
         }

         return True
@@ -176,7 +178,9 @@ def on_start(self, state: State, event: Event, **kwargs):

         # register gptq hooks
         added_hook = False
-        for _, module in match_named_modules(state.model, self.targets, self.ignore):
+        for _, module in match_named_modules(
+            state.model, self.resolved_targets, self.ignore
+        ):
             if getattr_chain(module, "quantization_scheme.weights", None) is not None:
                 # HACK: previously, embeddings were not quantized because they were not
                 # accessible by the layer compressor. For now, we manually ignore it,

src/llmcompressor/modifiers/quantization/quantization/base.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ def on_start(self, state: State, event: Event, **kwargs):
         QuantizationMixin.start_calibration(self, state.model)

         named_modules = list(
-            match_named_modules(state.model, self.targets, self.ignore)
+            match_named_modules(state.model, self.resolved_targets, self.ignore)
         )
         # TODO: this step can be combined with update_weight_zp_scale
         # once update_fused_layer_weight_global_scales is removed

src/llmcompressor/modifiers/quantization/quantization/mixin.py

Lines changed: 34 additions & 27 deletions
@@ -15,7 +15,7 @@
     preset_name_to_scheme,
 )
 from compressed_tensors.utils import match_named_modules
-from pydantic import Field, PrivateAttr, field_validator, model_validator
+from pydantic import Field, PrivateAttr, field_validator
 from torch.utils.hooks import RemovableHandle

 from llmcompressor.modifiers.quantization.calibration import (
@@ -62,6 +62,9 @@ class QuantizationMixin(HooksMixin):
     :param targets: list of layer names to quantize if a scheme is provided. If unset,
         will contain all targets listed in config_groups. If config_groups is also
         unset, will default to ["Linear"] (i.e. all Linear layers will be targeted).
+        This field is not the source of truth for finding all matching target layers
+        in a model. Additional information can be stored in `config_groups`. Use
+        self.resolved_targets instead.
     :param ignore: optional list of module class names or submodule names to not
         quantize even if they match a target in config_groups. Defaults to empty list.
     :param scheme: a single quantization scheme to apply to the model. This is a
@@ -83,12 +86,16 @@ class QuantizationMixin(HooksMixin):
     """

     config_groups: Optional[Dict[str, QuantizationScheme]] = None
-    targets: Union[str, List[str]] = Field(default_factory=list)
+    # NOTE: targets is not the sole source of truth for finding all matching target
+    # layers in a model. Additional information can be stored in `config_groups`
+    # Use self.resolved_targets as source of truth.
+    targets: Union[str, List[str]] = Field(default_factory=lambda: ["Linear"])
     ignore: List[str] = Field(default_factory=list)
     scheme: Optional[Union[str, Dict[str, Any]]] = None
     kv_cache_scheme: Optional[QuantizationArgs] = None

     _calibration_hooks: Set[RemovableHandle] = PrivateAttr(default_factory=set)
+    _resolved_config: Optional[QuantizationConfig] = PrivateAttr(None)

     @field_validator("targets", mode="before")
     def validate_targets(cls, value: Union[str, List[str]]) -> List[str]:
@@ -116,27 +123,29 @@ def validate_scheme(

         return value

-    @model_validator(mode="after")
-    def validate_model_after(model: "QuantizationMixin") -> "QuantizationMixin":
+    @property
+    def resolved_config(self) -> QuantizationConfig:
         """
-        - If targets have not been set, aggregate targets from config_groups
-          into a single unique list
-        - If targets have still not been found, default to targets=["Linear"]
+        Quantization config needs to be resolved just once based on
+        scheme and config_groups inputs.
         """
+        if self._resolved_config is None:
+            self._resolved_config = self.resolve_quantization_config()
+        return self._resolved_config

-        if len(model.targets) > 0 and model.config_groups is not None:
-            raise ValueError("Please specify either `targets` or `config_groups`")
-
-        if len(model.targets) == 0 and model.config_groups is not None:
-            for config_group in model.config_groups.values():
-                for target in config_group.targets:
-                    if target not in model.targets:
-                        model.targets.append(target)
-
-        if len(model.targets) == 0:
-            model.targets.append("Linear")
-
-        return model
+    @property
+    def resolved_targets(self) -> Set[str]:
+        """
+        Set of all resolved targets, i.e. all unique targets listed
+        in resolved quantization config.
+        Use this property instead of the targets field, as targets can
+        also come from config_groups depending on how recipe is configured.
+        """
+        targets = set()
+        for config_group in self.resolved_config.config_groups.values():
+            for target in config_group.targets:
+                targets.add(target)
+        return targets

     def initialize_quantization(self, model: torch.nn.Module):
         """
@@ -145,13 +154,11 @@ def initialize_quantization(self, model: torch.nn.Module):

         :param model: model to attach schemes and observers to
         """
-        # apply scheme and status to model
-        config = self.resolve_quantization_config()

-        for _, module in match_named_modules(model, self.targets, self.ignore):
+        for _, module in match_named_modules(model, self.resolved_targets, self.ignore):
             reset_quantization_status(module)  # reset any previously applied qconfigs

-        apply_quantization_config(model, config)
+        apply_quantization_config(model, self.resolved_config)

         # disable quantization until calibration
         model.apply(disable_quantization)
@@ -164,7 +171,7 @@ def start_calibration(self, model: torch.nn.Module):
         :param model: model to prepare for calibration
         """
         self._calibration_hooks = self._initialize_hooks(model)
-        for _, module in match_named_modules(model, self.targets, self.ignore):
+        for _, module in match_named_modules(model, self.resolved_targets, self.ignore):
             self._initialize_observers(module)
             apply_calibration_status(module)

@@ -178,7 +185,7 @@ def end_calibration(self, model: torch.nn.Module):
         :param model: model to end calibration for
         """
         self.remove_hooks(self._calibration_hooks)
-        for _, module in match_named_modules(model, self.targets, self.ignore):
+        for _, module in match_named_modules(model, self.resolved_targets, self.ignore):
             freeze_module_quantization(module)  # remove observers

         model.apply(enable_quantization)  # keep quantization enabled
@@ -270,7 +277,7 @@ def _initialize_observers(self, module: torch.nn.Module):

     def _initialize_hooks(self, model: torch.nn.Module) -> Set[RemovableHandle]:
         hooks = set()
-        for _, module in match_named_modules(model, self.targets, self.ignore):
+        for _, module in match_named_modules(model, self.resolved_targets, self.ignore):
             if not hasattr(module, "quantization_scheme"):
                 continue
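For reference, every updated call site above follows the same consumer pattern: iterate the modules matched by `resolved_targets` via `match_named_modules`. The following is a rough, hedged sketch of that usage; the toy model and target set are made up for illustration, and the `compressed_tensors` call is the one imported in this file, assumed to accept a set of target names plus an ignore list as the diff shows.

import torch
from compressed_tensors.utils import match_named_modules

# toy model just for illustration; any torch.nn.Module works
model = torch.nn.Sequential(
    torch.nn.Linear(8, 8),
    torch.nn.ReLU(),
    torch.nn.Linear(8, 2),
)

resolved_targets = {"Linear"}  # what resolved_targets would yield for the default scheme
ignore = []

# yields (name, module) pairs for every matched, non-ignored submodule
for name, module in match_named_modules(model, resolved_targets, ignore):
    print(name, type(module).__name__)  # e.g. "0 Linear", "2 Linear"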
tests/llmcompressor/modifiers/quantization/test_base.py

Lines changed: 52 additions & 0 deletions
@@ -159,3 +159,55 @@ def test_serialize_actorder(has_actorder, actorder, exp_actorder):
     modifier = GPTQModifier(targets=["Linear"], scheme="W8A8")

     assert modifier.model_dump()["actorder"] == exp_actorder
+
+
+@pytest.mark.parametrize(
+    "scheme,targets,config_groups,resolved_targets,should_error",
+    [
+        ("W4A16", ["Linear"], None, {"Linear"}, False),
+        (
+            "W4A16",
+            [r"re:.*q_proj$", r"re:.*k_proj$"],
+            None,
+            {r"re:.*q_proj$", r"re:.*k_proj$"},
+            False,
+        ),
+        (
+            None,
+            ["Linear"],
+            dict(
+                group_0=dict(
+                    targets=[r"re:.*q_proj$"],
+                ),
+                group_1=dict(
+                    targets=[r"re:.*k_proj$"],
+                ),
+            ),
+            {r"re:.*q_proj$", r"re:.*k_proj$"},
+            False,
+        ),
+        (
+            "W4AA16",
+            ["Linear"],
+            dict(
+                group_0=dict(
+                    targets=[r"re:.*q_proj$"],
+                ),
+            ),
+            {},
+            True,
+        ),
+    ],
+)
+def test_resolved_targets(
+    scheme, targets, config_groups, should_error, resolved_targets
+):
+    if should_error:
+        with pytest.raises(ValueError):
+            GPTQModifier(targets=targets, scheme=scheme, config_groups=config_groups)
+    else:
+        modifier = GPTQModifier(
+            targets=targets, scheme=scheme, config_groups=config_groups
+        )
+
+        assert modifier.resolved_targets == resolved_targets
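
As a usage note, the new test boils down to behavior like the following hedged sketch, assuming the usual `from llmcompressor.modifiers.quantization import GPTQModifier` import path; the group name and regex are illustrative only.

from llmcompressor.modifiers.quantization import GPTQModifier

modifier = GPTQModifier(
    scheme=None,
    targets=["Linear"],
    config_groups={"group_0": {"targets": [r"re:.*q_proj$"]}},
)

# targets are aggregated from config_groups when the config is resolved ...
assert r"re:.*q_proj$" in modifier.resolved_targets
# ... while serialization still reflects only the user-supplied fields (the #1906 fix)
assert "resolved_targets" not in modifier.model_dump()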

0 commit comments
