From 1db62f41a2e3b89faf26a7a53d5296ace6af7db3 Mon Sep 17 00:00:00 2001 From: Muti Chung Date: Wed, 19 Nov 2025 09:11:21 +0000 Subject: [PATCH] Fix formatting and typos in modifiers. Signed-off-by: Muti Chung --- src/llmcompressor/modifiers/autoround/base.py | 58 ++++++++------- src/llmcompressor/modifiers/awq/base.py | 40 +++++------ .../modifiers/pruning/sparsegpt/base.py | 38 +++++----- .../modifiers/pruning/wanda/base.py | 36 +++++----- .../modifiers/quantization/gptq/base.py | 72 ++++++++++--------- .../quantization/gptq/gptq_quantize.py | 2 +- .../quantization/quantization/base.py | 2 +- .../quantization/quantization/mixin.py | 38 +++++----- .../modifiers/transform/quip/base.py | 23 +++--- .../modifiers/transform/spinquant/base.py | 27 +++---- .../modifiers/transform/spinquant/mappings.py | 2 +- 11 files changed, 179 insertions(+), 159 deletions(-) diff --git a/src/llmcompressor/modifiers/autoround/base.py b/src/llmcompressor/modifiers/autoround/base.py index 2480751a9b..881e593eb8 100644 --- a/src/llmcompressor/modifiers/autoround/base.py +++ b/src/llmcompressor/modifiers/autoround/base.py @@ -62,35 +62,39 @@ class AutoRoundModifier(Modifier, QuantizationMixin): This modifier leverages signed gradient descent (SignSGD) optimizer and block-wise loss to optimize rounding values and weight clipping in a few steps. - | Sample yaml: - | test_stage: - | modifiers: - | AutoRoundModifier: - | iters: 200 - | config_groups: - | group_0: - | targets: - | - "Linear" - | input_activations: null - | output_activations: null - | weights: - | num_bits: 4 - | type: "int" - | symmetric: true - | strategy: group - | group_size: 128 + Sample yaml: + + ```yaml + test_stage: + modifiers: + AutoRoundModifier: + iters: 200 + config_groups: + group_0: + targets: + - "Linear" + input_activations: null + output_activations: null + weights: + num_bits: 4 + type: "int" + symmetric: true + strategy: group + group_size: 128 + ``` Lifecycle: - - on_initialize - - apply config to model - - on_start - - add input capture hooks to decoding layers - - on_sequential_epoch_end - - apply_autoround - - post_autoround_cleanup - - on_finalize - - remove_hooks() - - model.apply(freeze_module_quantization) + + - on_initialize + - apply config to model + - on_start + - add input capture hooks to decoding layers + - on_sequential_epoch_end + - apply_autoround + - post_autoround_cleanup + - on_finalize + - remove_hooks() + - model.apply(freeze_module_quantization) :param config_groups: dictionary specifying quantization schemes to apply to target modules. Modules not matching a scheme target will NOT be quantized. 
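For context on the AutoRoundModifier docstring updated above, a minimal sketch of how a recipe like that sample YAML might be applied. This is an illustration only: the `oneshot` import path and keyword arguments are assumptions based on typical llmcompressor usage and may differ by version, and the model id and dataset name are placeholders.

```python
# Illustrative sketch only: applying an AutoRoundModifier recipe like the one in
# the docstring above. The `oneshot` import path and keyword arguments are
# assumptions (they vary across llmcompressor versions); the model id and the
# calibration dataset name are placeholders.
from llmcompressor import oneshot

recipe = """
test_stage:
  modifiers:
    AutoRoundModifier:
      iters: 200
      config_groups:
        group_0:
          targets: ["Linear"]
          input_activations: null
          output_activations: null
          weights:
            num_bits: 4
            type: "int"
            symmetric: true
            strategy: group
            group_size: 128
"""

oneshot(
    model="meta-llama/Llama-3.2-1B-Instruct",  # placeholder model id
    dataset="open_platypus",                   # placeholder calibration dataset
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=512,
)
```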
diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py index 98e53b4e00..dc35a5c02f 100644 --- a/src/llmcompressor/modifiers/awq/base.py +++ b/src/llmcompressor/modifiers/awq/base.py @@ -58,7 +58,6 @@ class AWQModifier(Modifier, QuantizationMixin): balance_layers: ["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"] - smooth_layer: "re:.*final_layer_norm" balance_layers: ["re:.*fc1"] - ] ignore: ["lm_head"] config_groups: group_0: @@ -75,25 +74,26 @@ class AWQModifier(Modifier, QuantizationMixin): ``` Lifecycle: - - on_initialize - - resolve mappings - - capture kwargs needed for forward passes into modules - - on_start - - set up activation cache hooks to capture input activations - to balance layers - - on sequential epoch end - - apply smoothing to each smoothing layer - - consume cached activations across all batches - - clear cached activations as they are used - - find best smoothing scale for each smoothing layer - - apply to model weights - - raise error if any unused activations remain - - on_end - - re-run logic of sequential epoch end (in case of basic pipeline) - - set scales and zero points - - remove activation hooks - - on_finalize - - clear resolved mappings and captured activations + + - on_initialize + - resolve mappings + - capture kwargs needed for forward passes into modules + - on_start + - set up activation cache hooks to capture input activations + to balance layers + - on sequential epoch end + - apply smoothing to each smoothing layer + - consume cached activations across all batches + - clear cached activations as they are used + - find best smoothing scale for each smoothing layer + - apply to model weights + - raise error if any unused activations remain + - on_end + - re-run logic of sequential epoch end (in case of basic pipeline) + - set scales and zero points + - remove activation hooks + - on_finalize + - clear resolved mappings and captured activations :param sequential_targets: list of module names to compress in the same calibration pass diff --git a/src/llmcompressor/modifiers/pruning/sparsegpt/base.py b/src/llmcompressor/modifiers/pruning/sparsegpt/base.py index 0845586602..b3739f0d5f 100644 --- a/src/llmcompressor/modifiers/pruning/sparsegpt/base.py +++ b/src/llmcompressor/modifiers/pruning/sparsegpt/base.py @@ -26,24 +26,28 @@ class SparseGPTModifier(SparsityModifierBase): """ Modifier for applying the one-shot SparseGPT algorithm to a model - | Sample yaml: - | test_stage: - | obcq_modifiers: - | SparseGPTModifier: - | sparsity: 0.5 - | mask_structure: "2:4" - | dampening_frac: 0.001 - | block_size: 128 - | targets: ['Linear'] - | ignore: ['re:.*lm_head'] + Sample yaml: + + ```yaml + test_stage: + obcq_modifiers: + SparseGPTModifier: + sparsity: 0.5 + mask_structure: "2:4" + dampening_frac: 0.001 + block_size: 128 + targets: ['Linear'] + ignore: ['re:.*lm_head'] + ``` Lifecycle: - - on_initialize - - register_hook(module, calibrate_module, "forward") - - on_sequential_batch_end - - sparsify_weight - - on_finalize - - remove_hooks() + + - on_initialize + - register_hook(module, calibrate_module, "forward") + - on_sequential_batch_end + - sparsify_weight + - on_finalize + - remove_hooks() :param sparsity: Sparsity to compress model to :param sparsity_profile: Can be set to 'owl' to use Outlier Weighed @@ -92,7 +96,7 @@ def calibrate_module( :param module: module being calibrated :param args: inputs to the module, the first element of which is the - cannonical input + canonical input :param _output: uncompressed module 
output, unused """ # Assume that the first argument is the input diff --git a/src/llmcompressor/modifiers/pruning/wanda/base.py b/src/llmcompressor/modifiers/pruning/wanda/base.py index 67eb616889..2599ccdd94 100644 --- a/src/llmcompressor/modifiers/pruning/wanda/base.py +++ b/src/llmcompressor/modifiers/pruning/wanda/base.py @@ -26,23 +26,27 @@ class WandaPruningModifier(SparsityModifierBase): Modifier for applying the one-shot WANDA algorithm to a model from the paper: https://arxiv.org/abs/2306.11695 - | Sample yaml: - | test_stage: - | sparsity_modifiers: - | WandaPruningModifier: - | sparsity: 0.5 - | mask_structure: "2:4" + Sample yaml: + + ```yaml + test_stage: + sparsity_modifiers: + WandaPruningModifier: + sparsity: 0.5 + mask_structure: "2:4" + ``` Lifecycle: - - on_initialize - - register_hook(module, calibrate_module, "forward") - - run_sequential / run_basic - - make_empty_row_scalars - - accumulate_row_scalars - - on_sequential_batch_end - - sparsify_weight - - on_finalize - - remove_hooks() + + - on_initialize + - register_hook(module, calibrate_module, "forward") + - run_sequential / run_basic + - make_empty_row_scalars + - accumulate_row_scalars + - on_sequential_batch_end + - sparsify_weight + - on_finalize + - remove_hooks() :param sparsity: Sparsity to compress model to :param sparsity_profile: Can be set to 'owl' to use Outlier Weighed @@ -78,7 +82,7 @@ def calibrate_module( :param module: module being calibrated :param args: inputs to the module, the first element of which is the - cannonical input + canonical input :param _output: uncompressed module output, unused """ # Assume that the first argument is the input diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py index 385de9840a..ab23e4fad3 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/base.py +++ b/src/llmcompressor/modifiers/quantization/gptq/base.py @@ -36,40 +36,44 @@ class GPTQModifier(Modifier, QuantizationMixin): """ Implements the GPTQ algorithm from https://arxiv.org/abs/2210.17323. This modifier uses activations to calibrate a hessian matrix, which is then used to determine - optimal quantizion values and orderings for the model weights. - - | Sample yaml: - | test_stage: - | obcq_modifiers: - | GPTQModifier: - | block_size: 128 - | dampening_frac: 0.001 - | offload_hessians: False - | actorder: static - | config_groups: - | group_0: - | targets: - | - "Linear" - | input_activations: null - | output_activations: null - | weights: - | num_bits: 8 - | type: "int" - | symmetric: true - | strategy: group - | group_size: 128 + optimal quantization values and orderings for the model weights. 
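As a side note on the Hessian calibration and activation ordering mentioned in the GPTQ docstring above, here is a minimal, library-agnostic sketch of the statistic being accumulated; it is not the repository's implementation, and the helper name is hypothetical.

```python
# Library-agnostic sketch of the Hessian statistic GPTQ-style methods accumulate
# from calibration activations; NOT the repository's implementation.
import torch

def accumulate_hessian(H, x, n_seen):
    """Running estimate of H = 2 * X^T X over calibration batches.

    x: activations feeding a Linear layer, reshaped to (tokens, in_features).
    """
    x = x.reshape(-1, x.shape[-1]).to(torch.float32)
    n_new = x.shape[0]
    # Rescale the previous estimate so old and new samples are weighted equally.
    H = H * (n_seen / (n_seen + n_new))
    H = H + (2.0 / (n_seen + n_new)) * (x.T @ x)
    return H, n_seen + n_new

in_features = 16
H = torch.zeros(in_features, in_features)
n_seen = 0
for _ in range(4):  # pretend calibration batches
    batch = torch.randn(8, 32, in_features)  # (batch, seq_len, in_features)
    H, n_seen = accumulate_hessian(H, batch, n_seen)

# "actorder"-style ordering: visit the columns with the largest diagonal
# Hessian entries (the most activation energy) first.
order = torch.argsort(torch.diag(H), descending=True)
print(order[:5])
```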
+ + Sample yaml: + + ```yaml + test_stage: + obcq_modifiers: + GPTQModifier: + block_size: 128 + dampening_frac: 0.001 + offload_hessians: False + actorder: static + config_groups: + group_0: + targets: + - "Linear" + input_activations: null + output_activations: null + weights: + num_bits: 8 + type: "int" + symmetric: true + strategy: group + group_size: 128 + ``` Lifecycle: - - on_initialize - - apply config to model - - on_start - - add activation calibration hooks - - add gptq weight calibration hooks - - on_sequential_epoch_end - - quantize_weight - - on_finalize - - remove_hooks() - - model.apply(freeze_module_quantization) + + - on_initialize + - apply config to model + - on_start + - add activation calibration hooks + - add gptq weight calibration hooks + - on_sequential_epoch_end + - quantize_weight + - on_finalize + - remove_hooks() + - model.apply(freeze_module_quantization) :param sequential_targets: list of layer names to compress during GPTQ, or '__ALL__' to compress every layer in the model @@ -99,7 +103,7 @@ class GPTQModifier(Modifier, QuantizationMixin): the kv_cache_scheme gets converted into a QuantizationScheme that: - targets the `q_proj` and `k_proj` modules of the model. The outputs of those modules are the keys and values that might be cached - - quantizes the outputs of the aformentioned layers, so that + - quantizes the outputs of the aforementioned layers, so that keys and values are compressed before storing them in the cache There is an explicit assumption that the model contains modules with `k_proj` and `v_proj` in their names. If this is not the case @@ -220,7 +224,7 @@ def calibrate_module( :param module: module being calibrated :param args: inputs to the module, the first element of which is the - cannonical input + canonical input :param _output: uncompressed module output, unused """ # Assume that first argument is the input diff --git a/src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py b/src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py index b621fdb801..af145df2dc 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py +++ b/src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py @@ -286,7 +286,7 @@ def _apply_activation_ordering( W: torch.Tensor, H: torch.Tensor ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ - Permute weight and hessian in order of greatest outupt activations + Permute weight and hessian in order of greatest output activations :param W: weight to permute :param H: hessian used to determine activation ordering diff --git a/src/llmcompressor/modifiers/quantization/quantization/base.py b/src/llmcompressor/modifiers/quantization/quantization/base.py index 1330d16adf..c81a8ba75e 100644 --- a/src/llmcompressor/modifiers/quantization/quantization/base.py +++ b/src/llmcompressor/modifiers/quantization/quantization/base.py @@ -37,7 +37,7 @@ class QuantizationModifier(Modifier, QuantizationMixin): the kv_cache_scheme gets converted into a QuantizationScheme that: - targets the `q_proj` and `k_proj` modules of the model. The outputs of those modules are the keys and values that might be cached - - quantizes the outputs of the aformentioned layers, so that + - quantizes the outputs of the aforementioned layers, so that keys and values are compressed before storing them in the cache There is an explicit assumption that the model contains modules with `k_proj` and `v_proj` in their names. 
If this is not the case diff --git a/src/llmcompressor/modifiers/quantization/quantization/mixin.py b/src/llmcompressor/modifiers/quantization/quantization/mixin.py index 42264af22e..caf4ae496c 100644 --- a/src/llmcompressor/modifiers/quantization/quantization/mixin.py +++ b/src/llmcompressor/modifiers/quantization/quantization/mixin.py @@ -43,26 +43,28 @@ class QuantizationMixin(HooksMixin): """ - Mixin which enables a Modifier to act as a quantization config, attching observers, + Mixin which enables a Modifier to act as a quantization config, attaching observers, calibration hooks, and compression wrappers to modifiers Lifecycle: - - on_initialize: QuantizationMixin.initialize_quantization - - Attach schemes to modules - - Attach observers to modules - - Disable quantization until calibration starts/finishes - - on_start: QuantizationMixin.start_calibration - - Attach calibration hooks - - Apply calibration status - - Enable quantization during calibration - - on_end: QuantizationMixin.end_calibration - - Remove calibration hooks - - Apply freeze status - - Keep quantization enabled for future steps - NOTE: QuantizationMixin does not update scales and zero-points on its own, - as this is not desired for all Modifiers inheriting from it. Modifier must - explicitly call `update_weight_zp_scale`. - See QuantizationModifier.on_start method for example + + - on_initialize: QuantizationMixin.initialize_quantization + - Attach schemes to modules + - Attach observers to modules + - Disable quantization until calibration starts/finishes + - on_start: QuantizationMixin.start_calibration + - Attach calibration hooks + - Apply calibration status + - Enable quantization during calibration + - on_end: QuantizationMixin.end_calibration + - Remove calibration hooks + - Apply freeze status + - Keep quantization enabled for future steps + + NOTE: QuantizationMixin does not update scales and zero-points on its own, + as this is not desired for all Modifiers inheriting from it. Modifier must + explicitly call `update_weight_zp_scale`. + See QuantizationModifier.on_start method for example :param config_groups: dictionary specifying quantization schemes to apply to target modules. Modules not matching a scheme target will NOT be quantized. @@ -85,7 +87,7 @@ class QuantizationMixin(HooksMixin): the kv_cache_scheme gets converted into a QuantizationScheme that: - targets the `q_proj` and `k_proj` modules of the model. The outputs of those modules are the keys and values that might be cached - - quantizes the outputs of the aformentioned layers, so that + - quantizes the outputs of the aforementioned layers, so that keys and values are compressed before storing them in the cache There is an explicit assumption that the model contains modules with `k_proj` and `v_proj` in their names. If this is not the case diff --git a/src/llmcompressor/modifiers/transform/quip/base.py b/src/llmcompressor/modifiers/transform/quip/base.py index ace8d64fd4..12b2259a3f 100644 --- a/src/llmcompressor/modifiers/transform/quip/base.py +++ b/src/llmcompressor/modifiers/transform/quip/base.py @@ -34,15 +34,16 @@ class QuIPModifier(Modifier): the model weights and two of which remain as online rotations computed at runtime. 
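To illustrate the rotation idea referenced in the QuIP and SpinQuant docstrings, a toy sketch (not the modifiers' implementation) showing that an orthogonal rotation fused into a Linear weight, paired with the matching online rotation of its input, preserves the layer output while typically spreading activation outliers across channels:

```python
# Toy sketch of the rotation idea behind QuIP/SpinQuant-style transforms.
# Illustration only, not the modifiers' actual implementation.
import torch

torch.manual_seed(0)
in_features, out_features = 8, 4
W = torch.randn(out_features, in_features)
x = torch.randn(2, in_features)
x[:, 3] *= 50.0  # simulate an outlier channel that hurts quantization

# Random orthogonal rotation (a Hadamard matrix would be used in practice).
Q, _ = torch.linalg.qr(torch.randn(in_features, in_features))

W_rot = W @ Q  # "fused" into the checkpoint weights offline
x_rot = x @ Q  # "online" rotation computed at runtime

y_ref = x @ W.T
y_rot = x_rot @ W_rot.T
print(torch.allclose(y_ref, y_rot, atol=1e-4))          # True: output preserved
print(x.abs().max().item(), x_rot.abs().max().item())   # outlier typically shrinks
```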
Lifecycle: - - on_initialize - - as needed, create transform schemes for V (input) and U (output) - - on_start - - apply TransformConfig - - fuse transforms into weights for mergeable transforms - - add hooks for online transforms - - on sequential epoch end - - on_end - - on_finalize + + - on_initialize + - as needed, create transform schemes for V (input) and U (output) + - on_start + - apply TransformConfig + - fuse transforms into weights for mergeable transforms + - add hooks for online transforms + - on sequential epoch end + - on_end + - on_finalize :param rotations: which rotation schemes to apply to the model. Including `"v"` will rotate the input side of weights, and including `"u"` will rotate the output @@ -152,7 +153,7 @@ def _create_v_scheme(self) -> TransformScheme: apply=[ TransformArgs( targets=self.targets, - location="input", # non-mergable + location="input", # non-mergeable ignore=self.ignore, ), TransformArgs( @@ -179,7 +180,7 @@ def _create_u_scheme(self) -> TransformScheme: ), TransformArgs( targets=self.targets, - location="output", # non-mergable + location="output", # non-mergeable inverse=True, ignore=self.ignore, ), diff --git a/src/llmcompressor/modifiers/transform/spinquant/base.py b/src/llmcompressor/modifiers/transform/spinquant/base.py index 8d84e860f2..e18359be4a 100644 --- a/src/llmcompressor/modifiers/transform/spinquant/base.py +++ b/src/llmcompressor/modifiers/transform/spinquant/base.py @@ -37,7 +37,7 @@ class SpinQuantModifier(Modifier, use_enum_values=True): with learned rotations" (https://arxiv.org/abs/2405.16406) Transforms (rotations) are extra layers added to a model which reduce the accuracy - loss induced by quantization. This is achived through "rotating" weights and + loss induced by quantization. This is achieved through "rotating" weights and activations into a space with a smaller dynamic range of values, thus decreasing the range of scales required for quantization. @@ -47,18 +47,19 @@ class SpinQuantModifier(Modifier, use_enum_values=True): rotations, meaning that they require additional computation at runtime. Lifecycle: - - on_initialize - - infer SpinQuantMappings & NormMappings - - as needed, create transform schemes for R1, R2, R3, & R4 - - on_start - - normalize embeddings - - fuse norm layers into subsequent Linear layers - - apply TransformConfig - - fuse transforms into weights for mergeable transforms - - add hooks for online transforms - - on sequential epoch end - - on_end - - on_finalize + + - on_initialize + - infer SpinQuantMappings & NormMappings + - as needed, create transform schemes for R1, R2, R3, & R4 + - on_start + - normalize embeddings + - fuse norm layers into subsequent Linear layers + - apply TransformConfig + - fuse transforms into weights for mergeable transforms + - add hooks for online transforms + - on sequential epoch end + - on_end + - on_finalize :param rotations: A list containing the names of rotations to apply to the model. 
Possible rotations include R1, R2, R3, and R4 diff --git a/src/llmcompressor/modifiers/transform/spinquant/mappings.py b/src/llmcompressor/modifiers/transform/spinquant/mappings.py index 85c2f0c0f3..da3d76f6c1 100644 --- a/src/llmcompressor/modifiers/transform/spinquant/mappings.py +++ b/src/llmcompressor/modifiers/transform/spinquant/mappings.py @@ -25,7 +25,7 @@ class SpinQuantMapping(BaseModel): :param mlp_in: list of names or regexes for the mlp blocks that receive the input to the MLP block, usually up_proj and gate_proj :param mlp_out: list of names or regexes for the mlp blocks that - consitute the output of the MLP block, usually down_proj + constitute the output of the MLP block, usually down_proj """ embedding: str
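Finally, a sketch of expressing an equivalent recipe as Python modifier objects rather than YAML, following the common llmcompressor pattern of passing a list of modifiers to `oneshot`. The import paths, the `scheme` preset shortcut, and the `oneshot` keyword arguments are assumptions that may differ by version; the model id and dataset name are placeholders.

```python
# Sketch of a Python-object recipe equivalent to the YAML samples in the
# docstrings above. Import paths, the "W4A16" scheme shortcut, and the oneshot
# keyword arguments are assumptions; model and dataset names are placeholders.
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import GPTQModifier

recipe = [
    GPTQModifier(
        targets="Linear",        # same targets as the YAML config_groups
        scheme="W4A16",          # preset 4-bit weight / 16-bit activation scheme
        ignore=["lm_head"],
        dampening_frac=0.001,
        block_size=128,
    ),
]

oneshot(
    model="meta-llama/Llama-3.2-1B-Instruct",  # placeholder model id
    dataset="open_platypus",                   # placeholder calibration dataset
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=512,
)
```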