@@ -52,47 +52,24 @@ def validate_environment(self, device_map, **kwargs):
         if not is_accelerate_available():
             raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")

-        if (
-            self.quantization_config.version == AWQLinearVersion.GEMM
-            and not torch.cuda.is_available()
-            and not torch.xpu.is_available()
-        ):
-            logger.warning_once("No CUDA or XPU found, consider switching to the IPEX version for CPU-only execution.")
-            self.quantization_config.version = AWQLinearVersion.IPEX
-
-        if self.quantization_config.version == AWQLinearVersion.IPEX:
-            if version.parse(importlib.metadata.version("autoawq")) < version.parse("0.2.6"):
-                raise RuntimeError(
-                    "To use IPEX backend, you need autoawq>0.2.6. Please install the latest version or from source."
-                )
-            if device_map is None:
-                logger.warning_once(
-                    "You have loaded an AWQ model without setting device_map, please set 'cpu' or 'xpu' or 'auto'"
-                )
-            elif isinstance(device_map, dict) and "disk" in device_map.values():
-                raise ValueError(
-                    "You are attempting to load an IPEX version AWQ model with a device_map that contains disk device."
-                    " This is not supported. Please make sure only cpu and xpu in the device_map."
-                )
-        else:
-            if not torch.cuda.is_available() and not torch.xpu.is_available():
-                raise RuntimeError(
-                    "GPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPU"
-                )
+        if not torch.cuda.is_available() and not torch.xpu.is_available():
+            raise RuntimeError(
+                "GPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPU"
+            )

-            if device_map is None:
-                logger.warning_once(
-                    "You have loaded an AWQ model on CPU and have a CUDA/XPU device available, make sure to set "
-                    "your model on a GPU device in order to run your model."
+        if device_map is None:
+            logger.warning_once(
+                "You have loaded an AWQ model on CPU and have a CUDA/XPU device available, make sure to set "
+                "your model on a GPU device in order to run your model."
+            )
+        elif device_map is not None:
+            if isinstance(device_map, dict) and any(
+                forbidden in device_map.values() for forbidden in ("cpu", torch.device("cpu"), "disk")
+            ):
+                raise ValueError(
+                    "You are attempting to load an AWQ model with a device_map that contains a CPU or disk device."
+                    " This is not supported. Please remove the CPU or disk device from the device_map."
                 )

     def update_dtype(self, dtype):
         if dtype is None:
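A minimal, self-contained sketch of the `device_map` check performed by the new code in the hunk above; the helper name and sample maps are illustrative, not part of this commit:

```python
# Sketch of the membership test added in validate_environment above.
# `has_forbidden_device` is a hypothetical helper, used only for illustration.
import torch

def has_forbidden_device(device_map) -> bool:
    # AWQ weights must stay on accelerator devices: any "cpu",
    # torch.device("cpu"), or "disk" value now triggers a ValueError upstream.
    return isinstance(device_map, dict) and any(
        forbidden in device_map.values() for forbidden in ("cpu", torch.device("cpu"), "disk")
    )

print(has_forbidden_device({"model.layers": 0, "lm_head": "cpu"}))  # True -> load is rejected
print(has_forbidden_device({"": 0}))                                # False -> load proceeds
```

Note that this check now runs unconditionally; before this commit, selecting the IPEX backend routed CPU and XPU placements through a separate, more permissive branch.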
@@ -134,11 +111,6 @@ def _process_model_after_weight_loading(self, model, **kwargs):

         model = post_init_awq_exllama_modules(model, self.quantization_config.exllama_config)

-        if self.quantization_config.version == AWQLinearVersion.IPEX:
-            from ..integrations import post_init_awq_ipex_modules
-
-            model = post_init_awq_ipex_modules(model)
-
     def is_serializable(self, safe_serialization=None):
         if self.quantization_config.version == AWQLinearVersion.EXLLAMA:
             logger.warning("You cannot save an AWQ model that uses Exllama backend!")
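A hedged usage sketch of the loading contract after this commit; the checkpoint id and `device_map` value are assumptions for illustration, not taken from the diff:

```python
# With the IPEX fallback removed, AWQ checkpoints require a CUDA or XPU device.
import torch
from transformers import AutoModelForCausalLM

assert torch.cuda.is_available() or torch.xpu.is_available(), "AWQ now requires an accelerator"

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.2-AWQ",  # example AWQ checkpoint (assumption)
    device_map="auto",  # must not resolve any module to "cpu" or "disk"
)
```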