@@ -52,47 +52,24 @@ def validate_environment(self, device_map, **kwargs):
         if not is_accelerate_available():
             raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")

-        if (
-            self.quantization_config.version == AWQLinearVersion.GEMM
-            and not torch.cuda.is_available()
-            and not torch.xpu.is_available()
-        ):
-            logger.warning_once("No CUDA or XPU found, consider switching to the IPEX version for CPU-only execution.")
-            self.quantization_config.version = AWQLinearVersion.IPEX
-
-        if self.quantization_config.version == AWQLinearVersion.IPEX:
-            if version.parse(importlib.metadata.version("autoawq")) < version.parse("0.2.6"):
-                raise RuntimeError(
-                    "To use IPEX backend, you need autoawq>0.2.6. Please install the latest version or from source."
-                )
-            if device_map is None:
-                logger.warning_once(
-                    "You have loaded an AWQ model without setting device_map, please set 'cpu' or 'xpu' or 'auto'"
-                )
-            elif isinstance(device_map, dict) and "disk" in device_map.values():
-                raise ValueError(
-                    "You are attempting to load an IPEX version AWQ model with a device_map that contains disk device."
-                    " This is not supported. Please make sure only cpu and xpu in the device_map."
-                )
-        else:
-            if not torch.cuda.is_available() and not torch.xpu.is_available():
-                raise RuntimeError(
-                    "GPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPU"
-                )
+        if not torch.cuda.is_available() and not torch.xpu.is_available():
+            raise RuntimeError(
+                "GPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPU"
+            )

-            if device_map is None:
-                logger.warning_once(
-                    "You have loaded an AWQ model on CPU and have a CUDA/XPU device available, make sure to set "
-                    "your model on a GPU device in order to run your model."
+        if device_map is None:
+            logger.warning_once(
+                "You have loaded an AWQ model on CPU and have a CUDA/XPU device available, make sure to set "
+                "your model on a GPU device in order to run your model."
+            )
+        elif device_map is not None:
+            if isinstance(device_map, dict) and any(
+                forbidden in device_map.values() for forbidden in ("cpu", torch.device("cpu"), "disk")
+            ):
+                raise ValueError(
+                    "You are attempting to load an AWQ model with a device_map that contains a CPU or disk device."
+                    " This is not supported. Please remove the CPU or disk device from the device_map."
                 )

     def update_dtype(self, dtype):
         if dtype is None:
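A minimal, self-contained sketch of the `device_map` check performed by the new code in the hunk above; the helper name and sample maps are illustrative, not part of this commit:

```python
# Sketch of the membership test added in validate_environment above.
# `has_forbidden_device` is a hypothetical helper, used only for illustration.
import torch

def has_forbidden_device(device_map) -> bool:
    # AWQ weights must stay on accelerator devices: any "cpu",
    # torch.device("cpu"), or "disk" value now triggers a ValueError upstream.
    return isinstance(device_map, dict) and any(
        forbidden in device_map.values() for forbidden in ("cpu", torch.device("cpu"), "disk")
    )

print(has_forbidden_device({"model.layers": 0, "lm_head": "cpu"}))  # True -> load is rejected
print(has_forbidden_device({"": 0}))                                # False -> load proceeds
```

Note that this check now runs unconditionally; before this commit, selecting the IPEX backend routed CPU and XPU placements through a separate, more permissive branch.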
@@ -134,11 +111,6 @@ def _process_model_after_weight_loading(self, model, **kwargs):

         model = post_init_awq_exllama_modules(model, self.quantization_config.exllama_config)

-        if self.quantization_config.version == AWQLinearVersion.IPEX:
-            from ..integrations import post_init_awq_ipex_modules
-
-            model = post_init_awq_ipex_modules(model)
-
     def is_serializable(self, safe_serialization=None):
         if self.quantization_config.version == AWQLinearVersion.EXLLAMA:
             logger.warning("You cannot save an AWQ model that uses Exllama backend!")
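A hedged usage sketch of the loading contract after this commit; the checkpoint id and `device_map` value are assumptions for illustration, not taken from the diff:

```python
# With the IPEX fallback removed, AWQ checkpoints require a CUDA or XPU device.
import torch
from transformers import AutoModelForCausalLM

assert torch.cuda.is_available() or torch.xpu.is_available(), "AWQ now requires an accelerator"

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.2-AWQ",  # example AWQ checkpoint (assumption)
    device_map="auto",  # must not resolve any module to "cpu" or "disk"
)
```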