Skip to content

Commit 90792b7

Browse files
committed
Revert "Fix GPT-OSS swiglu_limit not passed in for MXFP4 #40197"
The cherry-picked commit does not match the changes nor the PR. This reverts commit e75d67e.
1 parent a03df6a commit 90792b7

File tree

1 file changed

+1
-10
lines changed

1 file changed

+1
-10
lines changed

src/transformers/quantizers/quantizer_mxfp4.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,7 @@ def validate_environment(self, *args, **kwargs):
6161
return
6262

6363
if not torch.cuda.is_available():
64-
raise RuntimeError("Using MXFP4 quantized models requires a GPU")
65-
66-
if not is_accelerate_available():
67-
raise ImportError("Using mxfp4 requires Accelerate: `pip install accelerate`")
68-
69-
compute_capability = torch.cuda.get_device_capability()
70-
major, minor = compute_capability
71-
72-
if not is_triton_available("3.4.0") or not is_triton_kernels_availalble():
73-
if self.pre_quantized and not self.quantization_config.dequantize:
64+
if self.pre_quantized:
7465
logger.warning_once(
7566
"Using MXFP4 quantized models requires a GPU, we will default to dequantizing the model to bf16"
7667
)

0 commit comments

Comments
 (0)