This repository was archived by the owner on May 11, 2025. It is now read-only.

Commit de21987

assertion for non-activated experts in MoE (#755)

Parent: f14d0fd

1 file changed: awq/quantize/quantizer.py (10 additions, 1 deletion)
@@ -666,8 +666,17 @@ def cache_input_hook(m, x, y, name, feat_dict):
         self.inps = self._module_forward(self.inps, layer, module_kwargs)
         for h in handles:
             h.remove()
+
         # now solve for scaling and clipping
-        input_feat = {k: torch.cat(v, dim=0) for k, v in input_feat.items()}
+        def cat_and_assert(k, v):
+            x = torch.cat(v, dim=0)
+            assert x.shape[0] != 0, (
+                f"{k} has a zero dimension. This can happen if no data was passed through (e.g. an expert in MoE not being activated). "
+                "Try increasing max_calib_samples (warning: this can significantly increase quantization time and memory usage.)"
+            )
+            return x
+
+        input_feat = {k: cat_and_assert(k, v) for k, v in input_feat.items()}
 
         return input_feat
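
For context, a minimal, self-contained sketch of how the new guard behaves, assuming a toy input_feat dictionary in which one key (standing in for an MoE expert that was never routed any calibration tokens) collected only an empty tensor. The module names and shapes below are illustrative, not taken from the repository.

import torch

def cat_and_assert(k, v):
    # Same logic as the added helper: concatenate the activations collected
    # for module k and fail loudly if nothing was captured.
    x = torch.cat(v, dim=0)
    assert x.shape[0] != 0, (
        f"{k} has a zero dimension. This can happen if no data was passed through "
        "(e.g. an expert in MoE not being activated). Try increasing max_calib_samples."
    )
    return x

# Illustrative calibration features: one expert captured activations,
# the other captured only an empty tensor because no tokens reached it.
input_feat = {
    "experts.0.w1": [torch.randn(4, 16), torch.randn(4, 16)],
    "experts.1.w1": [torch.empty(0, 16)],
}

for name, feats in input_feat.items():
    try:
        print(name, cat_and_assert(name, feats).shape)
    except AssertionError as err:
        print(name, "->", err)

Before this change, the bare torch.cat would silently produce a zero-row tensor for the unactivated expert and the failure would only surface later; the assertion reports the affected module up front and, per its message, suggests increasing max_calib_samples.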
