This repository was archived by the owner on May 11, 2025. It is now read-only.

Commit de21987

assertion for non-activated experts in MoE (#755)

Parent: f14d0fd

1 file changed: awq/quantize/quantizer.py (10 additions, 1 deletion)
@@ -666,8 +666,17 @@ def cache_input_hook(m, x, y, name, feat_dict):
         self.inps = self._module_forward(self.inps, layer, module_kwargs)
         for h in handles:
             h.remove()
+
         # now solve for scaling and clipping
-        input_feat = {k: torch.cat(v, dim=0) for k, v in input_feat.items()}
+        def cat_and_assert(k, v):
+            x = torch.cat(v, dim=0)
+            assert x.shape[0] != 0, (
+                f"{k} has a zero dimension. This can happen if no data was passed through (e.g. an expert in MoE not being activated). "
+                "Try increasing max_calib_samples (warning: this can significantly increase quantization time and memory usage.)"
+            )
+            return x
+
+        input_feat = {k: cat_and_assert(k, v) for k, v in input_feat.items()}
 
         return input_feat
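
For context, a minimal, self-contained sketch of how the new guard behaves, assuming a toy input_feat dictionary in which one key (standing in for an MoE expert that was never routed any calibration tokens) collected only an empty tensor. The module names and shapes below are illustrative, not taken from the repository.

import torch

def cat_and_assert(k, v):
    # Same logic as the added helper: concatenate the activations collected
    # for module k and fail loudly if nothing was captured.
    x = torch.cat(v, dim=0)
    assert x.shape[0] != 0, (
        f"{k} has a zero dimension. This can happen if no data was passed through "
        "(e.g. an expert in MoE not being activated). Try increasing max_calib_samples."
    )
    return x

# Illustrative calibration features: one expert captured activations,
# the other captured only an empty tensor because no tokens reached it.
input_feat = {
    "experts.0.w1": [torch.randn(4, 16), torch.randn(4, 16)],
    "experts.1.w1": [torch.empty(0, 16)],
}

for name, feats in input_feat.items():
    try:
        print(name, cat_and_assert(name, feats).shape)
    except AssertionError as err:
        print(name, "->", err)

Before this change, the bare torch.cat would silently produce a zero-row tensor for the unactivated expert and the failure would only surface later; the assertion reports the affected module up front and, per its message, suggests increasing max_calib_samples.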
