Commit 6b7197c

add compatibility toggle moe_router_padding_for_fp8 back
Signed-off-by: Zhongbo Zhu <[email protected]>
1 parent 9b8be4b commit 6b7197c

File tree

2 files changed: +14 -0 lines changed


megatron/core/transformer/transformer_config.py

Lines changed: 11 additions & 0 deletions
@@ -468,6 +468,10 @@ class TransformerConfig(ModelParallelConfig):
     is a multiple of 16/32 for quantized precision (e.g., FP8, FP4). This can remove the explicit
     padding in the GroupedMLP layer."""
 
+    moe_router_padding_for_fp8: Optional[bool] = False
+    """[Compatibility alias for moe_router_padding_for_quantization]
+    Enabling this will also enable moe_router_padding_for_quantization."""
+
     moe_router_num_groups: Optional[int] = None
     """Number of groups to divide experts into for group-limited routing.
     When using group-limited routing:
@@ -1306,6 +1310,13 @@ def __post_init__(self):
                     f"but your version is {get_te_version()}."
                 )
 
+        if self.moe_router_padding_for_fp8:
+            # enable moe_router_padding_for_quantization
+            warnings.warn(
+                "--moe-router-padding-for-fp8 is going to be deprecated. Use --moe-router-padding-for-quantization instead."
+            )
+            self.moe_router_padding_for_quantization = True
+
         if self.moe_router_padding_for_quantization:
            if self.fp8 is None and self.fp4 is None:
                raise ValueError("fp8/fp4 must be specified when moe_router_padding_for_quantization is True.")

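The config change in isolation: the alias is resolved once in __post_init__, before moe_router_padding_for_quantization is consulted anywhere else, so downstream code only ever has to check the new field. A minimal, self-contained sketch of the same pattern (the field names and messages are taken from the diff; the stripped-down class and the usage at the bottom are illustrative, not the actual TransformerConfig):

import warnings
from dataclasses import dataclass
from typing import Optional


@dataclass
class RouterPaddingConfig:
    """Illustrative stand-in for the relevant TransformerConfig fields."""

    fp8: Optional[str] = None
    fp4: Optional[str] = None
    moe_router_padding_for_quantization: bool = False
    # Deprecated compatibility alias; forwarded to the field above in __post_init__.
    moe_router_padding_for_fp8: Optional[bool] = False

    def __post_init__(self):
        if self.moe_router_padding_for_fp8:
            warnings.warn(
                "--moe-router-padding-for-fp8 is going to be deprecated. "
                "Use --moe-router-padding-for-quantization instead."
            )
            self.moe_router_padding_for_quantization = True

        if self.moe_router_padding_for_quantization:
            if self.fp8 is None and self.fp4 is None:
                raise ValueError(
                    "fp8/fp4 must be specified when "
                    "moe_router_padding_for_quantization is True."
                )


# Old configs that only set the alias still end up with the new flag enabled.
cfg = RouterPaddingConfig(fp8="hybrid", moe_router_padding_for_fp8=True)
assert cfg.moe_router_padding_for_quantization
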
megatron/training/arguments.py

Lines changed: 3 additions & 0 deletions
@@ -3066,6 +3066,9 @@ def _add_moe_args(parser):
                        'dropless training with FP8/FP4 precision when num_local_experts > 1. This is a more '
                        'efficient way to pad for FP8/FP4 which eliminates the explicit padding in the '
                        'GroupedMLP layer.')
+    group.add_argument('--moe-router-padding-for-fp8', action='store_true',
+                       help='[Compatibility alias for --moe-router-padding-for-quantization] '
+                            'Enabling this will also enable --moe-router-padding-for-quantization.')
     group.add_argument('--moe-aux-loss-coeff', type=float, nargs='+', default=0.0,
                        help='Scaling coefficient for the aux loss: a starting value of 1e-2 is recommended.')
     group.add_argument('--moe-z-loss-coeff', type=float, default=None,

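On the argument-parser side, the alias is an ordinary store_true flag in the MoE group; translating it into the new option is left to TransformerConfig.__post_init__ as shown above. A rough, runnable sketch of how the two flags behave at parse time (the parser setup is illustrative; only the flag names and help text come from the diff):

import argparse

parser = argparse.ArgumentParser()
group = parser.add_argument_group(title='moe')
group.add_argument('--moe-router-padding-for-quantization', action='store_true',
                   help='Pad the routing map so that the number of tokens per expert '
                        'is a multiple of 16/32 for quantized precision (FP8/FP4).')
group.add_argument('--moe-router-padding-for-fp8', action='store_true',
                   help='[Compatibility alias for --moe-router-padding-for-quantization] '
                        'Enabling this will also enable --moe-router-padding-for-quantization.')

# An older launch script that still passes the FP8-specific flag keeps working;
# only the alias is set at parse time, and the config object forwards it later.
args = parser.parse_args(['--moe-router-padding-for-fp8'])
print(args.moe_router_padding_for_fp8)           # True
print(args.moe_router_padding_for_quantization)  # False until __post_init__ forwards it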