We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 716bb4a, commit 7b8e39e. Copy full SHA for 7b8e39e
megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py
@@ -2782,9 +2782,6 @@ def reduce_gradients(
2782
outer_fsdp_group_grad_reduce (bool, optional): Whether to reduce gradients
2783
across outer-DP groups. Defaults to False.
2784
"""
2785
- # Sort parameters by their bucket IDs to ensure a deterministic processing order.
2786
- # Performing reduce-scatter operations out of order can lead to hangs.
2787
- params = sorted(list(params), key=lambda x: self.buffer.param_to_param_group[x])
2788
for param in params:
2789
bucket_id = self.buffer.param_to_param_group[param]
2790
param_group = self.buffer.parameter_groups[bucket_id]
0 commit comments