Revert "[Dev] fix(megatron-fsdp): Resolve hang caused by non-deterministic reduce-scatter (#2252)"

ko3n1g · ko3n1g · commit 7b8e39eeb675 · 2025-11-20T23:27:46.000Z
This reverts commit c6e2b29.
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py
@@ -2782,9 +2782,6 @@ def reduce_gradients(
             outer_fsdp_group_grad_reduce (bool, optional): Whether to reduce gradients
                 across outer-DP groups. Defaults to False.
         """
-        # Sort parameters by their bucket IDs to ensure a deterministic processing order.
-        # Performing reduce-scatter operations out of order can lead to hangs.
-        params = sorted(list(params), key=lambda x: self.buffer.param_to_param_group[x])
         for param in params:
             bucket_id = self.buffer.param_to_param_group[param]
             param_group = self.buffer.parameter_groups[bucket_id]