
Commit 153b6ed: Change argument name.

1 parent: ec43edb

2 files changed: +4, -4 lines

megatron/training/arguments.py

Lines changed: 3 additions & 3 deletions

@@ -1129,7 +1129,7 @@ def validate_args(args, defaults={}):
     assert not args.distrib_optim_fully_reshardable_mem_efficient, \
         '--distrib-optim-fully-reshardable-mem-efficient requires --enable-gloo-process-groups'
 
-    if args.fake_distributed_group:
+    if args.fake_process_group:
         assert not args.enable_gloo_process_groups, "Fake distributed group requires disabling Gloo process groups."
 
     # Checkpointing
@@ -2836,8 +2836,8 @@ def _add_distributed_args(parser):
                        "and must be consistent across all ranks.")
     group.add_argument('--replication-factor', default=2, type=int,
                        help="Number of machines storing the replica of a given rank's data.")
-    group.add_argument('--fake-distributed-group', action='store_true', default=False,
-                       help='If set, initialize with fake distributed group and all distributed communication operations will be skipped. \
+    group.add_argument('--fake-process-group', action='store_true', default=False,
+                       help='If set, initialize with fake distributed process group and all distributed communication operations will be skipped. \
                        This is quite useful for profiling memory usage of distributed training with just one GPU. \
                        Set WORLD_SIZE and RANK to the specific values for the target distributed scale.')
     return parser
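
For context, a minimal, self-contained sketch of how the renamed flag interacts with the validation check above. This is not the real Megatron parser: only the two relevant options are modeled, and --disable-gloo-process-groups is a hypothetical stand-in for Megatron's actual Gloo option.

# Minimal sketch, assuming a stripped-down parser; --disable-gloo-process-groups is hypothetical.
import argparse

def build_parser():
    parser = argparse.ArgumentParser()
    group = parser.add_argument_group(title='distributed')
    group.add_argument('--disable-gloo-process-groups', dest='enable_gloo_process_groups',
                       action='store_false', default=True,
                       help='Hypothetical switch that disables Gloo process groups.')
    group.add_argument('--fake-process-group', action='store_true', default=False,
                       help='Initialize with a fake distributed process group; all distributed '
                            'communication operations are skipped.')
    return parser

def validate_args(args):
    # Mirrors the check added in validate_args above: the fake process group
    # is only allowed when Gloo process groups are disabled.
    if args.fake_process_group:
        assert not args.enable_gloo_process_groups, \
            "Fake distributed group requires disabling Gloo process groups."
    return args

if __name__ == '__main__':
    args = validate_args(build_parser().parse_args(
        ['--fake-process-group', '--disable-gloo-process-groups']))
    print(args.fake_process_group, args.enable_gloo_process_groups)  # True False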

megatron/training/initialize.py

Lines changed: 1 addition & 1 deletion

@@ -346,7 +346,7 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s
             'rank': args.rank,
             'timeout': timedelta(minutes=args.distributed_timeout_minutes),
         }
-        if args.fake_distributed_group:
+        if args.fake_process_group:
            from torch.testing._internal.distributed.fake_pg import FakeStore
            store = FakeStore()
            init_process_group_kwargs['backend'] = 'fake'
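
The snippet below is a standalone sketch of the initialization path this diff touches: creating a FakeStore and initializing torch.distributed with the 'fake' backend so collectives become no-ops on a single process. It assumes a PyTorch build that ships torch.testing._internal.distributed.fake_pg; the world size, rank, and timeout values are illustrative, not Megatron defaults.

# Standalone sketch, assuming PyTorch provides torch.testing._internal.distributed.fake_pg.
import os
from datetime import timedelta

import torch
import torch.distributed as dist
# Importing FakeStore also registers the 'fake' backend with torch.distributed.
from torch.testing._internal.distributed.fake_pg import FakeStore

# Pretend to be rank 0 of a 64-rank job while running a single process.
world_size = int(os.environ.get('WORLD_SIZE', 64))
rank = int(os.environ.get('RANK', 0))

init_process_group_kwargs = {
    'world_size': world_size,
    'rank': rank,
    'timeout': timedelta(minutes=10),  # illustrative value
    'backend': 'fake',
    'store': FakeStore(),
}
dist.init_process_group(**init_process_group_kwargs)

# Collectives now return immediately without real communication, which is what makes
# single-GPU memory profiling of a "distributed" configuration possible.
t = torch.ones(4)
dist.all_reduce(t)  # no-op under the fake backend
print(dist.get_world_size(), dist.get_rank())  # e.g. 64 0 with the defaults above
dist.destroy_process_group()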
