Skip to content

Commit 0337f20

Browse files
committed
minor updates
Signed-off-by: Robin Zhang <[email protected]>
1 parent ecf173d commit 0337f20

File tree

7 files changed: 14 additions and 18 deletions.

7 files changed: 14 additions and 18 deletions.

megatron/core/transformer/cuda_graphs.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,25 +1731,25 @@ def cuda_graph_set_manual_hooks(self):
17311731
for layer in layers:
17321732
layer.setup_manual_hooks(model_chunk._make_forward_pre_hook)
17331733

1734-
def destroy_cudagraphs(self):
1734+
def delete_cuda_graphs(self):
17351735
"""
17361736
Destroy CUDA Graphs.
17371737
"""
17381738
assert self._graphs_created, "CUDA Graphs have not been created."
1739-
graphs_destroyed, graphs_not_destroyed = 0, 0
1739+
graphs_deleted, graphs_not_deleted = 0, 0
17401740
for _, layers in enumerate(self.callables_per_chunk):
17411741
for layer in layers:
17421742
for graph in layer.cuda_graphs:
17431743
if is_te_min_version("2.10.0"):
17441744
graph.reset()
1745-
graphs_destroyed += 1
1745+
graphs_deleted += 1
17461746
else:
1747-
graphs_not_destroyed += 1
1747+
graphs_not_deleted += 1
17481748
layer.cuda_graphs = []
17491749
layer.cuda_graph_manual_hooks = []
17501750
log_single_rank(
17511751
logger,
17521752
logging.INFO,
1753-
f'{graphs_destroyed} graphs destroyed, {graphs_not_destroyed} graphs not destroyed.',
1753+
f'{graphs_deleted} graphs deleted, {graphs_not_deleted} graphs not deleted.',
17541754
)
17551755
self._graphs_created = False

megatron/core/transformer/moe/fused_a2a.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
except ImportError:
1313
HAVE_DEEP_EP = False
1414

15-
HAVE_HYBRIDEP = False
16-
1715
import torch
1816

1917
_buffer = None

megatron/core/transformer/moe/token_dispatcher.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@
3737
from megatron.core.transformer.moe.shared_experts import SharedExpertMLP
3838
from megatron.core.transformer.transformer_config import TransformerConfig
3939

40-
logger = logging.getLogger(__name__)
41-
4240
""" We use the following notation throughout this file:
4341
H: hidden size
4442
B: micro batch size
@@ -1369,6 +1367,7 @@ def __init__(
13691367
num_experts=self.tp_size * self.config.num_moe_experts,
13701368
config=self.config,
13711369
)
1370+
self.cudagraph_attrs = ['_comm_manager.token_probs', '_comm_manager.routing_map']
13721371
else:
13731372
raise ValueError(
13741373
f"Invalid backend: {self.config.moe_flex_dispatcher_backend}"

megatron/core/transformer/transformer_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22

33
import warnings
4-
from dataclasses import dataclass
4+
from dataclasses import dataclass, field
55
from typing import Callable, List, Literal, Optional, Tuple, Union
66

77
import torch
@@ -664,7 +664,7 @@ class TransformerConfig(ModelParallelConfig):
664664
excluding optimizer) is enabled.
665665
"transformer_engine": capture the CUDA graph using TE make_graphed_callables()."""
666666

667-
cuda_graph_scope: Optional[List[CudaGraphScope]] = None
667+
cuda_graph_scope: List[CudaGraphScope] = field(default_factory=list)
668668
"""Determines the CUDA graphs capturing scope.
669669
When cuda_graph_impl is set to "transformer_engine", valid values are "attn", "mlp", "moe",
670670
"moe_router", "moe_preprocess", "mamba". None means the full layer.

megatron/training/training.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2513,7 +2513,7 @@ def get_e2e_base_metrics():
25132513

25142514
# Destroy CUDA Graphs.
25152515
if args.cuda_graph_impl == "transformer_engine" and cuda_graph_helper.graphs_created():
2516-
cuda_graph_helper.destroy_cudagraphs()
2516+
cuda_graph_helper.delete_cuda_graphs()
25172517

25182518
one_logger_utils.track_e2e_metrics()
25192519

tests/unit_tests/inference/engines/test_dynamic_engine.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,9 @@ class DynamicEngineTestConfig:
106106
return_log_probs: bool = False
107107
materialize_only_last_token_logits: bool = True
108108
skip_prompt_log_probs: bool = False
109-
cuda_graph_scope: List[CudaGraphScope] = None
109+
cuda_graph_scope: List[CudaGraphScope] = field(
110+
default_factory=lambda: [CudaGraphScope.full_iteration]
111+
)
110112
force_build_cuda_graphs: bool = False
111113
transformer_impl: str = "local"
112114
# If False, do not build cuda graphs in the tests, even if
@@ -130,9 +132,6 @@ def __post_init__(self):
130132
assert self.num_tokens_total is not None
131133
self.max_sequence_length = self.num_tokens_total
132134

133-
if self.cuda_graph_scope is None:
134-
self.cuda_graph_scope = [CudaGraphScope.full_iteration]
135-
136135

137136
@dataclass
138137
class DynamicEngineTestEnv:

tests/unit_tests/transformer/test_cuda_graphs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ def teardown_method(self, method):
558558
destroy_global_vars()
559559
destroy_num_microbatches_calculator()
560560
if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created():
561-
self.cuda_graph_helper.destroy_cudagraphs()
561+
self.cuda_graph_helper.delete_cuda_graphs()
562562
self.cuda_graph_helper = None
563563
gc.collect()
564564

@@ -742,7 +742,7 @@ def _run_test_helper(
742742
loss_list.append(loss.item())
743743

744744
if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created():
745-
self.cuda_graph_helper.destroy_cudagraphs()
745+
self.cuda_graph_helper.delete_cuda_graphs()
746746
self.cuda_graph_helper = None
747747

748748
return torch.tensor(loss_list)

0 commit comments

Comments
 (0)