Commit 6ef5901

save intermediate changes

1 parent 21b585b
6 files changed: +10 −10 lines changed

cpp/tensorrt_llm/thop/attentionOp.cpp

Lines changed: 3 additions & 2 deletions

@@ -181,8 +181,9 @@ class Runner : public RunnerBase
     [[maybe_unused]] MlaParams<T> mla_params;
     if (op.isMLAEnabled())
     {
-        TORCH_CHECK(mla_tensor_params.size() == 1,
-            "Expecting 1 tensor for custom MLA tensor params: helix_position_offsets.");
+        std::cerr << "[attentionOp::run] mla_tensor_params.size() = " << mla_tensor_params.size() << std::endl;
+        TORCH_CHECK(mla_tensor_params.size() == 2,
+            "Expecting 2 tensors for custom MLA tensor params: helix_position_offsets and helix_inactive_rank.");
         if (is_context && op.mUseSparseAttention)
         {
             if (latent_cache.has_value())
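With this stricter TORCH_CHECK, any Python caller must now supply both tensors in mla_tensor_params. A minimal sketch of a conforming argument list, assuming the two tensors are passed in this order (the helper name and its use are illustrative, not the repo's actual API):

import torch
from typing import List

def build_mla_tensor_params(helix_position_offsets: torch.Tensor,
                            helix_inactive_rank: torch.Tensor) -> List[torch.Tensor]:
    # Hypothetical helper: the C++ check above now requires exactly two
    # entries, [helix_position_offsets, helix_inactive_rank].
    return [helix_position_offsets, helix_inactive_rank]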

tensorrt_llm/_torch/attention_backend/trtllm.py

Lines changed: 0 additions & 1 deletion

@@ -1382,7 +1382,6 @@ def forward(
         softmax_stats_tensor: Optional[torch.Tensor] = None,
         helix_position_offsets: Optional[torch.Tensor] = None,
         enable_attn_nvfp4_output: bool = True,
-        helix_position_offsets: Optional[torch.Tensor] = None,
         output: Optional[torch.Tensor] = None,
         output_sf: Optional[torch.Tensor] = None,
         attention_sinks: Optional[torch.Tensor] = None,
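The deleted line duplicated the helix_position_offsets parameter three lines above it, which Python rejects when the function is compiled, so this removal fixes a SyntaxError rather than changing behavior. A self-contained reproduction:

# Python refuses duplicate parameter names at compile time.
src = "def forward(helix_position_offsets=None, helix_position_offsets=None): pass"
try:
    compile(src, "<example>", "exec")
except SyntaxError as e:
    print(e)  # duplicate argument 'helix_position_offsets' in function definition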

tensorrt_llm/_torch/distributed/communicator.py

Lines changed: 3 additions & 2 deletions

@@ -1,3 +1,4 @@
+import copy
 import math
 import pickle # nosec B403
 from abc import ABC, abstractmethod

@@ -359,8 +360,8 @@ def __init__(self, mapping: Mapping):
             moe_cluster_size=self.mapping.moe_cluster_size,
             moe_tp_size=self.mapping.moe_tp_size,
             moe_ep_size=self.mapping.moe_ep_size,
-            attn_tp_size=self.mapping.attn_tp_size,
-            attn_cp_size=self.mapping.attn_cp_size,
+            # attn_tp_size=self.mapping.attn_tp_size,
+            # attn_cp_size=self.mapping.attn_cp_size,
             enable_attention_dp=self.mapping.enable_attention_dp,
             enable_lm_head_tp_in_adp=self.mapping.enable_lm_head_tp_in_adp)
         self.mapping = mapping_without_helix
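The newly imported copy module is unused in the hunks shown here; one plausible direction, given that the second hunk rebuilds a Mapping by re-listing kwargs, is to clone the existing mapping and reset its helix-related fields instead. A hedged sketch of that pattern (the field reset is an assumption, not what this commit does):

import copy

def strip_helix(mapping):
    # Assumption: clone the Mapping and drop context parallelism, rather than
    # reconstructing it from individual keyword arguments as the diff does.
    mapping_without_helix = copy.deepcopy(mapping)
    mapping_without_helix.cp_size = 1
    return mapping_without_helix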

tensorrt_llm/_torch/models/modeling_deepseekv3.py

Lines changed: 2 additions & 3 deletions

@@ -1584,7 +1584,7 @@ def __init__(self, model_config: ModelConfig[PretrainedConfig]):
         # affected by CP. For other layers, CP ranks are repurposed to TP. This shall be undone
         # at the end of __init__.
         if model_config.mapping.cp_size > 1:
-            logger.info(
+            print(
                 f"[DeepseekV3ForCausalLM::__init__] Repurposing KVP ranks to TP while keeping other details the same."
             )
             self.mapping_with_cp = copy.deepcopy(model_config.mapping)

@@ -1603,7 +1603,6 @@ def __init__(self, model_config: ModelConfig[PretrainedConfig]):
                 tp_size=original_tp_size * original_cp_size,
                 pp_size=model_config.mapping.pp_size,
                 moe_ep_size=model_config.mapping.moe_ep_size,
-                auto_parallel=model_config.mapping.auto_parallel,
                 enable_attention_dp=model_config.mapping.enable_attention_dp)
             model_config._frozen = True
             ###############################################################################

@@ -1658,7 +1657,7 @@ def __init__(self, model_config: ModelConfig[PretrainedConfig]):
             ###############################################################################
             # Undo any manipulations done to mapping.
             if self.mapping_with_cp is not None:
-                logger.info(
+                print(
                     f"[DeepseekV3ForCausalLM::__init__] Restoring original mapping."
                 )
                 model_config._frozen = False
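The repurposing itself is plain arithmetic on the parallelism sizes: the context-parallel ranks are folded into tensor parallelism for the non-attention layers. A worked example with assumed sizes:

# Illustrative values; the diff computes tp_size = original_tp_size * original_cp_size.
original_tp_size, original_cp_size = 4, 2
repurposed_tp_size = original_tp_size * original_cp_size
assert repurposed_tp_size == 8  # 8 TP ranks while CP is temporarily folded away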

tensorrt_llm/_torch/modules/attention.py

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@
 from tensorrt_llm._utils import (get_sm_version, is_sm_100f, nvtx_range,
                                  nvtx_range_debug)
 from tensorrt_llm.logger import logger
-from tensorrt_llm.mapping import Mapping
+from tensorrt_llm.mapping import Mapping, CpType

 from ..attention_backend import (AttentionInputType, AttentionMetadata,
                                  FlashInferAttentionMetadata, TrtllmAttention,
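CpType is presumably imported so attention.py can branch on the context-parallel type; the diff does not show the usage. A hypothetical check (reading the type out of cp_config is an assumption based on the mapping.py change below):

from tensorrt_llm.mapping import Mapping, CpType

def uses_helix(mapping: Mapping) -> bool:
    # Assumption: cp_config carries the "cp_type" key set in Mapping.__init__.
    return mapping.cp_config.get("cp_type") == CpType.HELIX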

tensorrt_llm/mapping.py

Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@ def __init__(
         #################################################################
         # TODO: Remove this hardcoding.
         if cp_size > 1:
-            assert cp_type == CpType.HELIX
+            cp_config = {"cp_type": CpType.HELIX}
         #################################################################
         moe_world_size = tp_size if cp_type == CpType.ULYSSES else tp_size * cp_size
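Net effect of this last hunk: constructing a Mapping with cp_size > 1 no longer asserts that the CP type is HELIX but overwrites cp_config instead. Note that the moe_world_size line still branches on the separate cp_type value, so a non-HELIX cp_type now passes through silently. A minimal illustration (constructor keywords are assumed from the surrounding diffs):

from tensorrt_llm.mapping import Mapping

# Before this commit: cp_size > 1 with a non-HELIX cp_type raised AssertionError.
# After: cp_config is silently forced to {"cp_type": CpType.HELIX} in __init__.
mapping = Mapping(world_size=4, tp_size=2, cp_size=2)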