@@ -1584,7 +1584,7 @@ def __init__(self, model_config: ModelConfig[PretrainedConfig]):
         # affected by CP. For other layers, CP ranks are repurposed to TP. This shall be undone
         # at the end of __init__.
         if model_config.mapping.cp_size > 1:
-            logger.info(
+            print(
                 f"[DeepseekV3ForCausalLM::__init__] Repurposing KVP ranks to TP while keeping other details the same."
             )
             self.mapping_with_cp = copy.deepcopy(model_config.mapping)
@@ -1603,7 +1603,6 @@ def __init__(self, model_config: ModelConfig[PretrainedConfig]):
                 tp_size=original_tp_size * original_cp_size,
                 pp_size=model_config.mapping.pp_size,
                 moe_ep_size=model_config.mapping.moe_ep_size,
-                auto_parallel=model_config.mapping.auto_parallel,
                 enable_attention_dp=model_config.mapping.enable_attention_dp)
             model_config._frozen = True
             ###############################################################################
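For reference, a minimal, self-contained sketch of the repurposing pattern the two hunks above implement: the original CP-aware mapping is saved, and a replacement mapping folds the CP ranks into TP. The `Mapping` dataclass below is a stand-in modeling only the fields visible in this diff, not the real tensorrt_llm `Mapping` class, and the concrete sizes are made up for illustration.

import copy
from dataclasses import dataclass

@dataclass
class Mapping:  # stand-in; fields limited to those visible in this diff
    tp_size: int = 1
    cp_size: int = 1
    pp_size: int = 1
    moe_ep_size: int = 1
    enable_attention_dp: bool = False

original = Mapping(tp_size=2, cp_size=4)   # hypothetical sizes
mapping_with_cp = copy.deepcopy(original)  # keep the CP-aware mapping for later restore

# Repurpose the CP ranks to TP: tp_size absorbs cp_size, cp_size falls back to 1.
repurposed = Mapping(
    tp_size=mapping_with_cp.tp_size * mapping_with_cp.cp_size,
    pp_size=mapping_with_cp.pp_size,
    moe_ep_size=mapping_with_cp.moe_ep_size,
    enable_attention_dp=mapping_with_cp.enable_attention_dp)

assert repurposed.tp_size == 8 and repurposed.cp_size == 1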
@@ -1658,7 +1657,7 @@ def __init__(self, model_config: ModelConfig[PretrainedConfig]):
         ###############################################################################
         # Undo any manipulations done to mapping.
         if self.mapping_with_cp is not None:
-            logger.info(
+            print(
                 f"[DeepseekV3ForCausalLM::__init__] Restoring original mapping."
             )
             model_config._frozen = False
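A sketch of why the `_frozen` toggling brackets both the swap and the restore: the config rejects attribute writes while frozen, so each mutation of `mapping` must sit inside an unfreeze/refreeze pair. The `ModelConfig` stand-in below models only that behavior implied by the diff; it is not the real class, and the string payloads are placeholders.

import copy

class ModelConfig:  # stand-in modeling only the _frozen write guard
    def __init__(self, mapping):
        self.__dict__["_frozen"] = False
        self.mapping = mapping
        self._frozen = True

    def __setattr__(self, name, value):
        if name != "_frozen" and self.__dict__.get("_frozen"):
            raise AttributeError(f"ModelConfig is frozen; cannot set {name!r}")
        self.__dict__[name] = value

model_config = ModelConfig(mapping="cp_aware_mapping")
mapping_with_cp = copy.deepcopy(model_config.mapping)

# Swap in the repurposed (TP-only) mapping under an unfreeze/refreeze pair.
model_config._frozen = False
model_config.mapping = "tp_only_mapping"
model_config._frozen = True

# ... __init__ builds the model under the repurposed mapping ...

# Undo the manipulation at the end of __init__, as the hunk above does.
model_config._frozen = False
model_config.mapping = mapping_with_cp
model_config._frozen = True
assert model_config.mapping == "cp_aware_mapping"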