We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 5730fac + 10120db commit 855b40e — Copy full SHA for 855b40e
launcher_scripts/conf/training/nemotron/nemotron_340b.yaml
@@ -135,7 +135,6 @@ model:
135
defer_embedding_wgrad_compute: True
136
wgrad_deferral_limit: 22
137
cross_entropy_loss_fusion: True
138
- enable_vboost: True
139
ub_tp_comm_overlap: True
140
apply_rope_fusion: True
141
deteministic_mode: False
@@ -161,7 +160,6 @@ model:
161
160
fp8_amax_history_len: 1024 # Number of steps for which amax history is recorded per tensor
162
fp8_amax_compute_algo: max # 'most_recent' or 'max'. Algorithm for computing amax from history
163
fp8_wgrad: True
164
- ub_tp_comm_overlap: False
165
166
optim:
167
name: mcore_distributed_optim
0 commit comments