Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions examples/ascend/train/qwen3_lora_megatron/dense_npu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
NPROC_PER_NODE=2 \
ASCEND_RT_VISIBLE_DEVICES=0,1 \
megatron sft \
--model Qwen/Qwen2.5-7B-Instruct \
--load_safetensors true \
--save_safetensors true \
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
'AI-ModelScope/alpaca-gpt4-data-en#500' \
'swift/self-cognition#500' \
--train_type lora \
--lora_rank 8 \
--lora_alpha 32 \
--target_modules all-linear \
--tensor_model_parallel_size 2 \
--sequence_parallel true \
--micro_batch_size 1 \
--global_batch_size 2 \
--recompute_granularity full \
--recompute_method uniform \
--recompute_num_layers 1 \
--finetune true \
--cross_entropy_loss_fusion true \
--lr 1e-4 \
--lr_warmup_fraction 0.05 \
--min_lr 1e-5 \
--max_epochs 1 \
--save megatron_output/Qwen2.5-7B-Instruct \
--save_interval 100 \
--max_length 2048 \
--system 'You are a helpful assistant.' \
--num_workers 4 \
--no_save_optim true \
--no_save_rng true \
--dataset_num_proc 4 \
--no_gradient_accumulation_fusion true \
--no_masked_softmax_fusion true \
--model_author swift \
--model_name swift-robot