diff --git a/examples/ascend/train/qwen3_lora_megatron/dense_npu.sh b/examples/ascend/train/qwen3_lora_megatron/dense_npu.sh
new file mode 100644
index 0000000000..897495c15e
--- /dev/null
+++ b/examples/ascend/train/qwen3_lora_megatron/dense_npu.sh
@@ -0,0 +1,38 @@
+NPROC_PER_NODE=2 \
+ASCEND_RT_VISIBLE_DEVICES=0,1 \
+megatron sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --load_safetensors true \
+    --save_safetensors true \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --train_type lora \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --tensor_model_parallel_size 2 \
+    --sequence_parallel true \
+    --micro_batch_size 1 \
+    --global_batch_size 2 \
+    --recompute_granularity full \
+    --recompute_method uniform \
+    --recompute_num_layers 1 \
+    --finetune true \
+    --cross_entropy_loss_fusion true \
+    --lr 1e-4 \
+    --lr_warmup_fraction 0.05 \
+    --min_lr 1e-5 \
+    --max_epochs 1 \
+    --save megatron_output/Qwen2.5-7B-Instruct \
+    --save_interval 100 \
+    --max_length 2048 \
+    --system 'You are a helpful assistant.' \
+    --num_workers 4 \
+    --no_save_optim true \
+    --no_save_rng true \
+    --dataset_num_proc 4 \
+    --no_gradient_accumulation_fusion true \
+    --no_masked_softmax_fusion true \
+    --model_author swift \
+    --model_name swift-robot