训练配置:
--target_modules all-linear
--agent_template glm5_1
--template "glm5_2"
--model_type glm_moe_dsa
环境配置:
megatron-core 0.17.1
ms_swift 4.4.0.dev0
mcore_bridge 1.5.1
transformers 5.2.0
ms_swift 4.3.1
log信息:
[rank7]: Traceback (most recent call last):
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/cli/_megatron/sft.py", line 7, in <module>
[rank7]: megatron_sft_main()
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/pipelines/train/sft.py", line 97, in megatron_sft_main
[rank7]: return MegatronSft(args).main()
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/pipelines/base.py", line 52, in main
[rank7]: result = self.run()
[rank7]: ^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/pipelines/train/sft.py", line 70, in run
[rank7]: trainer = self.prepare_trainer()
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/pipelines/train/sft.py", line 37, in prepare_trainer
[rank7]: return MegatronTrainer(self.args, self.template)
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/trainers/base.py", line 69, in __init__
[rank7]: self.prepare_model()
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/trainers/base.py", line 190, in prepare_model
[rank7]: self.peft_models = self._prepare_peft_model(self.unwrapped_models)
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/trainers/base.py", line 196, in _prepare_peft_model
[rank7]: self.bridge.load_weights(models, args.model_dir)
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1927, in load_weights
[rank7]: list(self._convert([mg_model], state_dict, hf_prefix, True, 'Loading: '))
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1807, in _convert
[rank7]: res = self._set_layer_state(mg_layer, hf_state_dict, f'{self.hf_layers_prefix}.', layer_idx, to_mcore)
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1684, in _set_layer_state
[rank7]: hf_state_dict.update(self._set_layer_attn(mg_layer, hf_state_dict, layer_idx, to_mcore))
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1626, in _set_layer_attn
[rank7]: self._set_mla_attn_state(mg_attn, hf_state_dict, f'{self.hf_attn_prefix}.', layer_idx, to_mcore))
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1615, in _set_mla_attn_state
[rank7]: hf_state_dict.update(self._set_indexer(indexer, hf_state_dict, 'indexer.', to_mcore))
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1300, in _set_indexer
[rank7]: self._set_state_dict(mg_indexer, 'linear_wq_b.weight', hf_state_dict, 'wq_b.weight', to_mcore)
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 506, in _set_state_dict
[rank7]: hf_weight = hf_state_dict[hf_key].load()
[rank7]: ~~~~~~~~~~~~~^^^^^^^^
[rank7]: KeyError: 'wq_b.weight'
Pull Request / Pull Request 信息
No response
训练配置:
--target_modules all-linear
--agent_template glm5_1
--template "glm5_2"
--model_type glm_moe_dsa
环境配置:
megatron-core 0.17.1
ms_swift 4.4.0.dev0
mcore_bridge 1.5.1
transformers 5.2.0
ms_swift 4.3.1
log信息:
[rank7]: Traceback (most recent call last):
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/cli/_megatron/sft.py", line 7, in <module>
[rank7]: megatron_sft_main()
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/pipelines/train/sft.py", line 97, in megatron_sft_main
[rank7]: return MegatronSft(args).main()
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/pipelines/base.py", line 52, in main
[rank7]: result = self.run()
[rank7]: ^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/pipelines/train/sft.py", line 70, in run
[rank7]: trainer = self.prepare_trainer()
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/pipelines/train/sft.py", line 37, in prepare_trainer
[rank7]: return MegatronTrainer(self.args, self.template)
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/trainers/base.py", line 69, in __init__
[rank7]: self.prepare_model()
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/trainers/base.py", line 190, in prepare_model
[rank7]: self.peft_models = self._prepare_peft_model(self.unwrapped_models)
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/swift/megatron/trainers/base.py", line 196, in _prepare_peft_model
[rank7]: self.bridge.load_weights(models, args.model_dir)
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1927, in load_weights
[rank7]: list(self._convert([mg_model], state_dict, hf_prefix, True, 'Loading: '))
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1807, in _convert
[rank7]: res = self._set_layer_state(mg_layer, hf_state_dict, f'{self.hf_layers_prefix}.', layer_idx, to_mcore)
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1684, in _set_layer_state
[rank7]: hf_state_dict.update(self._set_layer_attn(mg_layer, hf_state_dict, layer_idx, to_mcore))
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1626, in _set_layer_attn
[rank7]: self._set_mla_attn_state(mg_attn, hf_state_dict, f'{self.hf_attn_prefix}.', layer_idx, to_mcore))
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1615, in _set_mla_attn_state
[rank7]: hf_state_dict.update(self._set_indexer(indexer, hf_state_dict, 'indexer.', to_mcore))
[rank7]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 1300, in _set_indexer
[rank7]: self._set_state_dict(mg_indexer, 'linear_wq_b.weight', hf_state_dict, 'wq_b.weight', to_mcore)
[rank7]: File "/usr/local/python3.12/lib/python3.12/site-packages/mcore_bridge/bridge/gpt_bridge.py", line 506, in _set_state_dict
[rank7]: hf_weight = hf_state_dict[hf_key].load()
[rank7]: ~~~~~~~~~~~~~^^^^^^^^
[rank7]: KeyError: 'wq_b.weight'
Pull Request / Pull Request 信息
No response