Description
Background:
I fine-tuned qwen2.5-vl-7B with LoRA using LLaMA-Factory, then merged and exported the adapter. I now want to quantize the fine-tuned qwen2.5-vl-7B.
(single RTX 5090 GPU)
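
For context, the merge-and-export step amounts to the following plain-peft sketch (this is not the actual LLaMA-Factory code; the model ID and adapter path are illustrative):

# Minimal sketch of "merge the LoRA adapter and export", assuming a
# standard PEFT adapter; paths and model ID are illustrative.
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel

base = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype="auto")
model = PeftModel.from_pretrained(base, "path/to/lora_adapter")
merged = model.merge_and_unload()  # fold the LoRA weights into the base model
merged.save_pretrained("merged_model/qwen25vl_7b_finetuning")

# The processor/tokenizer files need to ship with the merged weights
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
processor.save_pretrained("merged_model/qwen25vl_7b_finetuning")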
Problem:
The swift command is as follows:
CUDA_VISIBLE_DEVICES=0 swift export \
    --model ~/LLaMA-Factory/merged_model/qwen25vl_7b_finetuning/ \
    --quant_bits 4 \
    --quant_method awq \
    --output_dir ~/ms-swift/awq_model/ \
    --dataset ~/ms-swift/dataset \
    --model_type qwen2_5_vl
Error:
[INFO:swift] Successfully registered ~/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/dataset/data/dataset_info.json
[INFO:swift] rank: -1, local_rank: -1, world_size: 1, local_world_size: 1
[INFO:swift] Loading the model using model_dir: /llama-factory/LLaMA-Factory/merged_model/qwen25vl_7b_lab_16/
[WARNING:swift] Please install the package: pip install "qwen_vl_utils>=0.0.6" "decord" -U
[INFO:swift] Setting args.lazy_tokenize: True
[INFO:swift] args.output_dir: ~/ms-swift/awq_model
[INFO:swift] Global seed set to 42
[INFO:swift] args: ExportArguments(model='/llama-factory/LLaMA-Factory/merged_model/qwen25vl_7b_lab_16/', model_type='qwen2_5_vl', model_revision=None, task_type='causal_lm', torch_dtype=torch.float16, attn_impl=None, new_special_tokens=[], num_labels=None, problem_type=None, rope_scaling=None, device_map=None, max_memory={}, max_model_len=None, local_repo_path=None, init_strategy=None, template='qwen2_5_vl', system=None, max_length=2048, truncation_strategy='delete', max_pixels=None, agent_template=None, norm_bbox=None, use_chat_template=True, padding_free=False, padding_side='right', loss_scale='default', sequence_parallel_size=1, response_prefix=None, template_backend='swift', dataset=['/ms-swift/dataset'], val_dataset=[], split_dataset_ratio=0.0, data_seed=42, dataset_num_proc=1, load_from_cache_file=True, dataset_shuffle=True, val_dataset_shuffle=False, streaming=False, interleave_prob=None, stopping_strategy='first_exhausted', shuffle_buffer_size=1000, download_mode='reuse_dataset_if_exists', columns={}, strict=False, remove_unused_columns=True, model_name=None, model_author=None, custom_dataset_info=[], quant_method='awq', quant_bits=4, hqq_axis=None, bnb_4bit_compute_dtype=torch.float32, bnb_4bit_quant_type='nf4', bnb_4bit_use_double_quant=True, bnb_4bit_quant_storage=None, max_new_tokens=None, temperature=None, top_k=None, top_p=None, repetition_penalty=None, num_beams=1, stream=False, stop_words=[], logprobs=False, top_logprobs=None, ckpt_dir=None, lora_modules=[], tuner_backend='peft', train_type='lora', adapters=[], external_plugins=[], seed=42, model_kwargs={}, load_args=True, load_data_args=False, packing=False, packing_length=None, lazy_tokenize=True, cached_dataset=[], custom_register_path=[], use_hf=False, hub_token=None, ddp_timeout=18000000, ddp_backend=None, ignore_args_error=False, use_swift_lora=False, merge_lora=False, safe_serialization=True, max_shard_size='5GB', output_dir='/ms-swift/awq_model', quant_n_samples=256, quant_batch_size=1, group_size=128, to_cached_dataset=False, to_ollama=False, to_mcore=False, to_hf=False, mcore_model=None, mcore_adapters=[], thread_count=None, test_convert_precision=False, test_convert_dtype=torch.float32, push_to_hub=False, hub_model_id=None, hub_private_repo=False, commit_message='update files', to_peft_format=False, exist_ok=False)
[INFO:swift] Start time of running main: 2025-09-18 20:24:31.082479
[INFO:swift] swift.version: 3.8.1
~/ms-swift1/.venv/lib/python3.10/site-packages/awq/__init__.py:21: DeprecationWarning:
I have left this message as the final dev message to help you transition.
Important Notice:
- AutoAWQ is officially deprecated and will no longer be maintained.
- The last tested configuration used Torch 2.6.0 and Transformers 4.51.3.
- If future versions of Transformers break AutoAWQ compatibility, please report the issue to the Transformers project.
Alternative:
- AutoAWQ has been adopted by the vLLM Project: https://github.com/vllm-project/llm-compressor
For further inquiries, feel free to reach out:
- LinkedIn: https://www.linkedin.com/in/casper-hansen-804005170/
warnings.warn(_FINAL_DEV_MESSAGE, category=DeprecationWarning, stacklevel=1)
[INFO:swift] Loading the model using model_dir: /llama-factory/LLaMA-Factory/merged_model/qwen25vl_7b_lab_16/
[WARNING:swift] Please install the package: pip install "qwen_vl_utils>=0.0.6" "decord" -U
Using `use_fast=True` but `torchvision` is not available. Falling back to the slow image processor.
[INFO:swift] model_kwargs: {'device_map': 'cuda:0'}
Traceback (most recent call last):
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/cli/export.py", line 5, in <module>
    export_main()
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/export/export.py", line 53, in export_main
    return SwiftExport(args).main()
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/base.py", line 49, in main
    result = self.run()
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/export/export.py", line 30, in run
    quantize_model(args)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/export/quant.py", line 255, in quantize_model
    QuantEngine(args).quantize()
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/export/quant.py", line 25, in __init__
    self.model, self.template = prepare_model_template(args, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/infer/utils.py", line 145, in prepare_model_template
    model, processor = args.get_model_processor(**kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/argument/base_args/base_args.py", line 317, in get_model_processor
    return get_model_tokenizer(**kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/model/register.py", line 660, in get_model_tokenizer
    model, processor = get_function(model_dir, model_info, model_kwargs, load_model, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/model/model/qwen.py", line 763, in get_model_tokenizer_qwen2_5_vl
    return get_model_tokenizer_qwen2_vl(*args, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/model/model/qwen.py", line 689, in get_model_tokenizer_qwen2_vl
    model, tokenizer = get_model_tokenizer_multimodal(*args, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/model/register.py", line 346, in get_model_tokenizer_multimodal
    model, _ = get_model_tokenizer_with_flash_attn(model_dir, *args, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/model/register.py", line 339, in get_model_tokenizer_with_flash_attn
    return get_model_tokenizer_from_local(model_dir, model_info, model_kwargs, load_model, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/model/register.py", line 293, in get_model_tokenizer_from_local
    model = automodel_class.from_pretrained(
  File "/ms-swift1/.venv/lib/python3.10/site-packages/awq/models/auto.py", line 83, in from_pretrained
    return AWQ_CAUSAL_LM_MODEL_MAP[model_type].from_pretrained(
  File "/ms-swift1/.venv/lib/python3.10/site-packages/awq/models/base.py", line 389, in from_pretrained
    model = target_cls.from_pretrained(
  File "/ms-swift1/.venv/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
    return model_class.from_pretrained(
  File "/ms-swift1/.venv/lib/python3.10/site-packages/swift/llm/model/patcher.py", line 302, in _new_from_pretrained
    model = from_pretrained(cls, *args, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/transformers/modeling_utils.py", line 279, in _wrapper
    return func(*args, **kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4342, in from_pretrained
    model = cls(config, *model_args, **model_kwargs)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py", line 1481, in __init__
    super().__init__(config)
  File "/ms-swift1/.venv/lib/python3.10/site-packages/transformers/modeling_utils.py", line 1884, in __init__
    self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
  File "/ms-swift1/.venv/lib/python3.10/site-packages/transformers/generation/configuration_utils.py", line 1290, in from_model_config
    decoder_config_dict = decoder_config.to_dict()
AttributeError: 'dict' object has no attribute 'to_dict'
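
For what it's worth, the AttributeError is raised inside GenerationConfig.from_model_config(), which calls .to_dict() on the decoder sub-config of the model config. My assumption (not confirmed) is that the merged checkpoint's config is deserializing a sub-config as a plain dict, e.g. because of a transformers version mismatch between the LLaMA-Factory and ms-swift environments. A quick check along these lines would confirm or rule that out:

# Hedged diagnostic sketch: see how transformers deserializes the merged
# checkpoint's config; model_dir is the path passed to --model above.
from transformers import AutoConfig

model_dir = "merged_model/qwen25vl_7b_lab_16"  # illustrative path

config = AutoConfig.from_pretrained(model_dir)

# If any of these prints <class 'dict'> instead of a *Config class,
# GenerationConfig.from_model_config(config) will hit the same
# "'dict' object has no attribute 'to_dict'" failure as above.
for name in ("text_config", "vision_config"):
    sub = getattr(config, name, None)
    print(name, type(sub))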