22from .base import INPUT_MODELS
33from .internlm2 import InternLM2Reader
44from .llama import LlamaModel , LlamaReader
5- from .qwen import Qwen3MoeReader
5+ from .qwen import Qwen3MoeReader , Qwen3Reader
66
77
88class InternVLReader (LlamaReader ):
@@ -35,7 +35,22 @@ def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_
3535
3636
class InternS1Reader(Qwen3MoeReader):
    """InternS1Reader for internlm/InternS1 model.

    Overrides the checkpoint key names so that the language-model weights are
    looked up under the ``model.language_model`` prefix, and unwraps the
    ``text_config`` section of the model config before delegating to the
    parent reader.
    """

    # Checkpoint key names / patterns for the language-model weights.
    attn_layer_prefix = 'model.language_model.layers'
    attn_layer_patten = r'model\.language_model\.layers\.([0-9]+).'
    tok_embeddings_key = 'model.language_model.embed_tokens.weight'
    norm_weight_key = 'model.language_model.norm.weight'
    output_weight_key = 'lm_head.weight'

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, **kwargs):
        """Initialize the reader with the ``text_config`` part of ``model_cfg``.

        Args:
            new_params: forwarded unchanged to the parent reader.
            unused_params: forwarded unchanged to the parent reader.
            last_bin: forwarded unchanged to the parent reader.
            model_cfg: full model config; must contain a ``text_config`` entry,
                which is what gets passed on to the parent reader.
            **kwargs: forwarded unchanged to the parent reader.

        Raises:
            ValueError: if ``model_cfg`` has no ``text_config`` entry.
        """
        # Keep the sub-config in its own name: rebinding ``model_cfg`` here
        # would make the error message below always print ``None`` instead of
        # the config that is actually missing the key.
        text_cfg = model_cfg.get('text_config')
        if text_cfg is None:
            raise ValueError(f'Miss "text_config" in model config: {model_cfg}')
        super().__init__(new_params, unused_params, last_bin, text_cfg, **kwargs)
51+
52+
53+ class InternS1MiniReader (Qwen3Reader ):
3954
4055 attn_layer_prefix = 'model.language_model.layers'
4156 attn_layer_patten = r'model\.language_model\.layers\.([0-9]+).'
@@ -60,12 +75,11 @@ def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
6075 config = AutoConfig .from_pretrained (model_path , trust_remote_code = True )
6176 self .llm_config = getattr (config , 'llm_config' , None ) or getattr (config , 'text_config' , None )
6277 arch = self .llm_config .architectures [0 ]
63- relations = dict (
64- InternLM2ForCausalLM = ('internlm2' , InternVL2Reader ),
65- LlamaForCausalLM = ('llama' , InternVLReader ),
66- Qwen2ForCausalLM = ('qwen2' , InternVLReader ),
67- Qwen3MoeForCausalLM = ('qwen3-moe' , InternS1Reader ),
68- )
78+ relations = dict (InternLM2ForCausalLM = ('internlm2' , InternVL2Reader ),
79+ LlamaForCausalLM = ('llama' , InternVLReader ),
80+ Qwen2ForCausalLM = ('qwen2' , InternVLReader ),
81+ Qwen3MoeForCausalLM = ('qwen3-moe' , InternS1Reader ),
82+ Qwen3ForCausalLM = ('qwen3' , InternS1MiniReader ))
6983 llm_model , self .Reader = relations [arch ]
7084 self .llm_model = INPUT_MODELS .get (llm_model )(model_path = model_path , tokenizer_path = tokenizer_path , ** kwargs )
7185
0 commit comments