 from transformers.processing_utils import ImagesKwargs, ProcessingKwargs
 
 from lmdeploy.utils import get_logger
-from lmdeploy.vl.model.base import VISION_MODELS, VisonModel
+from lmdeploy.vl.model.internvl import VISION_MODELS, InternVLVisionModel
 from lmdeploy.vl.model.utils import disable_logging
 
 logger = get_logger('lmdeploy')
@@ -32,7 +32,7 @@ class InternVLProcessorKwargs(ProcessingKwargs, total=False):
 
 
 @VISION_MODELS.register_module()
-class InternVL3VisionModel(VisonModel):
+class InternVL3VisionModel(InternVLVisionModel):
     """Internvl3 vision model."""
 
     _arch = ['InternVLForConditionalGeneration', 'InternS1ForConditionalGeneration']
@@ -44,11 +44,12 @@ def __init__(self,
                  hf_config: AutoConfig = None,
                  backend: str = ''):
         super().__init__(model_path, with_llm, max_memory, hf_config, backend)
-        self.arch = hf_config.architectures[0]
+        self.arch = self.hf_config.architectures[0]
 
     def build_preprocessor(self):
         self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
         tokenizer = self.processor.tokenizer
+        self.image_token = self.processor.image_token
         self.image_token_id = tokenizer.context_image_token_id
         self.image_tokens_per_patch = self.processor.image_seq_length
         self.tokenizer_init_kwargs = tokenizer.init_kwargs
@@ -145,69 +146,3 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
             outputs.extend([x.reshape(-1, x.shape[-1]) for x in feats])
         messages.append(dict(role='forward', content=outputs))
         return messages
-
-    @staticmethod
-    def proc_messages(
-        messages,
-        chat_template,
-        sequence_start,
-        tools: Optional[List[object]] = None,
-        enable_thinking: Optional[bool] = None,
-    ):
-        """Apply chat template to get the prompt."""
-        prompt_messages = []
-        IMAGE_TOKEN = '<IMAGE_TOKEN>'
-        for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
-                continue
-            elif message['role'] in ['preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
-                prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
-                prompt = prompt.replace('</img><img>', '')
-                prompt = prompt.replace('<img><img>', '<img>')
-                prompt = prompt.replace('</img></img>', '</img>')
-            elif IMAGE_TOKEN not in prompt:
-                prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
-            else:
-                pass
-            prompt_messages.append(dict(role='user', content=prompt))
-        prompt = chat_template.messages2prompt(prompt_messages,
-                                               sequence_start,
-                                               tools=tools,
-                                               enable_thinking=enable_thinking)
-        return prompt, IMAGE_TOKEN
-
-    def to_pytorch(self,
-                   messages,
-                   chat_template,
-                   tokenizer,
-                   sequence_start,
-                   tools: Optional[List[object]] = None,
-                   enable_thinking: Optional[bool] = None,
-                   **kwargs):
-        prompt, IMAGE_TOKEN = self.proc_messages(messages,
-                                                 chat_template,
-                                                 sequence_start,
-                                                 tools=tools,
-                                                 enable_thinking=enable_thinking)
-        return self.to_pytorch_aux(messages, prompt, IMAGE_TOKEN, tokenizer, sequence_start)
-
-    def to_turbomind(self,
-                     messages,
-                     chat_template,
-                     tokenizer,
-                     sequence_start,
-                     tools: Optional[List[object]] = None,
-                     enable_thinking: Optional[bool] = None,
-                     **kwargs):
-        prompt, IMAGE_TOKEN = self.proc_messages(messages,
-                                                 chat_template,
-                                                 sequence_start,
-                                                 tools=tools,
-                                                 enable_thinking=enable_thinking)
-        return self.to_turbomind_aux(messages, prompt, IMAGE_TOKEN, tokenizer, sequence_start)
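
For context: deleting proc_messages/to_pytorch/to_turbomind is only safe because the class now derives from InternVLVisionModel instead of VisonModel, so those calls resolve to the base class. A minimal sketch of that method-resolution assumption; the stand-in bodies below are illustrative, not lmdeploy's actual code:

# Sketch: why the subclass can drop its prompt helpers. Assumes the new base
# class InternVLVisionModel provides equivalent methods; bodies are stand-ins.
class InternVLVisionModel:

    @staticmethod
    def proc_messages(messages, chat_template, sequence_start, **kwargs):
        # assumed: builds the prompt and returns it with the image placeholder
        return 'prompt', '<IMAGE_TOKEN>'

    def to_pytorch(self, messages, chat_template, tokenizer, sequence_start, **kwargs):
        prompt, image_token = self.proc_messages(messages, chat_template, sequence_start, **kwargs)
        return prompt, image_token


class InternVL3VisionModel(InternVLVisionModel):
    pass  # no override: Python's MRO resolves to_pytorch to the base class


model = InternVL3VisionModel()
print(model.to_pytorch([], None, None, True))  # ('prompt', '<IMAGE_TOKEN>')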