Skip to content

Commit b564a83

Browse files
Fix cache miss for InternVL
--------- Signed-off-by: Jianhong Zhang <[email protected]>
1 parent c17c7b0 commit b564a83

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

vllm/model_executor/models/internvl.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,7 @@ def extract_feature(self, pixel_values: torch.Tensor) -> torch.Tensor:
1491 1491
end_idx = start_idx + i
1492 1492
batch_sliced_pixel_values = \
1493 1493
pixel_values[start_idx:end_idx, ...]
1494+
batch_sliced_pixel_values = batch_sliced_pixel_values.contiguous().clone()
1494 1495
if is_lazy:
1495 1496
vit_embeds_minibatch = \
1496 1497
self.vision_model(

vllm/worker/hpu_model_runner.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3960,6 +3960,13 @@ def try_revert_dummy_output_tokens():
3960 3960
with self.profiler.record_event('internal',
3961 3961
model_event_name,
3962 3962
args=profiler_args):
3963+
# Clone inputs_embeds early to prevent cache miss failure
3964+
if 'InternVLChatModel' in str(type(self.model.model)):
3965+
if "inputs_embeds" in execute_model_kwargs:
3966+
orig = execute_model_kwargs["inputs_embeds"]
3967+
execute_model_kwargs["inputs_embeds"] = orig.contiguous().clone()
3968+
if sampling_metadata.selected_token_indices is not None:
3969+
sampling_metadata.selected_token_indices = sampling_metadata.selected_token_indices.clone()
3963 3970
hidden_states = self.model.forward(
3964 3971
**execute_model_kwargs,
3965 3972
selected_token_indices=sampling_metadata.

0 commit comments

Comments
 (0)