Skip to content

Commit 52515c3

Browse files
standard error message
1 parent dd5dc7a commit 52515c3

28 files changed

+69
-58
lines changed

script.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
import torch


class DummyModule(torch.nn.Module):
    """Minimal module whose forward pass doubles its input."""

    def forward(self, x):
        """Return ``x * 2``."""
        return x * 2


if __name__ == "__main__":
    # Smoke run: push a small negative-valued tensor through the module.
    model = DummyModule()
    input_tensor = torch.tensor([-1.0, -2.0, -3.0])
    output = model(input_tensor)

src/transformers/models/deepseek_vl/modeling_deepseek_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def get_placeholder_mask(
178178
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
179179
torch_check(
180180
inputs_embeds[special_image_mask].numel() == image_features.numel(),
181-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}",
181+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {n_image_features}",
182182
)
183183
return special_image_mask
184184

src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ def get_placeholder_mask(
295295
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
296296
torch_check(
297297
inputs_embeds[special_image_mask].numel() == image_features.numel(),
298-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}",
298+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {n_image_features}",
299299
)
300300
return special_image_mask
301301

src/transformers/models/gemma3n/modeling_gemma3n.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2169,15 +2169,15 @@ def get_placeholder_mask(
21692169
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
21702170
torch_check(
21712171
image_features is None or inputs_embeds[special_image_mask].numel() == image_features.numel(),
2172-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {image_features.shape[0] * image_features.shape[1]}",
2172+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {image_features.shape[0] * image_features.shape[1]}",
21732173
)
21742174

21752175
n_audio_tokens = special_audio_mask.sum()
21762176
special_audio_mask = special_audio_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
2177-
if audio_features is not None and inputs_embeds[special_audio_mask].numel() != audio_features.numel():
2178-
raise ValueError(
2179-
f"Audio features and image tokens do not match: tokens: {n_audio_tokens}, features {audio_features.shape[0] * audio_features.shape[1]}"
2180-
)
2177+
torch_check(
2178+
audio_features is None or inputs_embeds[special_audio_mask].numel() == audio_features.numel(),
2179+
lambda: f"Audio features and audio tokens do not match, tokens: {n_audio_tokens}, features: {audio_features.shape[0] * audio_features.shape[1]}",
2180+
)
21812181

21822182
return special_image_mask, special_audio_mask
21832183

src/transformers/models/gemma3n/modular_gemma3n.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2241,15 +2241,15 @@ def get_placeholder_mask(
22412241
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
22422242
torch_check(
22432243
image_features is None or inputs_embeds[special_image_mask].numel() == image_features.numel(),
2244-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {image_features.shape[0] * image_features.shape[1]}",
2244+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {image_features.shape[0] * image_features.shape[1]}",
22452245
)
22462246

22472247
n_audio_tokens = special_audio_mask.sum()
22482248
special_audio_mask = special_audio_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
2249-
if audio_features is not None and inputs_embeds[special_audio_mask].numel() != audio_features.numel():
2250-
raise ValueError(
2251-
f"Audio features and image tokens do not match: tokens: {n_audio_tokens}, features {audio_features.shape[0] * audio_features.shape[1]}"
2252-
)
2249+
torch_check(
2250+
audio_features is None or inputs_embeds[special_audio_mask].numel() == audio_features.numel(),
2251+
lambda: f"Audio features and audio tokens do not match, tokens: {n_audio_tokens}, features: {audio_features.shape[0] * audio_features.shape[1]}",
2252+
)
22532253

22542254
return special_image_mask, special_audio_mask
22552255

src/transformers/models/glm46v/modeling_glm46v.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,14 +359,14 @@ def get_placeholder_mask(
359359
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
360360
torch_check(
361361
image_features is None or inputs_embeds[special_image_mask].numel() == image_features.numel(),
362-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {image_features.shape[0]}",
362+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {image_features.shape[0]}",
363363
)
364364

365365
n_video_tokens = special_video_mask.sum()
366366
special_video_mask = special_video_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
367367
torch_check(
368368
video_features is None or inputs_embeds[special_video_mask].numel() == video_features.numel(),
369-
lambda: f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {video_features.shape[0]}",
369+
lambda: f"Video features and video tokens do not match, tokens: {n_video_tokens}, features: {video_features.shape[0]}",
370370
)
371371
return special_image_mask, special_video_mask
372372

src/transformers/models/glm4v/modeling_glm4v.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,14 +1207,14 @@ def get_placeholder_mask(
12071207
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
12081208
torch_check(
12091209
image_features is None or inputs_embeds[special_image_mask].numel() == image_features.numel(),
1210-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {image_features.shape[0]}",
1210+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {image_features.shape[0]}",
12111211
)
12121212

12131213
n_video_tokens = special_video_mask.sum()
12141214
special_video_mask = special_video_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
12151215
torch_check(
12161216
video_features is None or inputs_embeds[special_video_mask].numel() == video_features.numel(),
1217-
lambda: f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {video_features.shape[0]}",
1217+
lambda: f"Video features and video tokens do not match, tokens: {n_video_tokens}, features: {video_features.shape[0]}",
12181218
)
12191219
return special_image_mask, special_video_mask
12201220

src/transformers/models/glm4v/modular_glm4v.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,14 +1205,14 @@ def get_placeholder_mask(
12051205
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
12061206
torch_check(
12071207
image_features is None or inputs_embeds[special_image_mask].numel() == image_features.numel(),
1208-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {image_features.shape[0]}",
1208+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {image_features.shape[0]}",
12091209
)
12101210

12111211
n_video_tokens = special_video_mask.sum()
12121212
special_video_mask = special_video_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
12131213
torch_check(
12141214
video_features is None or inputs_embeds[special_video_mask].numel() == video_features.numel(),
1215-
lambda: f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {video_features.shape[0]}",
1215+
lambda: f"Video features and video tokens do not match, tokens: {n_video_tokens}, features: {video_features.shape[0]}",
12161216
)
12171217
return special_image_mask, special_video_mask
12181218

src/transformers/models/glm4v_moe/modeling_glm4v_moe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,14 +1374,14 @@ def get_placeholder_mask(
13741374
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
13751375
torch_check(
13761376
image_features is None or inputs_embeds[special_image_mask].numel() == image_features.numel(),
1377-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {image_features.shape[0]}",
1377+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {image_features.shape[0]}",
13781378
)
13791379

13801380
n_video_tokens = special_video_mask.sum()
13811381
special_video_mask = special_video_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
13821382
torch_check(
13831383
video_features is None or inputs_embeds[special_video_mask].numel() == video_features.numel(),
1384-
lambda: f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {video_features.shape[0]}",
1384+
lambda: f"Video features and video tokens do not match, tokens: {n_video_tokens}, features: {video_features.shape[0]}",
13851385
)
13861386
return special_image_mask, special_video_mask
13871387

src/transformers/models/janus/modeling_janus.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1114,7 +1114,7 @@ def get_placeholder_mask(
11141114
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
11151115
torch_check(
11161116
inputs_embeds[special_image_mask].numel() == image_features.numel(),
1117-
lambda: f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}",
1117+
lambda: f"Image features and image tokens do not match, tokens: {n_image_tokens}, features: {n_image_features}",
11181118
)
11191119
return special_image_mask
11201120

0 commit comments

Comments
 (0)