1 parent b99f8c8 commit c5f20d5
vllm_flash_attn/flash_attn_interface.py
@@ -226,6 +226,8 @@ def flash_attn_varlen_func(
             "FA2 does not support scheduler_metadata, q_descale, "
             "k_descale, v_descale"
         )
+        if s_aux is not None:
+            raise NotImplementedError("FA2 does not support s_aux")
         if num_splits > 1:
             raise NotImplementedError("FA2 does not support num_splits > 1")
         out, softmax_lse = torch.ops._vllm_fa2_C.varlen_fwd(
@@ -250,7 +252,6 @@ def flash_attn_varlen_func(
             softcap,
             return_softmax_lse and dropout_p > 0,
             None,
-            s_aux=s_aux,
         )
     elif fa_version == 3:
         assert alibi_slopes is None, "Alibi is not supported in FA3"
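The commit stops forwarding s_aux into the FA2 kernel (torch.ops._vllm_fa2_C.varlen_fwd does not accept it) and instead fails fast with a NotImplementedError when s_aux is supplied on the FA2 path, mirroring the existing num_splits > 1 guard. Below is a minimal sketch of how a caller would now hit the new guard; the tensor layout, the keyword names (cu_seqlens_q, max_seqlen_q, fa_version, ...), and the s_aux shape are assumed from the flash-attn varlen convention rather than shown in this diff.

import torch

from vllm_flash_attn.flash_attn_interface import flash_attn_varlen_func

# Two sequences of 16 tokens packed into one varlen batch.
# The (total_tokens, num_heads, head_dim) layout is assumed from the
# flash-attn varlen convention; this commit only adds the s_aux guard.
q = torch.randn(32, 4, 64, device="cuda", dtype=torch.float16)
k = torch.randn(32, 4, 64, device="cuda", dtype=torch.float16)
v = torch.randn(32, 4, 64, device="cuda", dtype=torch.float16)
cu_seqlens = torch.tensor([0, 16, 32], device="cuda", dtype=torch.int32)

# Hypothetical per-head auxiliary values; the shape the FA3 path
# actually expects is not visible in this diff.
s_aux = torch.zeros(4, device="cuda", dtype=torch.float32)

try:
    flash_attn_varlen_func(
        q, k, v,
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_k=cu_seqlens,
        max_seqlen_q=16,
        max_seqlen_k=16,
        s_aux=s_aux,       # only meaningful on the FA3 path
        fa_version=2,      # forces the FA2 branch patched here
    )
except NotImplementedError as exc:
    print(exc)  # -> "FA2 does not support s_aux"

Raising early, before the varlen_fwd dispatch, surfaces the unsupported combination as a clear Python-level error instead of an opaque kernel-binding failure from an unexpected keyword argument.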