1 parent b99f8c8 commit c5f20d5
vllm_flash_attn/flash_attn_interface.py
@@ -226,6 +226,8 @@ def flash_attn_varlen_func(
             "FA2 does not support scheduler_metadata, q_descale, "
             "k_descale, v_descale"
         )
+        if s_aux is not None:
+            raise NotImplementedError("FA2 does not support s_aux")
         if num_splits > 1:
             raise NotImplementedError("FA2 does not support num_splits > 1")
         out, softmax_lse = torch.ops._vllm_fa2_C.varlen_fwd(
@@ -250,7 +252,6 @@ def flash_attn_varlen_func(
             softcap,
             return_softmax_lse and dropout_p > 0,
             None,
-            s_aux=s_aux,
         )
     elif fa_version == 3:
         assert alibi_slopes is None, "Alibi is not supported in FA3"
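The commit stops forwarding s_aux into the FA2 kernel (torch.ops._vllm_fa2_C.varlen_fwd does not accept it) and instead fails fast with a NotImplementedError when s_aux is supplied on the FA2 path, mirroring the existing num_splits > 1 guard. Below is a minimal sketch of how a caller would now hit the new guard; the tensor layout, the keyword names (cu_seqlens_q, max_seqlen_q, fa_version, ...), and the s_aux shape are assumed from the flash-attn varlen convention rather than shown in this diff.

import torch

from vllm_flash_attn.flash_attn_interface import flash_attn_varlen_func

# Two sequences of 16 tokens packed into one varlen batch.
# The (total_tokens, num_heads, head_dim) layout is assumed from the
# flash-attn varlen convention; this commit only adds the s_aux guard.
q = torch.randn(32, 4, 64, device="cuda", dtype=torch.float16)
k = torch.randn(32, 4, 64, device="cuda", dtype=torch.float16)
v = torch.randn(32, 4, 64, device="cuda", dtype=torch.float16)
cu_seqlens = torch.tensor([0, 16, 32], device="cuda", dtype=torch.int32)

# Hypothetical per-head auxiliary values; the shape the FA3 path
# actually expects is not visible in this diff.
s_aux = torch.zeros(4, device="cuda", dtype=torch.float32)

try:
    flash_attn_varlen_func(
        q, k, v,
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_k=cu_seqlens,
        max_seqlen_q=16,
        max_seqlen_k=16,
        s_aux=s_aux,       # only meaningful on the FA3 path
        fa_version=2,      # forces the FA2 branch patched here
    )
except NotImplementedError as exc:
    print(exc)  # -> "FA2 does not support s_aux"

Raising early, before the varlen_fwd dispatch, surfaces the unsupported combination as a clear Python-level error instead of an opaque kernel-binding failure from an unexpected keyword argument.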