Skip to content

Commit 5e72005

Browse files
danthe3rd authored and xFormers Bot committed
Enable Paged-Attention in FA3 build
ghstack-source-id: 80565872ee56f53b8604d402e73dc9cc473e6c8d
Pull Request resolved: fairinternal/xformers#1392
__original_commit__ = fairinternal/xformers@31dd6f1
1 parent a4d8f9a commit 5e72005

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def get_flash_attention3_extensions(cuda_version: int, extra_compile_args):
339339
# Enabling paged attention causes segfault with some
340340
# versions of nvcc :(
341341
# https://github.com/Dao-AILab/flash-attention/issues/1453
342-
("paged", "-DFLASHATTENTION_DISABLE_PAGEDKV"),
342+
# ("paged", "-DFLASHATTENTION_DISABLE_PAGEDKV"),
343343
# We have `CUDA_MINIMUM_COMPUTE_CAPABILITY` set to 9.0
344344
# ("_sm80.cu", "-DFLASHATTENTION_DISABLE_SM8x"),
345345
)

xformers/ops/fmha/flash3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def _flash_attention3_incompatible_reason() -> Optional[str]:
101101
return None
102102

103103

104-
FLASH3_HAS_PAGED_ATTENTION = False
104+
FLASH3_HAS_PAGED_ATTENTION = True
105105
FLASH3_HAS_FLOAT8 = False
106106
_C_flashattention3 = None
107107
if importlib.util.find_spec("...flash_attn_3._C", package=__package__):

0 commit comments

Comments (0)