Skip to content

Commit 5e72005

Browse files
danthe3rd authored and xFormers Bot committed
Enable Paged-Attention in FA3 build
ghstack-source-id: 80565872ee56f53b8604d402e73dc9cc473e6c8d
Pull Request resolved: fairinternal/xformers#1392
__original_commit__ = fairinternal/xformers@31dd6f1
1 parent a4d8f9a commit 5e72005

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def get_flash_attention3_extensions(cuda_version: int, extra_compile_args):
339339
# Enabling paged attention causes segfault with some
340340
# versions of nvcc :(
341341
# https://github.com/Dao-AILab/flash-attention/issues/1453
342-
("paged", "-DFLASHATTENTION_DISABLE_PAGEDKV"),
342+
# ("paged", "-DFLASHATTENTION_DISABLE_PAGEDKV"),
343343
# We have `CUDA_MINIMUM_COMPUTE_CAPABILITY` set to 9.0
344344
# ("_sm80.cu", "-DFLASHATTENTION_DISABLE_SM8x"),
345345
)

xformers/ops/fmha/flash3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def _flash_attention3_incompatible_reason() -> Optional[str]:
101101
return None
102102

103103

104-
FLASH3_HAS_PAGED_ATTENTION = False
104+
FLASH3_HAS_PAGED_ATTENTION = True
105105
FLASH3_HAS_FLOAT8 = False
106106
_C_flashattention3 = None
107107
if importlib.util.find_spec("...flash_attn_3._C", package=__package__):

0 commit comments

Comments (0)