Commit 00ab75e
fix(benchmarks): correct sdpa_backend inconsistency and attn_implementation for continuous batching (#42339)
This commit fixes two bugs in BenchmarkConfig reported in issue #42211:
1. **sdpa_backend inconsistency (line 105)**: The warning message states
"sdpa_backend must be None" but the code was setting it to "math".
Changed to None to match the warning message. This allows PyTorch to
auto-select the appropriate SDPA backend rather than forcing one globally,
which is correct for continuous batching with custom attention masks.
2. **Invalid attn_implementation (line 243)**: Changed from "paged|sdpa" to
"sdpa". Using "paged|sdpa" directly bypassed the validation logic at
lines 91-105 since it only checks for exactly "sdpa". The "paged|" prefix
is automatically added by init_continuous_batching() in continuous_api.py,
so the config should use plain "sdpa" for consistency with other configs.
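The interaction between the two fixes can be sketched with a self-contained stand-in (the class, field names, and helper below are hypothetical simplifications for illustration, not the actual transformers code):

```python
# Hypothetical sketch of the validation logic described above; field names
# mirror the commit message but the class is a stand-in, not BenchmarkConfig.
from dataclasses import dataclass
from typing import Optional


@dataclass
class BenchmarkConfigSketch:
    attn_implementation: str = "sdpa"
    sdpa_backend: Optional[str] = None
    continuous_batching: bool = False

    def __post_init__(self):
        # The check matches exactly "sdpa", so a config written as
        # "paged|sdpa" would bypass this branch entirely (bug 2).
        if self.continuous_batching and self.attn_implementation == "sdpa":
            if self.sdpa_backend is not None:
                print("Warning: sdpa_backend must be None for continuous batching")
            # Fix 1: reset to None so PyTorch auto-selects the SDPA backend,
            # instead of forcing "math" globally as the old code did.
            self.sdpa_backend = None


def init_continuous_batching(config: BenchmarkConfigSketch) -> str:
    # The "paged|" prefix is added here, which is why the config itself
    # should use plain "sdpa" (fix 2).
    return f"paged|{config.attn_implementation}"


cfg = BenchmarkConfigSketch(
    attn_implementation="sdpa", sdpa_backend="math", continuous_batching=True
)
assert cfg.sdpa_backend is None
assert init_continuous_batching(cfg) == "paged|sdpa"
```

With `attn_implementation="paged|sdpa"`, the `__post_init__` branch never fires and a stale `sdpa_backend` survives, which is exactly the bypass the commit closes.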
Both bugs were introduced in commit 069684e (PR #41916).
Fixes #42211

parent 3410ba9
1 file changed: +2, −2 (lines 105 and 243 modified)