[https://nvbugs/5688388][fix] fix: Reducing num request in disagg test to speed up (#9598)

pcastonguay · web-flow · commit 3991aa9c7214 · 2025-12-02T12:48:53.000-05:00
Signed-off-by: Patrice Castonguay &lt;55748270+pcastonguay@users.noreply.github.com&gt;
diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py
@@ -351,8 +351,8 @@ def test_disaggregated_llama_context_capacity(model, enable_cuda_graph,
             max_tokens = 25
 
             requests = []
-            # Send 256 requests to make sure the context worker is saturated
-            for _ in range(256):
+            # Send 32 requests to make sure the context worker is saturated
+            for _ in range(32):
                 requests.append(
                     (prompt, SamplingParams(max_tokens=1, ignore_eos=True),
                      DisaggregatedParams(request_type="context_only")))