Support DP prompt cache fetch in PD split (prefill/decode separated) mode

WANDY666 · WANDY666 · commit 38d42262c103 · 2025-11-04T07:40:15.000Z
diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py
@@ -541,7 +541,6 @@ def make_argument_parser() -> argparse.ArgumentParser:
         "--disable_dp_prompt_cache_fetch",
         action="store_true",
         default=False,
-        help="""Disable prefix prompt cache fetch for data parallel inference.
-        Enabled by default, but currently not supported for pd separated mode""",
+        help="""Disable prefix prompt cache fetch for data parallel inference, enabled by default.""",
     )
     return parser
diff --git a/lightllm/server/api_start.py b/lightllm/server/api_start.py
@@ -270,13 +270,12 @@ def normal_or_p_d_start(args):
 
     send_and_receive_node_ip(args)  # 多机用于收发node ip
     # PD 分离模式下必须禁用 DP prompt cache fetch，且 dp 必须 > 1
-    if not args.disable_dp_prompt_cache_fetch:
-        if args.run_mode != "normal" or args.dp <= 1:
-            args.disable_dp_prompt_cache_fetch = True
-            logger.warning(
-                """PD split mode or dp <= 1 does not support dp_prompt_cache_fetch;
-                overriding disable_dp_prompt_cache_fetch to True"""
-            )
+    if not args.disable_dp_prompt_cache_fetch and args.dp <= 1:
+        args.disable_dp_prompt_cache_fetch = True
+        logger.warning(
+            """dp <= 1 does not support dp_prompt_cache_fetch;
+            overriding disable_dp_prompt_cache_fetch to True"""
+        )
 
     set_env_start_args(args)
     logger.info(f"all start args:{args}")
diff --git a/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl_for_dp.py b/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl_for_dp.py
@@ -17,6 +17,7 @@ def __init__(self, info_queue: mp.Queue, mem_queue: mp.Queue) -> None:
 
     def init_custom(self):
         ChunckedPrefillForPrefillNode.init_custom(self)
+        super().init_custom()
         return
 
     def _pre_handle_finished_reqs(self, finished_reqs):
diff --git a/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_impl_for_dp.py b/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_impl_for_dp.py
@@ -18,6 +18,7 @@ def __init__(self, info_queue: mp.Queue, mem_queue: mp.Queue) -> None:
 
     def init_custom(self):
         NIXLChunckedPrefillForPrefillNode.init_custom(self)
+        super().init_custom()
         return
 
     def _filter_not_ready_reqs(self, req_ids: List[int]) -> List[InferReq]:

Original file line number	Diff line number	Diff line change
`@@ -541,7 +541,6 @@ def make_argument_parser() -> argparse.ArgumentParser:`
`541`	`541`	`"--disable_dp_prompt_cache_fetch",`
`542`	`542`	`action="store_true",`
`543`	`543`	`default=False,`
`544`		`- help="""Disable prefix prompt cache fetch for data parallel inference.`
`545`		`- Enabled by default, but currently not supported for pd separated mode""",`
	`544`	`+ help="""Disable prefix prompt cache fetch for data parallel inference, enabled by default.""",`
`546`	`545`	`)`
`547`	`546`	`return parser`