diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev index 7de9a2c48b6..aebd7dacf17 100644 --- a/docker/Dockerfile.ci.dev +++ b/docker/Dockerfile.ci.dev @@ -59,9 +59,9 @@ RUN bash -ex <<"EOF" ln -s libnvshmem_host.so.3 libnvshmem_host.so popd - git clone --branch tongliu/inter_node https://github.com/Autumn1998/DeepEP.git + git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git pushd DeepEP - git checkout 0fa8568c5923fcfc87f49ef0c3761dc013375a67 + git checkout 1dddd194c26911c35b4f53a148617dd73de0ffc9 patch -p1 < /workspace/deepep.patch popd TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/. diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index feee48d7757..cf11238724b 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -859,6 +859,8 @@ def __post_init__(self): if self.moe_enable_deepep: if self.moe_token_dispatcher_type != "flex": raise ValueError("DeepEP backend is only supported with flex token dispatcher.") + if self.moe_flex_dispatcher_backend == "hybridep": + raise ValueError("Only one backend is supported for flex token dispatcher.") self.moe_flex_dispatcher_backend = "deepep" warnings.warn( "moe_enable_deepep is deprecated." @@ -873,8 +875,6 @@ def __post_init__(self): "Flex token dispatcher with deepep backend does not support " "moe_pad_expert_input_to_capacity" ) - if self.moe_enable_deepep or self.moe_flex_dispatcher_backend == "hybrid_ep": - raise ValueError("Only one type of backend is supported for flex token dispatcher.") if self.moe_shared_expert_intermediate_size is not None: if self.moe_shared_expert_intermediate_size <= 0: