Skip to content
Merged
4 changes: 2 additions & 2 deletions lmdeploy/pytorch/backends/cuda/graph_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch
from torch.profiler import record_function

from lmdeploy.pytorch.backends.deepep_moe_checker import moe_backend
from lmdeploy.pytorch.backends.deepep_moe_checker import get_moe_backend
from lmdeploy.pytorch.backends.selector import get_backend
from lmdeploy.pytorch.config import BackendConfig, CacheConfig, ModelConfig
from lmdeploy.pytorch.model_inputs import StepContext, get_step_ctx_manager
Expand Down Expand Up @@ -252,7 +252,7 @@ def prepare_inputs_for_generation(
):
"""Prepare inputs."""

if moe_backend.use_deepep_moe_backend():
if get_moe_backend().use_deepep_moe_backend():
from dlblas.layers.moe.token_dispatcher import DeepEPBuffer, DeepEPMode
deepep_mode = DeepEPMode.LOW_LATENCY if context.is_decoding else DeepEPMode.NORMAL
DeepEPBuffer.set_deepep_mode(deepep_mode)
Expand Down
Loading