Skip to content

Commit 21c22f0

Browse files
authored
fix qwen3-30-a3b lcb-code score (#4142)
* Calculate `inv_freq` on device
* Adapt for dlinfer attention
* Update code
* Fix dlinfer MoE parameter error
* Update CANN version
* Update code
1 parent 9dd57d3 commit 21c22f0

File tree

5 files changed

+11
-14
lines changed

5 files changed

+11
-14
lines changed

docker/Dockerfile_ascend_a2_300i

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
ARG ASCEND_DEVICE_TYPE=ascend_a2
55
ARG ASCEND_HUB=swr.cn-south-1.myhuaweicloud.com/ascendhub
66

7-
FROM ${ASCEND_HUB}/cann:8.3.rc1.alpha002-910b-ubuntu22.04-py3.11 AS ascend_a2_base
8-
FROM ${ASCEND_HUB}/cann:8.3.rc1.alpha002-310p-ubuntu22.04-py3.11 AS ascend_300i_base
7+
FROM ${ASCEND_HUB}/cann:8.3.rc1-910b-ubuntu22.04-py3.11 AS ascend_a2_base
8+
FROM ${ASCEND_HUB}/cann:8.3.rc1-310p-ubuntu22.04-py3.11 AS ascend_300i_base
99

1010
FROM ${ASCEND_DEVICE_TYPE}_base AS builder
1111
ENV DEBIAN_FRONTEND=noninteractive
@@ -23,6 +23,6 @@ ARG LMDEPLOY_TAG=main
2323
RUN --mount=type=cache,target=/root/.cache \
2424
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
2525
pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
26-
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0rc1 torchvision==0.23.0 && \
26+
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0 torchvision==0.23.0 && \
2727
TORCH_DEVICE_BACKEND_AUTOLOAD=0 DEVICE=ascend pip install git+https://github.com/DeepLink-org/dlinfer.git@${DLINFER_TAG} && \
2828
LMDEPLOY_TARGET_DEVICE=ascend pip install git+https://github.com/InternLM/lmdeploy.git@${LMDEPLOY_TAG}

docker/Dockerfile_ascend_a3

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
ARG ASCEND_DEVICE_TYPE=ascend_a3
55
ARG ASCEND_HUB=swr.cn-south-1.myhuaweicloud.com/ascendhub
66

7-
FROM ${ASCEND_HUB}/cann:8.3.rc1.alpha002-a3-openeuler24.03-py3.11 AS ascend_a3_base
7+
FROM ${ASCEND_HUB}/cann:8.3.rc1-a3-openeuler24.03-py3.11 AS ascend_a3_base
88

99
FROM ${ASCEND_DEVICE_TYPE}_base AS builder
1010
ENV DEBIAN_FRONTEND=noninteractive
@@ -22,6 +22,6 @@ ARG LMDEPLOY_TAG=main
2222
RUN --mount=type=cache,target=/root/.cache \
2323
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
2424
pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
25-
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0rc1 torchvision==0.23.0 && \
25+
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0 torchvision==0.23.0 && \
2626
TORCH_DEVICE_BACKEND_AUTOLOAD=0 DEVICE=ascend pip install git+https://github.com/DeepLink-org/dlinfer.git@${DLINFER_TAG} && \
2727
LMDEPLOY_TARGET_DEVICE=ascend pip install git+https://github.com/InternLM/lmdeploy.git@${LMDEPLOY_TAG}

lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def get_total_slots():
212212
elif is_unpaged_prefill:
213213
# prepare some params of unpaged_prefill attention stage.
214214
q_start_loc_cpu, kv_seqlens_cpu = None, None
215-
q_seqlens_cpu = step_context.q_seqlens.cpu()
215+
q_seqlens_cpu = step_context.q_seqlens.cpu().to(torch.int32)
216216
if SocVersion.is_Ascend910():
217217
single_attention_mask = torch.logical_not(
218218
torch.tril(
@@ -251,7 +251,7 @@ def get_total_slots():
251251
step_context.block_offsets = step_context.block_offsets\
252252
.repeat_interleave(step_context.q_seqlens, 0)
253253
dynamo.mark_dynamic(step_context.block_offsets, [0, 1])
254-
kv_seqlens = step_context.kv_seqlens.to(torch.int32)
254+
kv_seqlens = step_context.kv_seqlens.cpu().to(torch.int32)
255255
if not step_context.is_decoding:
256256
if is_unpaged_prefill:
257257
if SocVersion.is_Ascend910():
@@ -269,11 +269,9 @@ def get_total_slots():
269269
else:
270270
raise ValueError(f"dlinfer doesn't support {SocVersion.device_name()} device currently.")
271271
kv_seqlens = kv_seqlens.repeat_interleave(step_context.q_seqlens, 0)
272-
if not is_unpaged_prefill and AscendOpsBackend.enable_aclgraph():
273-
kv_seqlens = kv_seqlens.cpu().tolist()
274272
else:
275273
if step_context.is_decoding:
276-
kv_seqlens_cpu = step_context.kv_seqlens.cpu()
274+
kv_seqlens_cpu = step_context.kv_seqlens.cpu().to(torch.int32)
277275
elif is_unpaged_prefill:
278276
pass
279277
else:

lmdeploy/pytorch/backends/dlinfer/moe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ class DlinferSoftmaxTopKBuilder(SoftmaxTopKBuilder):
2727
"""Dlinfer softmax topk implementation builder."""
2828

2929
@staticmethod
30-
def build(top_k: int, dim: int = -1):
30+
def build(top_k: int, dim: int = -1, n_groups: int = -1):
3131
"""build."""
32-
return DlinferSoftmaxTopKImpl(top_k, dim)
32+
return DlinferSoftmaxTopKImpl(top_k, dim, n_groups)
3333

3434

3535
class DlinferFusedMoEImpl(FusedMoEImpl):

lmdeploy/pytorch/backends/dlinfer/rotary_embedding.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ def __init__(self, dim: int, base: int = 10000, scaling_factor: float = 1.0):
4646
self.dim = dim
4747
self.base = base
4848
# yapf: disable
49-
inv_freq = 1.0 / (self.base
50-
** (torch.arange(0, self.dim, 2, dtype=torch.int64).float() / self.dim)).float().cuda()
49+
inv_freq = 1.0 / (self.base**(torch.arange(0, self.dim, 2, dtype=torch.float, device='cuda') / self.dim))
5150
# yapf: enable
5251
self.register_buffer('inv_freq', inv_freq, persistent=False)
5352

0 commit comments

Comments (0)