Skip to content

Commit 7c4777a

Browse files
authored
[TRTLLM-9286][feat] Integration of CuteDSL NVFP4 grouped GEMM (#8880)
Signed-off-by: Enwei Zhu <[email protected]>
1 parent c789000 commit 7c4777a

File tree

31 files changed

+4802
-109
lines changed

31 files changed

+4802
-109
lines changed

cpp/tensorrt_llm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,6 +189,7 @@ set(TRTLLM_LINK_LIBS
189189
fb_gemm_src
190190
gemm_swiglu_sm90_src
191191
cutlass_src
192+
cute_dsl_src
192193
layers_src
193194
runtime_src
194195
testing_src

cpp/tensorrt_llm/kernels/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ file(GLOB_RECURSE SRC_CU *.cu)
2222
# selectiveScan trtllmGenKernels folder
2323
list(FILTER SRC_CPP EXCLUDE REGEX "cutlass_kernels/.*")
2424
list(FILTER SRC_CU EXCLUDE REGEX "cutlass_kernels/.*")
25+
list(FILTER SRC_CPP EXCLUDE REGEX "cuteDslKernels/.*")
26+
list(FILTER SRC_CU EXCLUDE REGEX "cuteDslKernels/.*")
2527
list(FILTER SRC_CPP EXCLUDE REGEX "flashMLA/.*")
2628
list(FILTER SRC_CU EXCLUDE REGEX "flashMLA/.*")
2729
list(FILTER SRC_CPP EXCLUDE REGEX "contextFusedMultiHeadAttention/.*")
@@ -75,6 +77,7 @@ target_include_directories(
7577
add_cuda_architectures(kernels_src 89)
7678

7779
add_subdirectory(cutlass_kernels)
80+
add_subdirectory(cuteDslKernels)
7881
add_subdirectory(flashMLA)
7982
add_subdirectory(contextFusedMultiHeadAttention)
8083
add_subdirectory(decoderMaskedMultiheadAttention)
cpp/tensorrt_llm/kernels/cuteDslKernels/CMakeLists.txt

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
#
2+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
3+
# All rights reserved. SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
6+
# use this file except in compliance with the License. You may obtain a copy of
7+
# the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14+
# License for the specific language governing permissions and limitations under
15+
# the License.
16+
#
17+
18+
# Gather every C++ source below this directory (recursive glob). The glob is
# evaluated at configure time and has no CONFIGURE_DEPENDS, so newly added
# files are only picked up after CMake is re-run.
file(GLOB_RECURSE SRC_CPP *.cpp)
19+
# Gather every CUDA source below this directory in the same way.
file(GLOB_RECURSE SRC_CU *.cu)
20+
21+
# Compile the collected sources into an OBJECT library named cute_dsl_src: the
# sources are compiled to object files but not archived or linked here.
add_library(cute_dsl_src OBJECT ${SRC_CPP} ${SRC_CU})
22+
# Build the objects as position-independent code.
# NOTE(review): presumably because they end up in a shared library; confirm
# against the consumer of cute_dsl_src.
set_property(TARGET cute_dsl_src PROPERTY POSITION_INDEPENDENT_CODE ON)
23+
# Turn on CMake's CUDA device-symbol resolution (device linking) property for
# this target.
set_property(TARGET cute_dsl_src PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)

0 commit comments

Comments
 (0)