Skip to content

Commit 09f8d9e

Browse files
authored
Merge branch 'main' into trtllm-bench/prep_ds_entrypoint
2 parents 5fe028c + b6bced8 commit 09f8d9e

File tree

844 files changed

+11640
-4015
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

844 files changed

+11640
-4015
lines changed

cpp/include/tensorrt_llm/executor/executor.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,16 +1465,19 @@ class CacheTransceiverConfig
14651465
NIXL = 3
14661466
};
14671467
explicit CacheTransceiverConfig(std::optional<BackendType> backendType = std::nullopt,
1468-
std::optional<size_t> maxNumTokens = std::nullopt, std::optional<int> kvTransferTimeoutMs = std::nullopt);
1468+
std::optional<size_t> maxNumTokens = std::nullopt, std::optional<int> kvTransferTimeoutMs = std::nullopt,
1469+
std::optional<int> kvTransferSenderFutureTimeoutMs = std::nullopt);
14691470

14701471
bool operator==(CacheTransceiverConfig const& other) const;
14711472
void setBackendType(std::optional<BackendType> backendType);
14721473
void setMaxTokensInBuffer(std::optional<size_t> maxTokensInBuffer);
14731474
void setKvTransferTimeoutMs(std::optional<int> kvTransferTimeoutMs);
1475+
void setKvTransferSenderFutureTimeoutMs(std::optional<int> kvTransferSenderFutureTimeoutMs);
14741476

1475-
[[nodiscard]] std::optional<int> getKvTransferTimeoutMs() const;
14761477
[[nodiscard]] std::optional<size_t> getMaxTokensInBuffer() const;
14771478
[[nodiscard]] std::optional<BackendType> getBackendType() const;
1479+
[[nodiscard]] std::optional<int> getKvTransferTimeoutMs() const;
1480+
[[nodiscard]] std::optional<int> getKvTransferSenderFutureTimeoutMs() const;
14781481

14791482
private:
14801483
std::optional<BackendType> mBackendType;
@@ -1483,6 +1486,9 @@ class CacheTransceiverConfig
14831486
/// transfer may be degraded.
14841487
std::optional<size_t> mMaxTokensInBuffer;
14851488
std::optional<int> mKvTransferTimeoutMs;
1489+
/// @brief Timeout in milliseconds to wait for the sender future to be ready when the scheduled batch size is 0. This
1490+
/// allows the request to eventually be cancelled by the user or because of kv_transfer_timeout_ms.
1491+
std::optional<int> mKvTransferSenderFutureTimeoutMs;
14861492
};
14871493

14881494
/// @brief Configuration class for the model executor
Binary file not shown.

cpp/kernels/fmha_v2/conftest.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2-
# SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
1+
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
33
#
4-
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5-
# property and proprietary rights in and to this material, related
6-
# documentation and any modifications thereto. Any use, reproduction,
7-
# disclosure or distribution of this material and related documentation
8-
# without an express license agreement from NVIDIA CORPORATION or
9-
# its affiliates is strictly prohibited.
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
1015

1116
import subprocess
1217

cpp/kernels/fmha_v2/setup.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2-
# SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
1+
# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
33
#
4-
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5-
# property and proprietary rights in and to this material, related
6-
# documentation and any modifications thereto. Any use, reproduction,
7-
# disclosure or distribution of this material and related documentation
8-
# without an express license agreement from NVIDIA CORPORATION or
9-
# its affiliates is strictly prohibited.
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
1015

1116
import os
1217
import subprocess

cpp/kernels/fmha_v2/src/convert.cu

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
2+
* SPDX-FileCopyrightText: Copyright (c) 2011-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
44
*
5-
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6-
* property and proprietary rights in and to this material, related
7-
* documentation and any modifications thereto. Any use, reproduction,
8-
* disclosure or distribution of this material and related documentation
9-
* without an express license agreement from NVIDIA CORPORATION or
10-
* its affiliates is strictly prohibited.
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
1116
*/
1217

1318
#include <fmha/numeric_types.h>

cpp/kernels/fmha_v2/src/fmha/alibi_params.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
2+
* SPDX-FileCopyrightText: Copyright (c) 2011-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
44
*
5-
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6-
* property and proprietary rights in and to this material, related
7-
* documentation and any modifications thereto. Any use, reproduction,
8-
* disclosure or distribution of this material and related documentation
9-
* without an express license agreement from NVIDIA CORPORATION or
10-
* its affiliates is strictly prohibited.
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
1116
*/
1217

1318
#pragma once

cpp/kernels/fmha_v2/src/fmha/fragment.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
2+
* SPDX-FileCopyrightText: Copyright (c) 2011-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
44
*
5-
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6-
* property and proprietary rights in and to this material, related
7-
* documentation and any modifications thereto. Any use, reproduction,
8-
* disclosure or distribution of this material and related documentation
9-
* without an express license agreement from NVIDIA CORPORATION or
10-
* its affiliates is strictly prohibited.
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
1116
*/
1217

1318
#pragma once

cpp/kernels/fmha_v2/src/fmha/gemm.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
2+
* SPDX-FileCopyrightText: Copyright (c) 2011-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
44
*
5-
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6-
* property and proprietary rights in and to this material, related
7-
* documentation and any modifications thereto. Any use, reproduction,
8-
* disclosure or distribution of this material and related documentation
9-
* without an express license agreement from NVIDIA CORPORATION or
10-
* its affiliates is strictly prohibited.
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
1116
*/
1217

1318
#pragma once

cpp/kernels/fmha_v2/src/fmha/gmem_tile_o.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
2+
* SPDX-FileCopyrightText: Copyright (c) 2011-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
44
*
5-
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6-
* property and proprietary rights in and to this material, related
7-
* documentation and any modifications thereto. Any use, reproduction,
8-
* disclosure or distribution of this material and related documentation
9-
* without an express license agreement from NVIDIA CORPORATION or
10-
* its affiliates is strictly prohibited.
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
1116
*/
1217

1318
#pragma once

cpp/kernels/fmha_v2/src/fmha/gmem_tile_o_packed.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
2+
* SPDX-FileCopyrightText: Copyright (c) 2011-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
44
*
5-
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6-
* property and proprietary rights in and to this material, related
7-
* documentation and any modifications thereto. Any use, reproduction,
8-
* disclosure or distribution of this material and related documentation
9-
* without an express license agreement from NVIDIA CORPORATION or
10-
* its affiliates is strictly prohibited.
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
1116
*/
1217

1318
#pragma once

0 commit comments

Comments
 (0)