File tree Expand file tree Collapse file tree 3 files changed +49
-1
lines changed
tensorrt_llm/_torch/auto_deploy Expand file tree Collapse file tree 3 files changed +49
-1
lines changed Original file line number Diff line number Diff line change 1+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+ # SPDX-License-Identifier: Apache-2.0
3+ #
4+ # Licensed under the Apache License, Version 2.0 (the "License");
5+ # you may not use this file except in compliance with the License.
6+ # You may obtain a copy of the License at
7+ #
8+ # http://www.apache.org/licenses/LICENSE-2.0
9+ #
10+ # Unless required by applicable law or agreed to in writing, software
11+ # distributed under the License is distributed on an "AS IS" BASIS,
12+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ # See the License for the specific language governing permissions and
14+ # limitations under the License.
15+
16+
117"""The model factory interface used by auto-deploy to build custom models."""
218
319import copy
Original file line number Diff line number Diff line change 1+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+ # SPDX-License-Identifier: Apache-2.0
3+ #
4+ # Licensed under the Apache License, Version 2.0 (the "License");
5+ # you may not use this file except in compliance with the License.
6+ # You may obtain a copy of the License at
7+ #
8+ # http://www.apache.org/licenses/LICENSE-2.0
9+ #
10+ # Unless required by applicable law or agreed to in writing, software
11+ # distributed under the License is distributed on an "AS IS" BASIS,
12+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ # See the License for the specific language governing permissions and
14+ # limitations under the License.
15+
16+
117"""Graph transformation to automatically add kv cache into fused MHA op."""
218
319import operator
@@ -257,7 +273,7 @@ def _apply_to_full_model(
257273 current_num_pages = cm .info .num_pages
258274 self ._log_info (
259275 f"Current cache size (MB): { current_cache_size // 1024 ** 2 } , "
260- f"Current num pages (MB) : { current_num_pages } "
276+ f"Current num pages: { current_num_pages } "
261277 )
262278 if current_kv_cache_size != current_cache_size :
263279 self ._log_info (
Original file line number Diff line number Diff line change 1+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+ # SPDX-License-Identifier: Apache-2.0
3+ #
4+ # Licensed under the Apache License, Version 2.0 (the "License");
5+ # you may not use this file except in compliance with the License.
6+ # You may obtain a copy of the License at
7+ #
8+ # http://www.apache.org/licenses/LICENSE-2.0
9+ #
10+ # Unless required by applicable law or agreed to in writing, software
11+ # distributed under the License is distributed on an "AS IS" BASIS,
12+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ # See the License for the specific language governing permissions and
14+ # limitations under the License.
15+
16+
117import gc
218from contextlib import contextmanager
319from typing import Tuple
You can’t perform that action at this time.
0 commit comments