Skip to content

Commit 26529ef

Browse files
authored
Merge branch 'main' into move-oci
2 parents d6cafa7 + f95edb5 commit 26529ef

File tree

2 files changed: 7 additions and 5 deletions (+7 -5 lines changed)

cpp/tensorrt_llm/batch_manager/cacheFormatter.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -761,10 +761,11 @@ void CacheFormatter::unformat(tensorrt_llm::batch_manager::TransferSession& sess
761761
{
762762
cacheBufferId = mCacheTransBufferManager->assignBufferIndexForRecv();
763763
}
764-
TLLM_CHECK(cacheBufferId.has_value());
765764
auto [recvSplitCachestmp, bufferCoverTargetNumtmp, onlyUseDynamicBuffer]
766765
= mCacheTransBufferManager->getOrAllocateRecvBuffers(
767766
cacheBufferId, static_cast<int>(targetNum), bufferEleSizes, bufferManager);
767+
TLLM_CHECK(cacheBufferId.has_value() || onlyUseDynamicBuffer);
768+
768769
bufferCoverTargetNum = bufferCoverTargetNumtmp;
769770
remainNoCoverTargetNum = targetNum > bufferCoverTargetNum ? targetNum - bufferCoverTargetNum : 0;
770771

cpp/tensorrt_llm/batch_manager/cacheTransBuffer.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -394,10 +394,11 @@ std::tuple<std::vector<runtime::ITensor::SharedPtr>, size_t, bool> CacheTransBuf
394394
{
395395
TLLM_LOG_WARNING(
396396
"CacheTransceiver getOrAllocateBuffers: bufferCoverTargetNum:%d < targetNum:%d, may use dynamic "
397-
"buffer, "
398-
"it's better to increase MaxTokensInBuffer in cacheTransceiverConfig, otherwise, the performance may "
399-
"be degraded, requestedNumberOfElements.size():%ld, mNumberOfElements:%ld, "
400-
"requestedNumberOfElements[0]:%ld",
397+
"buffer which will fail with NIXL backend. It is recommended to set "
398+
"cacheTransceiverConfig.MaxTokensInBuffer (cache_transceiver_config.max_tokens_in_buffer in config "
399+
"YAML file) to a value greater than the maximum ISL of the processed requests. Otherwise, performance "
400+
"may be degraded or transfer may fail. requestedNumberOfElements.size():%ld, "
401+
"mNumberOfElements:%ld, requestedNumberOfElements[0]:%ld",
401402
bufferCoverTargetNum, targetNum, requestedNumberOfElements.size(), mNumberOfElements,
402403
requestedNumberOfElements[0]);
403404
}

0 commit comments

Comments
 (0)