3434#include < chrono>
3535#include < cmath>
3636#include < cstddef>
37+ #include < fcntl.h>
3738#include < filesystem>
3839#include < memory>
3940#include < set>
4041#include < thread>
42+ #include < unistd.h>
4143#include < variant>
4244
4345using namespace tensorrt_llm ::batch_manager;
@@ -212,7 +214,10 @@ void writePatternToOffloadedBlocksGDS(
212214 {
213215 buffer[i] = i & mask;
214216 }
215- ::write (fd, buffer.data(), poolBlockSize * sizeof(T));
217+ auto const bytesToWrite = static_cast <size_t >(poolBlockSize) * sizeof (T);
218+ auto const written = ::write (fd, buffer.data (), bytesToWrite);
219+ EXPECT_EQ (written, static_cast <ssize_t >(bytesToWrite))
220+ << " Failed to write pattern to offloaded block file " << filename;
216221 ::close (fd);
217222 }
218223 }
@@ -3575,7 +3580,7 @@ TEST_F(KVCacheManagerTest, KVCacheManagerMaxAttentionWindowWithReuseTest)
35753580 auto numAllocatedPrimaryBlocks = blockManager.getNumAllocatedBlocks () - blocksInSecondaryPool;
35763581 EXPECT_THAT (seq0.getCacheBlockIds (onlyWindowSize).at (beamIdx), ::testing::ElementsAreArray ({0 , 1 , 2 , 3 , 4 }));
35773582
3578- EXPECT_NO_THROW (kvCacheManager.removeSequence (requestId, llmRequest));
3583+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (requestId, llmRequest) ));
35793584 numAllocatedPrimaryBlocks = blockManager.getNumAllocatedBlocks () - blocksInSecondaryPool;
35803585 EXPECT_EQ (numAllocatedPrimaryBlocks, 0 );
35813586 // store blocks 0, 1, 2, 3, 4 for reuse ([1000,1001,1002,1003], [1004,1005,1006,1007], [1008,1009,1010,1011],
@@ -3601,7 +3606,7 @@ TEST_F(KVCacheManagerTest, KVCacheManagerMaxAttentionWindowWithReuseTest)
36013606 kvCacheManager.addToken (requestId);
36023607 numTokens = llmRequest->getNumTokens (beamIdx);
36033608 EXPECT_THAT (seq1.getCacheBlockIds (onlyWindowSize).at (beamIdx), ::testing::ElementsAreArray ({0 , 5 , 6 }));
3604- EXPECT_NO_THROW (kvCacheManager.removeSequence (requestId, llmRequest));
3609+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (requestId, llmRequest) ));
36053610
36063611 // /////////////////////////////////////////////////////////////////////////
36073612 // add a medium request and then remove it
@@ -3615,7 +3620,7 @@ TEST_F(KVCacheManagerTest, KVCacheManagerMaxAttentionWindowWithReuseTest)
36153620 GenerationRequest const & seq2 = kvCacheManager.getSequence (requestId);
36163621 EXPECT_EQ (llmRequest->getContextCurrentPosition (), 9 );
36173622 EXPECT_THAT (seq2.getCacheBlockIds (onlyWindowSize).at (beamIdx), ::testing::ElementsAreArray ({0 , 1 , 7 }));
3618- EXPECT_NO_THROW (kvCacheManager.removeSequence (requestId, llmRequest));
3623+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (requestId, llmRequest) ));
36193624
36203625 // /////////////////////////////////////////////////////////////////////////
36213626 // add a longer request within attention window and try to reuse
@@ -3637,7 +3642,7 @@ TEST_F(KVCacheManagerTest, KVCacheManagerMaxAttentionWindowWithReuseTest)
36373642 llmRequest->addNewToken (1016 , beamIdx);
36383643 kvCacheManager.addToken (requestId);
36393644 EXPECT_THAT (seq3.getCacheBlockIds (onlyWindowSize).at (beamIdx), ::testing::ElementsAreArray ({0 , 1 , 2 , 8 , 9 }));
3640- EXPECT_NO_THROW (kvCacheManager.removeSequence (requestId, llmRequest));
3645+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (requestId, llmRequest) ));
36413646}
36423647
36433648TEST_F (KVCacheManagerTest, KVCacheManagerSWAInvalidateReuseTest)
@@ -3715,8 +3720,8 @@ TEST_F(KVCacheManagerTest, KVCacheManagerSWAInvalidateReuseTest)
37153720 EXPECT_FALSE (blockManager.isSequenceValidForStoreForReuse (seq0.getRequestId (), onlyWindowSize));
37163721 EXPECT_TRUE (blockManager.isSequenceValidForStoreForReuse (seq1.getRequestId (), onlyWindowSize));
37173722
3718- EXPECT_NO_THROW (kvCacheManager.removeSequence (seq0.getRequestId (), llmRequest0));
3719- EXPECT_NO_THROW (kvCacheManager.removeSequence (seq1.getRequestId (), llmRequest1));
3723+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (seq0.getRequestId (), llmRequest0) ));
3724+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (seq1.getRequestId (), llmRequest1) ));
37203725}
37213726
37223727TEST_F (KVCacheManagerTest, KVCacheManagerVariableWindowAttentionWithReuseTest)
@@ -3806,7 +3811,7 @@ TEST_F(KVCacheManagerTest, KVCacheManagerVariableWindowAttentionWithReuseTest)
38063811 assertBlocks (seq0, {0 , 1 , 2 }, {0 , 1 , 2 });
38073812 auto numAllocatedPrimaryBlocks = blockManager.getNumAllocatedBlocks () - blocksInSecondaryPoolPerWindow * numWindows;
38083813
3809- EXPECT_NO_THROW (kvCacheManager.removeSequence (requestId, llmRequest));
3814+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (requestId, llmRequest) ));
38103815 numAllocatedPrimaryBlocks = blockManager.getNumAllocatedBlocks () - blocksInSecondaryPoolPerWindow * numWindows;
38113816 EXPECT_EQ (numAllocatedPrimaryBlocks, 0 );
38123817 // For both windows, store blocks 0, 1, 2 for reuse ([1000,1001,1002,1003], [1004,1005,1006,1007],
@@ -3832,7 +3837,7 @@ TEST_F(KVCacheManagerTest, KVCacheManagerVariableWindowAttentionWithReuseTest)
38323837 llmRequest->addNewToken (1009 , beamIdx);
38333838 kvCacheManager.addToken (requestId);
38343839 assertBlocks (seq1, {0 , 3 , 4 }, {0 , 3 , 4 });
3835- EXPECT_NO_THROW (kvCacheManager.removeSequence (requestId, llmRequest));
3840+ EXPECT_NO_THROW (static_cast < void >( kvCacheManager.removeSequence (requestId, llmRequest) ));
38363841}
38373842
38383843TEST_F (KVCacheManagerTest, KVCacheManagerEventStreamOverflow)
0 commit comments