@@ -550,6 +550,7 @@ class BlockManager {
550
550
* @return The vector of blocks occupied by this sequence at the given layer.
551
551
*/
552
552
const std::vector<KVCacheBlock::Ptr>& get_block_table (uint64_t seq_id, size_t layer_idx) {
553
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
553
554
OPENVINO_ASSERT (m_block_table.count (seq_id) == 1 );
554
555
return m_block_table[seq_id][layer_idx];
555
556
}
@@ -570,6 +571,7 @@ class BlockManager {
570
571
* @return Number of blocks freed in each sequence in the group.
571
572
*/
572
573
const size_t free_group_partially (SequenceGroup::Ptr sequence_group, size_t num_required_blocks) {
574
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
573
575
size_t blocks_num = std::ceil (num_required_blocks / sequence_group->get_not_finished_sequences ().size ());
574
576
auto not_finished_sequences = sequence_group->get_not_finished_sequences ();
575
577
for (size_t idx = 0 ; idx < not_finished_sequences.size (); ++idx) {
@@ -613,6 +615,7 @@ class BlockManager {
613
615
}
614
616
615
617
const size_t free_partially_beam_search_group (SequenceGroup::Ptr sequence_group, size_t num_required_blocks) {
618
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
616
619
size_t physical_blocks_released = 0 ;
617
620
size_t logical_blocks_released = 0 ;
618
621
while (num_required_blocks > physical_blocks_released) {
@@ -632,6 +635,7 @@ class BlockManager {
632
635
* @return The number of distinct physical blocks occupied by this sequence group.
633
636
*/
634
637
const size_t get_number_of_blocks_occupied_by_sequence (SequenceGroup::Ptr sequence_group) {
638
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
635
639
auto running_sequences = sequence_group->get_not_finished_sequences ();
636
640
std::set<size_t > indices;
637
641
for (size_t idx = 0 ; idx < running_sequences.size (); ++idx) {
@@ -652,6 +656,7 @@ class BlockManager {
652
656
* @return Whether or not this BlockManager is managing this sequence group.
653
657
*/
654
658
const bool has_block_table (uint64_t seq_id) {
659
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
655
660
return m_block_table.count (seq_id) > 0 ;
656
661
}
657
662
@@ -766,6 +771,7 @@ class BlockManager {
766
771
* other sequences tracked by this BlockManager.
767
772
*/
768
773
void fork_sequence (uint64_t parent_id, uint64_t child_id) {
774
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
769
775
OPENVINO_ASSERT (m_block_table.count (child_id) == 0 );
770
776
m_block_table[child_id].resize (m_num_layers);
771
777
for (size_t layer_idx = 0 ; layer_idx < m_num_layers; layer_idx++) {
@@ -782,6 +788,7 @@ class BlockManager {
782
788
* @param seq_id Identifier of the sequence to free.
783
789
*/
784
790
void free_sequence (size_t seq_id) {
791
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
785
792
OPENVINO_ASSERT (m_block_table.find (seq_id) != m_block_table.end (), " sequence with id " , seq_id,
786
793
" not found in BlockManager, but requested to free" );
787
794
auto & block_table = m_block_table[seq_id];
@@ -846,6 +853,7 @@ class BlockManager {
846
853
* @param logical_block_index_sets_to_free Sets (one for each layer) of logical block indices to be freed from this sequence.
847
854
*/
848
855
void free_blocks_from_sequence (size_t seq_id, const std::vector<std::set<size_t >>& logical_block_index_sets_to_free) {
856
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
849
857
std::vector<std::vector<size_t >> logical_block_indices_to_free (logical_block_index_sets_to_free.size ());
850
858
for (size_t i = 0 ; i < logical_block_index_sets_to_free.size (); i++) {
851
859
const auto & index_set = logical_block_index_sets_to_free[i];
@@ -916,6 +924,7 @@ class BlockManager {
916
924
* allocated ones.
917
925
*/
918
926
size_t required_blocks_count (SequenceGroup::CPtr seq_group) {
927
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
919
928
std::vector<Sequence::CPtr> running_sequences = seq_group->get_running_sequences ();
920
929
size_t blocks_count = 0 ; // total number of needed blocks for sequence group
921
930
std::set<size_t > last_block_ids; // unique last block indices
@@ -973,6 +982,7 @@ class BlockManager {
973
982
* @param seq_group Pointer to a sequence group.
974
983
*/
975
984
void free_empty_physical_blocks (SequenceGroup::Ptr seq_group) {
985
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
976
986
size_t num_logical_blocks = seq_group->get_num_logical_blocks ();
977
987
if (num_logical_blocks == 0 ) {
978
988
return ;
@@ -997,6 +1007,7 @@ class BlockManager {
997
1007
* indices into which the source block contents should be copied into separately.
998
1008
*/
999
1009
std::map<size_t , std::list<size_t >> append_slots (SequenceGroup::Ptr seq_group) {
1010
+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
1000
1011
// Will always allocate the identical number of new blocks (if any) to each of the "layers" to keep the
1001
1012
// number of blocks occupied by each "layer" identical at all times.
1002
1013
size_t num_logical_blocks = seq_group->get_num_logical_blocks ();
0 commit comments