diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 00000000000..f108bd7550a
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2024-05-18 - Avoid List Comprehensions inside `sum()` and `all()`
+**Learning:** Found several places where list comprehensions were evaluated fully in memory before being passed to `sum()` or `all()`. For `all()`, this completely defeats the short-circuiting behavior.
+**Action:** Replaced `sum([...])` and `all([...])` with generator expressions `sum(...)` and `all(...)` to reduce memory allocations and enable short-circuiting.
diff --git a/fastdeploy/cache_manager/cache_messager.py b/fastdeploy/cache_manager/cache_messager.py
index b934c3e74c7..30582662d63 100644
--- a/fastdeploy/cache_manager/cache_messager.py
+++ b/fastdeploy/cache_manager/cache_messager.py
@@ -1004,7 +1004,7 @@ def main():
                 gpu_cache_kvs[f"value_cache_scales_{i}_rank{rank}_device{device}"],
                 f"value_cache_scales_{i}_rank{rank}.device{device}",
             )
-        cache_kv_size_byte = sum([tmp.numel() * 1 for key, tmp in gpu_cache_kvs.items()])
+        cache_kv_size_byte = sum(tmp.numel() * 1 for key, tmp in gpu_cache_kvs.items())
         logger.info(f"device :{device}")
         logger.info(f"cache_kv_size_byte : {cache_kv_size_byte}")
         logger.info(f"done init cache (full) gmem alloc : {memory_allocated}")
diff --git a/fastdeploy/engine/resource_manager.py b/fastdeploy/engine/resource_manager.py
index 609c88533bd..ba6ef21d51c 100644
--- a/fastdeploy/engine/resource_manager.py
+++ b/fastdeploy/engine/resource_manager.py
@@ -311,7 +311,7 @@ def allocate_resources_for_new_tasks(self, tasks):
                 break

         # record batch size here
-        num_blocks_used_by_tasks = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
+        num_blocks_used_by_tasks = sum(len(task.block_tables) if task else 0 for task in self.tasks_list)
         main_process_metrics.available_gpu_block_num.set(self.total_block_number() - num_blocks_used_by_tasks)
         main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
         main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py
index ae0e0c798b3..f6317d8c2a9 100644
--- a/fastdeploy/engine/sched/resource_manager_v1.py
+++ b/fastdeploy/engine/sched/resource_manager_v1.py
@@ -946,7 +946,7 @@ def _allocate_decode_and_extend():
                     len(self.running)
                     + len(self.to_be_rescheduled_request_id_set)
                     + len(self.to_be_aborted_req_id_set)
-                    + sum([req.status == RequestStatus.PREEMPTED for req in self.waiting])
+                    + sum(req.status == RequestStatus.PREEMPTED for req in self.waiting)
                     >= self.max_num_seqs
                 ):
                     break
@@ -1564,7 +1564,7 @@ def clear_data(self):

     def update_metrics(self, verbose=False):
         # Update metrics
-        num_tasks = sum([1 if task else 0 for task in self.tasks_list])
+        num_tasks = sum(1 if task else 0 for task in self.tasks_list)
         blocks_used_by_tasks = set()
         for task in self.tasks_list:
             if task is not None:
diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
index 6c5e78b68e9..998b9784085 100644
--- a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
+++ b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
@@ -257,7 +257,7 @@ def forward(
             assert batch_size == 1
             start = 0

-            assert sum([np.prod(x) for x in flatten_image_grid_thw]) == embeddings.shape[1], (
+            assert sum(np.prod(x) for x in flatten_image_grid_thw) == embeddings.shape[1], (
                 flatten_image_grid_thw,
                 embeddings.shape,
             )
@@ -466,7 +466,7 @@ def forward(
         if use_rope is True:
             flatten_image_grid_thw = self.flatten_list(image_grid_thw)
             flatten_image_grid_thw = np.array(flatten_image_grid_thw)
-            assert sum([np.prod(x) for x in flatten_image_grid_thw]) == hidden_states.shape[1], (
+            assert sum(np.prod(x) for x in flatten_image_grid_thw) == hidden_states.shape[1], (
                 flatten_image_grid_thw,
                 hidden_states.shape,
             )
@@ -513,7 +513,7 @@ def forward(
         if use_window_attn:
             flatten_image_grid_thw = self.flatten_list(image_grid_thw)
             assert (
-                sum([np.prod(x.astype("float32").cpu().numpy()) for x in flatten_image_grid_thw])
+                sum(np.prod(x.astype("float32").cpu().numpy()) for x in flatten_image_grid_thw)
                 == hidden_states.shape[1]
             ), (flatten_image_grid_thw, hidden_states.shape)
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index f1678e32411..95cffeff933 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -601,7 +601,7 @@ def event_loop_normal(self) -> None:
             # Let the ep group run control method synchronically
             if envs.FD_ENABLE_V1_UPDATE_WEIGHTS and self.parallel_config.use_ep:
                 pendings = all_gather_values(len(self.cached_control_reqs), self.parallel_config.ep_group)
-                if all([p > 0 for p in pendings]):
+                if all(p > 0 for p in pendings):
                     logger.info(f"Rank: {self.local_rank} Detected all ep ranks have pending control tasks.")
                     self.run_control_method(self.cached_control_reqs.pop(0))
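Appended note (not part of the patch): a minimal, self-contained sketch of why the `all()` change matters. The `is_ready` predicate and the sample list are hypothetical; the point is that a generator expression lets `all()` stop at the first falsy value, while a list comprehension evaluates every element before `all()` sees any of them.

```python
# Minimal sketch with a hypothetical predicate that counts its own invocations.
calls = 0

def is_ready(x: int) -> bool:
    """Hypothetical check; increments a counter so we can see how often it runs."""
    global calls
    calls += 1
    return x > 0

values = [1, -2, 3, 4, 5]

calls = 0
all([is_ready(v) for v in values])  # list comprehension: all 5 elements evaluated
assert calls == 5

calls = 0
all(is_ready(v) for v in values)    # generator: stops at the first falsy result
assert calls == 2
```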