Skip to content
Open
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions tensorrt_llm/_torch/pyexecutor/py_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2236,9 +2236,14 @@ def _handle_canceled_requests(self):
# Remove canceled requests from the waiting queue
self.executor_request_queue.update_waiting_queue()

# Create a set from the list of canceled request ids to speed up the membership test
canceled_req_ids = set(
self.executor_request_queue.get_canceled_req_ids())

still_pending_canceled_ids = []
for request in self.active_requests:
req_id = request.py_request_id if not request.is_child else request.parent_request_id
if req_id not in self.executor_request_queue.get_canceled_req_ids():
if req_id not in canceled_req_ids:
continue

is_cancelled = self._try_cancel_request(request)
Expand All @@ -2247,13 +2252,15 @@ def _handle_canceled_requests(self):
# to clean up the KV cache resources.
request.finish_by_reason(FinishReason.CANCELLED)
request.decoding_iter = request.py_decoding_iter
self.executor_request_queue.canceled_req_ids.remove(req_id)
else:
still_pending_canceled_ids.append(req_id)

if self.enable_attention_dp:
# TODO: revisit the cancel logic of attention dp
# When attention DP is enabled, each rank does not have a full copy of the requests,
# so we need to remove the canceled requests that are not on the local rank
self.executor_request_queue.clear_canceled_req_ids()
# Clear list of canceled request ids
self.executor_request_queue.canceled_req_ids.clear()
if not self.enable_attention_dp:
# Add back requests that are still pending cancellation.
self.executor_request_queue.canceled_req_ids.extend(
still_pending_canceled_ids)

@nvtx_range("_enqueue_responses")
def _enqueue_responses(self, responses: Iterable[Tuple[int, LlmResponse]]):
Expand Down