Skip to content

Commit f0bb7c8

Browse files
committed
pure Python LlmResponse
Signed-off-by: QI JUN <[email protected]>
1 parent 3c83644 commit f0bb7c8

File tree

4 files changed

+6
-17
lines changed

4 files changed

+6
-17
lines changed

tensorrt_llm/_torch/pyexecutor/llm_request.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,16 +221,16 @@ class LlmResponse:
221221

222222
def __init__(self,
223223
request_id: int,
224-
error: str = None,
224+
error_msg: str = None,
225225
result: LlmResult = None,
226226
client_id: int = None):
227227
self.request_id = request_id
228-
self.error = error
228+
self.error_msg = error_msg
229229
self.result = result
230230
self.client_id = client_id
231231

232232
def has_error(self):
233-
return self.error is not None
233+
return self.error_msg is not None
234234

235235

236236
class LlmRequest(tensorrt_llm.bindings.internal.batch_manager.LlmRequest):

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1936,7 +1936,7 @@ def _handle_errors(self, error_msg: Optional[str] = None):
19361936
self._terminate_request(request)
19371937
error_responses[req_id] = LlmResponse(
19381938
request_id=req_id,
1939-
error=error_msg,
1939+
error_msg=error_msg,
19401940
client_id=request.py_client_id)
19411941
self.active_requests.clear()
19421942
self._enqueue_responses(error_responses)

tensorrt_llm/executor/proxy.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@
2424
from .request import CancellingRequest, GenerationRequest
2525
from .result import GenerationResult, IterationResult
2626
from .utils import (ErrorResponse, IntraProcessQueue, WorkerCommIpcAddrs,
27-
create_mpi_comm_session, get_spawn_proxy_process_env,
28-
is_llm_response)
27+
create_mpi_comm_session, get_spawn_proxy_process_env)
2928
from .worker import GenerationExecutorWorker, worker_main
3029

3130
__all__ = [
@@ -172,8 +171,7 @@ def process_res(res):
172171
event_loop = event_loop or queue.loop
173172
else:
174173
queue.put(res)
175-
176-
if (is_llm_response(res) and res.result.is_final) or isinstance(
174+
if (hasattr(res, "result") and res.result.is_final) or isinstance(
177175
res, ErrorResponse):
178176
self._results.pop(client_id)
179177

tensorrt_llm/executor/utils.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from strenum import StrEnum
99

1010
from tensorrt_llm._utils import mpi_rank
11-
from tensorrt_llm.bindings.executor import Response
1211
from tensorrt_llm.llmapi.utils import print_colored_debug
1312

1413
from ..llmapi.mpi_session import (MpiCommSession, MpiPoolSession, MpiSession,
@@ -141,11 +140,3 @@ class WorkerCommIpcAddrs(NamedTuple):
141140
result_queue_addr: tuple[str, Optional[bytes]]
142141
stats_queue_addr: tuple[str, Optional[bytes]]
143142
kv_cache_events_queue_addr: tuple[str, Optional[bytes]]
144-
145-
146-
def is_llm_response(instance):
147-
from tensorrt_llm._torch.pyexecutor.llm_request import \
148-
LlmResponse as PyLlmResponse
149-
150-
from .result import ResponseWrapper
151-
return isinstance(instance, (Response, PyLlmResponse, ResponseWrapper))

0 commit comments

Comments (0)