Commit c1726bc (parent 831c142)

OPTIONAL COMMIT: Debugging prints

5 files changed: 25 additions & 5 deletions

megatron/core/inference/data_parallel_inference_coordinator.py

Lines changed: 1 addition & 1 deletion

@@ -148,7 +148,7 @@ def start(self):
                 )
                 continue

-            # print(f"New client connected: {sender_identity}")
+            logging.info(f"New client connected: {sender_identity}")
             known_clients.add(sender_identity)
             self.router_socket.send_multipart(
                 [sender_identity, msgpack.packb([Headers.ACK.value], use_bin_type=True)]
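For context, a minimal self-contained sketch of the ROUTER-side handshake this hunk instruments, assuming a msgpack-over-ZMQ protocol like the one visible above. The address, the ACK constant, and the in-process DEALER client are illustrative, not the Megatron setup:

import logging
import msgpack
import zmq

logging.basicConfig(level=logging.INFO)

ACK = 0  # stands in for Headers.ACK.value

ctx = zmq.Context()
router = ctx.socket(zmq.ROUTER)
router.bind("inproc://coord")      # in-process transport keeps the demo self-contained
client = ctx.socket(zmq.DEALER)
client.connect("inproc://coord")

known_clients = set()
client.send(msgpack.packb([1], use_bin_type=True))  # client announces itself

# ROUTER prepends the sender's identity frame to every message it receives.
sender_identity, _payload = router.recv_multipart()
if sender_identity not in known_clients:
    logging.info(f"New client connected: {sender_identity}")
    known_clients.add(sender_identity)
    router.send_multipart([sender_identity, msgpack.packb([ACK], use_bin_type=True)])

print(msgpack.unpackb(client.recv(), raw=False))  # -> [0], the ACK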

megatron/core/inference/engines/dynamic_engine.py

Lines changed: 13 additions & 2 deletions

@@ -18,6 +18,7 @@
 import torch
 from torch import Tensor
 from torch.cuda.nvtx import range_pop, range_push
+from megatron.core.utils import log_single_rank

 from megatron.core import parallel_state
 from megatron.core.inference.contexts.dynamic_context import (
@@ -488,6 +489,7 @@ async def start_listening_to_data_parallel_coordinator(

         # Finally run the engine infinite loop
         loop = get_asyncio_loop(loop)
+        logging.info(f"Creating engine loop task on loop {id(loop)} on rank {torch.distributed.get_rank()}")
         self.engine_loop_task = loop.create_task(
             self.run_engine_with_coordinator(loop=loop, verbose=verbose)
         )
@@ -1246,6 +1248,7 @@ def schedule_requests(self) -> int:
                 request_id, prompt, sampling_params = data[1:]
                 sampling_params = SamplingParams.deserialize(sampling_params)
                 self.add_request(request_id, prompt, sampling_params)
+                logging.info(f"Added request {request_id} on rank {torch.distributed.get_rank()}")
             elif header == Headers.PAUSE:
                 self.paused = True
             elif header == Headers.UNPAUSE:
@@ -1259,6 +1262,9 @@ def schedule_requests(self) -> int:
             else:
                 raise UnknownHeaderError(header)

+        if len(all_messages) > 0:
+            logging.info(f"Drained {len(all_messages)} messages from coordinator on rank {torch.distributed.get_rank()}")
+
         return len(all_messages)

     def stop(self):
@@ -1308,6 +1314,7 @@ async def run_engine_with_coordinator(
         """Continually steps the engine asynchronously."""
         self._loop = get_asyncio_loop(loop)
         try:
+            logging.info(f"Running engine with coordinator on rank {torch.distributed.get_rank()}")
             while True:
                 self.schedule_requests()
                 if self.stopped:
@@ -1327,6 +1334,7 @@ async def run_engine_with_coordinator(
                 # todo [Siddharth]: Can this hardcoded sleep be avoided
                 # with asyncio zmq sockets?
                 if self.paused:
+                    logging.info(f"Suspending engine on rank {torch.distributed.get_rank()}")
                     await asyncio.sleep(0.02)
                     continue

@@ -1344,11 +1352,14 @@ async def run_engine_with_coordinator(
                     self.context.get_active_request_count() == 0
                     and len(self.waiting_request_ids) == 0
                 ):
+                    logging.info(f"No requests to process on rank {torch.distributed.get_rank()}")
                     await asyncio.sleep(0.02)
                     continue

-                # Step.
-                engine_output = await self.async_step(verbose=verbose)
+                logging.info(f"Processing requests on rank {torch.distributed.get_rank()}")
+                logging.info(f"Active requests: {self.context.get_active_request_count()}")
+                logging.info(f"Waiting requests: {len(self.waiting_request_ids)}")
+                engine_output = await self.async_step(verbose=True)

                 # Send finished requests.
                 is_tp0_and_pp0 = (
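The logging added above traces one pass of the engine's drain/step loop: drain coordinator messages, sleep briefly when paused or idle, otherwise run one step. A minimal sketch of that control flow, with hypothetical names (EngineSketch, pending) standing in for the real engine state:

import asyncio

class EngineSketch:
    def __init__(self):
        self.paused = False
        self.stopped = False
        self.pending: list[str] = []

    def schedule_requests(self) -> int:
        # The real engine drains ZMQ messages here; this sketch just reports queued work.
        return len(self.pending)

    async def step(self):
        print("processing", self.pending.pop(0))

    async def run(self):
        while not self.stopped:
            self.schedule_requests()
            if self.paused or not self.pending:
                await asyncio.sleep(0.02)  # paused or idle: yield briefly, then re-check
                continue
            await self.step()

async def demo():
    engine = EngineSketch()
    engine.pending.append("req-0")
    task = asyncio.create_task(engine.run())
    await asyncio.sleep(0.1)
    engine.stopped = True
    await task

asyncio.run(demo())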

megatron/core/inference/inference_client.py

Lines changed: 4 additions & 0 deletions

@@ -10,6 +10,8 @@
 from megatron.core.inference.sampling_params import SamplingParams
 from megatron.core.utils import get_asyncio_loop, trace_async_exceptions

+import torch.distributed as dist
+
 from .headers import Headers

 try:
@@ -99,6 +101,7 @@ def add_request(
             `DynamicInferenceRequestRecord` object containing the completed result.
         """
         request_id = self.next_request_id
+        logging.info(f"Adding request {request_id}")
         self.next_request_id += 1
         payload = [Headers.SUBMIT_REQUEST.value, request_id, prompt, sampling_params.serialize()]
         payload_serialized = msgpack.packb(payload, use_bin_type=True)
@@ -126,6 +129,7 @@ async def _listen_for_completed_requests(self):
                     request_id
                 )
                 completion_future = self.completion_futures.pop(request_id)
+                logging.info(f"Received reply for request {request_id}")
                 completion_future.set_result(DynamicInferenceRequestRecord.deserialize(reply))
             except zmq.Again:
                 await asyncio.sleep(0.005)
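The two logged events bracket the client's future-based request tracking: add_request registers a future under a fresh request id, and the listener resolves it when the matching reply arrives. A minimal sketch of that pattern, with illustrative names rather than the Megatron API:

import asyncio

class ClientSketch:
    def __init__(self):
        self.next_request_id = 0
        self.completion_futures: dict[int, asyncio.Future] = {}

    def add_request(self, prompt: str) -> asyncio.Future:
        request_id = self.next_request_id
        self.next_request_id += 1
        future = asyncio.get_running_loop().create_future()
        self.completion_futures[request_id] = future
        # ... the real client serializes (request_id, prompt) and sends it here ...
        return future

    def on_reply(self, request_id: int, reply: str):
        # Called by the listener task when the reply for request_id arrives.
        self.completion_futures.pop(request_id).set_result(reply)

async def demo():
    client = ClientSketch()
    fut = client.add_request("hello")
    client.on_reply(0, "world")
    print(await fut)

asyncio.run(demo())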

megatron/core/utils.py

Lines changed: 5 additions & 2 deletions

@@ -2087,14 +2087,18 @@ def maybe_cat(a, b, dim=0, *, required=False):
         return None
     return xs[0] if len(xs) == 1 else torch.cat(xs, dim=dim)

+_ASYNC_IO_LOOP: asyncio.AbstractEventLoop | None = None

 def get_asyncio_loop(loop: asyncio.AbstractEventLoop | None = None) -> asyncio.AbstractEventLoop:
     """Creates an asyncio loop if necessary and then returns the current asyncio loop."""
+    global _ASYNC_IO_LOOP
     if loop is None:
+        if _ASYNC_IO_LOOP is not None:
+            return _ASYNC_IO_LOOP
         try:
             loop = asyncio.get_running_loop()
         except RuntimeError as e:
-            loop = asyncio.new_event_loop()
+            _ASYNC_IO_LOOP = loop = asyncio.new_event_loop()
             asyncio.set_event_loop(loop)
     return loop

@@ -2130,7 +2134,6 @@ async def wrapper(*args, **kwargs):
         try:
             return await fn(*args, **kwargs)
         except Exception as e:
-            logger.error(f"Exception in async function {fn.__name__}: {e}")
             traceback.print_exc()
             sys.exit(1)
         finally:
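The get_asyncio_loop change memoizes the first loop created from synchronous code, so later callers reuse the same loop instead of getting a fresh one per call. A standalone sketch of the same pattern; names here are illustrative:

import asyncio

_LOOP: asyncio.AbstractEventLoop | None = None

def get_loop(loop: asyncio.AbstractEventLoop | None = None) -> asyncio.AbstractEventLoop:
    global _LOOP
    if loop is None:
        if _LOOP is not None:
            return _LOOP  # reuse the cached loop
        try:
            loop = asyncio.get_running_loop()  # inside async code: use the active loop
        except RuntimeError:
            # first call from sync code: create a loop and cache it process-wide
            _LOOP = loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
    return loop

assert get_loop() is get_loop()  # repeated sync calls now return the same loop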

megatron/rl/inference/megatron.py

Lines changed: 2 additions & 0 deletions

@@ -177,6 +177,7 @@ async def base_generate(self, request: InferenceRequest):
         assert self._client is not None, "Client is not initialized"

         tokenizer = get_tokenizer()
+        print(f"Adding request to client on rank {dist.get_rank()}")

         sampling_params = SamplingParams(
             num_tokens_to_generate=None,
@@ -193,6 +194,7 @@ async def base_generate(self, request: InferenceRequest):
             self._client.add_request(prompt=prompt, sampling_params=sampling_params)
             for prompt in request.prompt
         ]
+        print(f"Waiting for responses on rank {dist.get_rank()}")
         responses = await asyncio.gather(
             *requests
         )
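The added prints bracket base_generate's fan-out: one awaitable per prompt, gathered concurrently. A minimal sketch, with fake_add_request standing in for the client round trip:

import asyncio

async def fake_add_request(prompt: str) -> str:
    await asyncio.sleep(0.01)  # stand-in for the request/reply round trip to the engine
    return prompt.upper()

async def base_generate(prompts: list[str]) -> list[str]:
    requests = [fake_add_request(p) for p in prompts]
    return await asyncio.gather(*requests)

print(asyncio.run(base_generate(["a", "b"])))  # -> ['A', 'B']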
