Commit 49c45eb

[None][fix] change logging for weight loading on unified memory (#9177)
Signed-off-by: Faraz Khoubsirat <[email protected]>
Signed-off-by: Simeng Liu <[email protected]>
Co-authored-by: Simeng Liu <[email protected]>
1 parent 1eae941 commit 49c45eb

File tree

1 file changed: +3 −3 lines changed

tensorrt_llm/_torch/modules/linear.py

Lines changed: 3 additions & 3 deletions
@@ -76,9 +76,9 @@ def load_weight_shard(
         # For integrated GPU systems (e.g., DGX Spark), CPU and GPU share limited physical memory.
         # Avoiding device transfers reduces memory consumption and unnecessary data copies,
         # enabling support for larger models on memory-constrained systems.
-        logger.warning(
-            f"[load_weight_shard] Skipping device transfer from {weight.device} to {device} on integrated GPU to conserve shared memory."
-        )
+        logger.warning_once(
+            f"[load_weight_shard] Skipping device transfer from {weight.device} to {device} on integrated GPU to conserve shared memory.",
+            key="load_weight_shard_skip_device_transfer_with_integrated_gpu")
         device = weight.device
     if isinstance(weight, torch.Tensor):
         tensor_shape = weight.shape
