From 3203e198213079deb92fc3d0a239cfe0566610ca Mon Sep 17 00:00:00 2001
From: Soowon Jeong <soowon1106@gmail.com>
Date: Fri, 10 Apr 2026 23:55:18 +0900
Subject: [PATCH 1/3] [BugFix][Relax] Select target-specific pipeline in
 tvm.compile when target is provided

relax.build() with relax_pipeline="default" always resolved to
default_build_pipeline, which omits FuseOps, FuseTIR, and DLight
scheduling. On CUDA this left individual TIR functions (e.g. maximum,
minimum from Clip/ReLU6) without thread bindings, causing VerifyMemory
to fail:

  Memory verification failed: Variable X is directly accessed by host
  memory (it is not contained in a thread environment or in the
  function arguments).

When relax_pipeline="default" and a target is provided, prefer
relax.pipeline.get_default_pipeline(target), which includes the full
legalization + fusion + DLight scheduling pipeline. Fall back to
default_build_pipeline when get_default_pipeline raises ValueError or
AttributeError (e.g. because no target-specific pipeline is registered).
---
 python/tvm/relax/vm_build.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/tvm/relax/vm_build.py b/python/tvm/relax/vm_build.py
index 68592d67f870..15b04f35c587 100644
--- a/python/tvm/relax/vm_build.py
+++ b/python/tvm/relax/vm_build.py
@@ -248,7 +248,16 @@ def _extract_attrs(mod: tvm.IRModule):
 
     if relax_pipeline is not None:
         if isinstance(relax_pipeline, str):
-            relax_pipeline = relax.get_pipeline(relax_pipeline)
+            # When a target is available, prefer the target-specific pipeline
+            # (which includes DLight scheduling) over the generic string-keyed
+            # pipeline that ignores target kind.
+            if relax_pipeline == "default" and target is not None:
+                try:
+                    relax_pipeline = relax.get_default_pipeline(target)
+                except (ValueError, AttributeError):
+                    relax_pipeline = relax.get_pipeline(relax_pipeline)
+            else:
+                relax_pipeline = relax.get_pipeline(relax_pipeline)
         if target is None:
             mod = relax_pipeline(mod)
         else:

From e6d872a16bb691ac1912a1645c7289c1803d79da Mon Sep 17 00:00:00 2001
From: Soowon Jeong <soowon1106@gmail.com>
Date: Sat, 11 Apr 2026 12:15:02 +0900
Subject: [PATCH 2/3] [BugFix][Relax] Add DispatchSampling/DispatchSortScan to
 CPU generic pipeline

`cpu_generic.get_default_pipeline` was missing `DispatchSampling` and
`DispatchSortScan` from its `library_dispatch_passes`, causing ops like
`relax.cumsum` and `relax.topk` to reach CodeGenVM without being
dispatched, resulting in "CodeGenVM cannot handle this intrinsic" errors
on CPU/llvm targets.
---
 python/tvm/relax/backend/cpu_generic/pipeline.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/tvm/relax/backend/cpu_generic/pipeline.py b/python/tvm/relax/backend/cpu_generic/pipeline.py
index dc078ee25d68..d0b819cea7f8 100644
--- a/python/tvm/relax/backend/cpu_generic/pipeline.py
+++ b/python/tvm/relax/backend/cpu_generic/pipeline.py
@@ -22,7 +22,10 @@
 
 def library_dispatch_passes(target: tvm.target.Target):  # pylint: disable=unused-argument
     """The default library dispatch passes for CPU backend."""
-    return []
+    return [
+        relax.backend.DispatchSampling(),
+        relax.backend.DispatchSortScan(),
+    ]
 
 
 def legalize_passes(target: tvm.target.Target):  # pylint: disable=unused-argument

From 7a8cff7bc9736e51731a5ae842582820f6343944 Mon Sep 17 00:00:00 2001
From: Soowon Jeong <soowon1106@gmail.com>
Date: Sat, 11 Apr 2026 19:47:27 +0900
Subject: [PATCH 3/3] Fix get_default_pipeline applied to CPU targets in
 vm_build.py

The previous fix applied get_default_pipeline(target) whenever a target
was provided, including CPU (llvm). The CPU-specific pipeline includes
FoldConstant and FuseOps/FuseTIR, which eliminate call_pure_packed calls
whose results are unused. That is correct under pure semantics, but it
broke existing tests that relied on the side effects of those calls.

Narrow the scope: only use get_default_pipeline for GPU targets
(identified by 'gpu' in target.keys). CPU targets continue to use
get_pipeline('default'), which is the previous behaviour.
---
 python/tvm/relax/vm_build.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/python/tvm/relax/vm_build.py b/python/tvm/relax/vm_build.py
index 15b04f35c587..adc0f7ad8383 100644
--- a/python/tvm/relax/vm_build.py
+++ b/python/tvm/relax/vm_build.py
@@ -248,10 +248,15 @@ def _extract_attrs(mod: tvm.IRModule):
 
     if relax_pipeline is not None:
         if isinstance(relax_pipeline, str):
-            # When a target is available, prefer the target-specific pipeline
-            # (which includes DLight scheduling) over the generic string-keyed
-            # pipeline that ignores target kind.
-            if relax_pipeline == "default" and target is not None:
+            # For GPU targets, prefer the target-specific pipeline which
+            # includes DLight scheduling. Without it, TIR functions generated
+            # from ops like Clip/ReLU6 lack thread bindings and fail
+            # VerifyMemory. CPU targets continue to use the generic pipeline
+            # since the CPU-specific pipeline's FoldConstant/FuseOps/FuseTIR
+            # passes drop call_pure_packed calls with unused results (valid for
+            # pure calls), which breaks tests relying on their side effects.
+            _is_gpu = target is not None and "gpu" in target.keys
+            if relax_pipeline == "default" and _is_gpu:
                 try:
                     relax_pipeline = relax.get_default_pipeline(target)
                 except (ValueError, AttributeError):