From 3203e198213079deb92fc3d0a239cfe0566610ca Mon Sep 17 00:00:00 2001 From: Soowon Jeong Date: Fri, 10 Apr 2026 23:55:18 +0900 Subject: [PATCH 1/3] [BugFix][Relax] Select target-specific pipeline in tvm.compile when target is provided relax.build() with relax_pipeline="default" always resolved to default_build_pipeline, which omits FuseOps, FuseTIR, and DLight scheduling. On CUDA this left individual TIR functions (e.g. maximum, minimum from Clip/ReLU6) without thread bindings, causing VerifyMemory to fail: Memory verification failed: Variable X is directly accessed by host memory (it is not contained in a thread environment or in the function arguments). When relax_pipeline="default" and a target is provided, prefer relax.pipeline.get_default_pipeline(target), which includes the full legalization + fusion + DLight scheduling pipeline. Falls back to default_build_pipeline if no target-specific pipeline is registered (e.g. ValueError or AttributeError from get_default_pipeline). --- python/tvm/relax/vm_build.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/tvm/relax/vm_build.py b/python/tvm/relax/vm_build.py index 68592d67f870..15b04f35c587 100644 --- a/python/tvm/relax/vm_build.py +++ b/python/tvm/relax/vm_build.py @@ -248,7 +248,16 @@ def _extract_attrs(mod: tvm.IRModule): if relax_pipeline is not None: if isinstance(relax_pipeline, str): - relax_pipeline = relax.get_pipeline(relax_pipeline) + # When a target is available, prefer the target-specific pipeline + # (which includes DLight scheduling) over the generic string-keyed + # pipeline that ignores target kind. 
+ if relax_pipeline == "default" and target is not None: + try: + relax_pipeline = relax.get_default_pipeline(target) + except (ValueError, AttributeError): + relax_pipeline = relax.get_pipeline(relax_pipeline) + else: + relax_pipeline = relax.get_pipeline(relax_pipeline) if target is None: mod = relax_pipeline(mod) else: From e6d872a16bb691ac1912a1645c7289c1803d79da Mon Sep 17 00:00:00 2001 From: Soowon Jeong Date: Sat, 11 Apr 2026 12:15:02 +0900 Subject: [PATCH 2/3] [BugFix][Relax] Add DispatchSampling/DispatchSortScan to CPU generic pipeline `cpu_generic.get_default_pipeline` was missing `DispatchSampling` and `DispatchSortScan` from its `library_dispatch_passes`, causing ops like `relax.cumsum` and `relax.topk` to reach CodeGenVM without being dispatched, resulting in "CodeGenVM cannot handle this intrinsic" errors on CPU/llvm targets. --- python/tvm/relax/backend/cpu_generic/pipeline.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/tvm/relax/backend/cpu_generic/pipeline.py b/python/tvm/relax/backend/cpu_generic/pipeline.py index dc078ee25d68..d0b819cea7f8 100644 --- a/python/tvm/relax/backend/cpu_generic/pipeline.py +++ b/python/tvm/relax/backend/cpu_generic/pipeline.py @@ -22,7 +22,10 @@ def library_dispatch_passes(target: tvm.target.Target): # pylint: disable=unused-argument """The default library dispatch passes for CPU backend.""" - return [] + return [ + relax.backend.DispatchSampling(), + relax.backend.DispatchSortScan(), + ] def legalize_passes(target: tvm.target.Target): # pylint: disable=unused-argument From 7a8cff7bc9736e51731a5ae842582820f6343944 Mon Sep 17 00:00:00 2001 From: Soowon Jeong Date: Sat, 11 Apr 2026 19:47:27 +0900 Subject: [PATCH 3/3] Fix get_default_pipeline applied to CPU targets in vm_build.py The previous fix applied get_default_pipeline(target) whenever a target was provided, including CPU (llvm). 
The CPU-specific pipeline includes FoldConstant and FuseOps/FuseTIR which dead-code-eliminate (DCE) unused call_pure_packed calls -- correct per the pure semantics, but it broke existing tests that relied on their side effects. Narrow the scope: only use get_default_pipeline for GPU targets (identified by 'gpu' in target.keys). CPU targets continue to use get_pipeline('default'), which is the previous behaviour. --- python/tvm/relax/vm_build.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/python/tvm/relax/vm_build.py b/python/tvm/relax/vm_build.py index 15b04f35c587..adc0f7ad8383 100644 --- a/python/tvm/relax/vm_build.py +++ b/python/tvm/relax/vm_build.py @@ -248,10 +248,15 @@ def _extract_attrs(mod: tvm.IRModule): if relax_pipeline is not None: if isinstance(relax_pipeline, str): - # When a target is available, prefer the target-specific pipeline - # (which includes DLight scheduling) over the generic string-keyed - # pipeline that ignores target kind. - if relax_pipeline == "default" and target is not None: + # For GPU targets, prefer the target-specific pipeline which + # includes DLight scheduling. Without it, TIR functions generated + # from ops like Clip/ReLU6 lack thread bindings and fail + # VerifyMemory. CPU targets continue to use the generic pipeline + # since the CPU-specific pipeline applies fusion passes that can + # remove call_pure_packed calls whose results are + # unused but whose side effects are relied upon. + _is_gpu = target is not None and "gpu" in target.keys + if relax_pipeline == "default" and _is_gpu: try: relax_pipeline = relax.get_default_pipeline(target) except (ValueError, AttributeError):