Merge remote-tracking branch 'gmarkall/vk/target_extension' into vk/target_extension

VijayKandiah · VijayKandiah · commit 911b1788dea8 · 2025-10-29T11:22:17.000-07:00
diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py
@@ -741,16 +741,17 @@ def compile_cuda(
     flags.max_registers = max_registers
     flags.lto = lto
 
-    cres = compile_extra(
-        typingctx=typingctx,
-        targetctx=targetctx,
-        func=pyfunc,
-        args=args,
-        return_type=return_type,
-        flags=flags,
-        locals={},
-        pipeline_class=CUDACompiler,
-    )
+    with utils.numba_target_override():
+        cres = compile_extra(
+            typingctx=typingctx,
+            targetctx=targetctx,
+            func=pyfunc,
+            args=args,
+            return_type=return_type,
+            flags=flags,
+            locals={},
+            pipeline_class=CUDACompiler,
+        )
 
     library = cres.library
     library.finalize()
diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py
@@ -3,6 +3,7 @@
 
 from collections import defaultdict
 import copy
+import importlib
 import sys
 from itertools import permutations, takewhile
 from contextlib import contextmanager
@@ -212,10 +213,15 @@ def enable_boundscheck(self, value):
     def __init__(self, typing_context, target):
         self.address_size = utils.MACHINE_BITS
         self.typing_context = typing_context
-        from numba.cuda.descriptor import cuda_target
-
         self.target_name = target
-        self.target = cuda_target
+
+        if importlib.util.find_spec("numba"):
+            from numba.core.target_extension import CUDA
+
+            # Used only by Numba's target_extension implementation.
+            # Numba-CUDA has removed its own target_extension implementation
+            # and hardcodes former references to it to CUDA-specific values.
+            self.target = CUDA
 
         # A mapping of installed registries to their loaders
         self._registries = {}
diff --git a/numba_cuda/numba/cuda/descriptor.py b/numba_cuda/numba/cuda/descriptor.py
@@ -33,34 +33,3 @@ def target_context(self):
 
 
 cuda_target = CUDATarget("cuda")
-
-# Monkey-patch numba's get_local_target and order_by_target_specificity for CUDATarget
-try:
-    from numba.core import target_extension
-    from numba.cuda.utils import order_by_target_specificity
-    from numba.core import utils as numba_utils
-
-    def _is_cuda_context(obj):
-        return (
-            isinstance(obj, CUDATarget)
-            or (hasattr(obj, "__class__") and "CUDA" in obj.__class__.__name__)
-            or (hasattr(obj, "target") and isinstance(obj.target, CUDATarget))
-        )
-
-    def _patch_numba_for_cuda_target():
-        _orig_get_local = target_extension.get_local_target
-
-        def get_local_target_cuda(context):
-            return (
-                cuda_target
-                if _is_cuda_context(context)
-                else _orig_get_local(context)
-            )
-
-        target_extension.get_local_target = get_local_target_cuda
-        numba_utils.order_by_target_specificity = order_by_target_specificity
-
-    _patch_numba_for_cuda_target()
-
-except ImportError:
-    pass
diff --git a/numba_cuda/numba/cuda/dispatcher.py b/numba_cuda/numba/cuda/dispatcher.py
@@ -727,7 +727,8 @@ class CUDACache(Cache):
 
     def load_overload(self, sig, target_context):
         # Loading an overload refreshes the context to ensure it is initialized.
-        return super().load_overload(sig, target_context)
+        with utils.numba_target_override():
+            return super().load_overload(sig, target_context)
 
 
 class OmittedArg(object):
diff --git a/numba_cuda/numba/cuda/types/cuda_functions.py b/numba_cuda/numba/cuda/types/cuda_functions.py
@@ -314,10 +314,8 @@ def get_call_type(self, context, args, kws):
             context, self, args, kws, depth=self._depth
         )
 
-        from numba.cuda.descriptor import cuda_target
-
         order = utils.order_by_target_specificity(
-            cuda_target, self.templates, fnkey=self.key[0]
+            self.templates, fnkey=self.key[0]
         )
 
         self._depth += 1
diff --git a/numba_cuda/numba/cuda/typing/context.py b/numba_cuda/numba/cuda/typing/context.py
@@ -290,9 +290,7 @@ def core(typ):
     def find_matching_getattr_template(self, typ, attr):
         templates = list(self._get_attribute_templates(typ))
 
-        from numba.cuda.descriptor import cuda_target
-
-        order = order_by_target_specificity(cuda_target, templates, fnkey=attr)
+        order = order_by_target_specificity(templates, fnkey=attr)
 
         for template in order:
             return_type = template.resolve(typ, attr)
diff --git a/numba_cuda/numba/cuda/utils.py b/numba_cuda/numba/cuda/utils.py
@@ -8,6 +8,7 @@
 
 import atexit
 import builtins
+import importlib
 import inspect
 import operator
 import timeit
@@ -311,7 +312,7 @@ def __hash__(self):
         return hash(tuple(sorted(self._values.items())))
 
 
-def order_by_target_specificity(target, templates, fnkey=""):
+def order_by_target_specificity(templates, fnkey=""):
     """This orders the given templates from most to least specific against the
     current "target". "fnkey" is an indicative typing key for use in the
     exception message in the case that there's no usable templates for the
@@ -345,7 +346,7 @@ def key(x):
     if not order:
         msg = (
             f"Function resolution cannot find any matches for function "
-            f"'{fnkey}' for the current target: '{target}'."
+            f"'{fnkey}'."
         )
         from numba.core.errors import UnsupportedError
 
@@ -710,3 +711,14 @@ def _readenv(name, ctor, default):
 def cached_file_read(filepath, how="r"):
     with open(filepath, how) as f:
         return f.read()
+
+
+@contextlib.contextmanager
+def numba_target_override():
+    if importlib.util.find_spec("numba"):
+        from numba.core.target_extension import target_override
+
+        with target_override("cuda"):
+            yield
+    else:
+        yield

Original file line number	Diff line number	Diff line change
`@@ -314,10 +314,8 @@ def get_call_type(self, context, args, kws):`
`314`	`314`	`context, self, args, kws, depth=self._depth`
`315`	`315`	`)`
`316`	`316`
`317`		`- from numba.cuda.descriptor import cuda_target`
`318`		`-`
`319`	`317`	`order = utils.order_by_target_specificity(`
`320`		`- cuda_target, self.templates, fnkey=self.key[0]`
	`318`	`+ self.templates, fnkey=self.key[0]`
`321`	`319`	`)`
`322`	`320`
`323`	`321`	`self._depth += 1`