Remove dependency on numba.core.target_extension for CUDATarget

VijayKandiah · VijayKandiah · commit dc4f6605b51d · 2025-10-22T23:25:39.000-07:00
diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py
@@ -741,20 +741,16 @@ def compile_cuda(
     flags.max_registers = max_registers
     flags.lto = lto
 
-    # Run compilation pipeline
-    from numba.core.target_extension import target_override
-
-    with target_override("cuda"):
-        cres = compile_extra(
-            typingctx=typingctx,
-            targetctx=targetctx,
-            func=pyfunc,
-            args=args,
-            return_type=return_type,
-            flags=flags,
-            locals={},
-            pipeline_class=CUDACompiler,
-        )
+    cres = compile_extra(
+        typingctx=typingctx,
+        targetctx=targetctx,
+        func=pyfunc,
+        args=args,
+        return_type=return_type,
+        flags=flags,
+        locals={},
+        pipeline_class=CUDACompiler,
+    )
 
     library = cres.library
     library.finalize()
diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py
@@ -212,10 +212,10 @@ def enable_boundscheck(self, value):
     def __init__(self, typing_context, target):
         self.address_size = utils.MACHINE_BITS
         self.typing_context = typing_context
-        from numba.core.target_extension import target_registry
+        from numba.cuda.descriptor import cuda_target
 
         self.target_name = target
-        self.target = target_registry[target]
+        self.target = cuda_target
 
         # A mapping of installed registries to their loaders
         self._registries = {}
diff --git a/numba_cuda/numba/cuda/cuda_types/functions.py b/numba_cuda/numba/cuda/cuda_types/functions.py
@@ -314,14 +314,10 @@ def get_call_type(self, context, args, kws):
             context, self, args, kws, depth=self._depth
         )
 
-        # get the order in which to try templates
-        from numba.core.target_extension import (
-            get_local_target,
-        )  # circular
+        from numba.cuda.descriptor import cuda_target
 
-        target_hw = get_local_target(context)
         order = utils.order_by_target_specificity(
-            target_hw, self.templates, fnkey=self.key[0]
+            cuda_target, self.templates, fnkey=self.key[0]
         )
 
         self._depth += 1
diff --git a/numba_cuda/numba/cuda/descriptor.py b/numba_cuda/numba/cuda/descriptor.py
@@ -33,3 +33,57 @@ def target_context(self):
 
 
 cuda_target = CUDATarget("cuda")
+
+# Monkey-patch numba's get_local_target and order_by_target_specificity so
+# that numba code calling them with a CUDA context or target gets CUDA
+# behavior. Anything that is not CUDA is forwarded to numba's original
+# implementations, so other targets (e.g. the CPU target) are unaffected.
+try:
+    from numba.core import target_extension
+    from numba.cuda.utils import order_by_target_specificity
+    from numba.core import utils as numba_utils
+
+    def _is_cuda_context(obj):
+        """Return True if *obj* looks like a CUDA target or context.
+
+        That is: a ``CUDATarget`` instance, an object whose class name
+        contains "CUDA", or an object whose ``target`` attribute is a
+        ``CUDATarget`` instance.
+        """
+        return (
+            isinstance(obj, CUDATarget)
+            or (hasattr(obj, "__class__") and "CUDA" in obj.__class__.__name__)
+            or (hasattr(obj, "target") and isinstance(obj.target, CUDATarget))
+        )
+
+    def _patch_numba_for_cuda_target():
+        """Wrap numba's target helpers with CUDA-aware versions."""
+        _orig_get_local = target_extension.get_local_target
+        _orig_order = numba_utils.order_by_target_specificity
+
+        def get_local_target_cuda(context):
+            return (
+                cuda_target
+                if _is_cuda_context(context)
+                else _orig_get_local(context)
+            )
+
+        def order_by_target_specificity_cuda(target, templates, fnkey=""):
+            # The CUDA ordering only keeps "cuda" and "generic" templates, so
+            # it must not replace numba's ordering for non-CUDA targets.
+            if _is_cuda_context(target):
+                return order_by_target_specificity(
+                    target, templates, fnkey=fnkey
+                )
+            return _orig_order(target, templates, fnkey=fnkey)
+
+        target_extension.get_local_target = get_local_target_cuda
+        numba_utils.order_by_target_specificity = (
+            order_by_target_specificity_cuda
+        )
+
+    _patch_numba_for_cuda_target()
+
+except ImportError:
+    # One of the imports above failed; leave numba unpatched.
+    pass
diff --git a/numba_cuda/numba/cuda/dispatcher.py b/numba_cuda/numba/cuda/dispatcher.py
@@ -726,13 +726,8 @@ class CUDACache(Cache):
     _impl_class = CUDACacheImpl
 
     def load_overload(self, sig, target_context):
-        # Loading an overload refreshes the context to ensure it is
-        # initialized. To initialize the correct (i.e. CUDA) target, we need to
-        # enforce that the current target is the CUDA target.
-        from numba.core.target_extension import target_override
-
-        with target_override("cuda"):
-            return super().load_overload(sig, target_context)
+        # Loading an overload refreshes the context; no target override here.
+        return super().load_overload(sig, target_context)
 
 
 class OmittedArg(object):
diff --git a/numba_cuda/numba/cuda/initialize.py b/numba_cuda/numba/cuda/initialize.py
@@ -5,15 +5,3 @@
 def initialize_all():
     # Import models to register them with the data model manager
     import numba.cuda.models  # noqa: F401
-
-    from numba.cuda.decorators import jit
-    from numba.cuda.dispatcher import CUDADispatcher
-    from numba.core.target_extension import (
-        target_registry,
-        dispatcher_registry,
-        jit_registry,
-    )
-
-    cuda_target = target_registry["cuda"]
-    jit_registry[cuda_target] = jit
-    dispatcher_registry[cuda_target] = CUDADispatcher
diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py
@@ -1239,12 +1239,9 @@ def _lower_call_normal(self, fnty, expr, signature):
             )
         tname = expr.target
         if tname is not None:
-            from numba.core.target_extension import (
-                resolve_dispatcher_from_str,
-            )
+            from numba.cuda.descriptor import cuda_target
 
-            disp = resolve_dispatcher_from_str(tname)
-            hw_ctx = disp.targetdescr.target_context
+            hw_ctx = cuda_target.target_context
             impl = hw_ctx.get_function(fnty, signature)
         else:
             impl = self.context.get_function(fnty, signature)
diff --git a/numba_cuda/numba/cuda/tests/core/test_serialize.py b/numba_cuda/numba/cuda/tests/core/test_serialize.py
@@ -13,10 +13,18 @@
 import numba
 from numba.core.errors import TypingError
 from numba.cuda.tests.support import TestCase
-from numba.core.target_extension import resolve_dispatcher_from_str
 from numba.cuda.cloudpickle import dumps, loads
 
+try:
+    from numba.core.target_extension import resolve_dispatcher_from_str
+except ImportError:
+    resolve_dispatcher_from_str = None
 
+
+@unittest.skipIf(
+    resolve_dispatcher_from_str is None,
+    "numba.core.target_extension not available",
+)
 class TestDispatcherPickling(TestCase):
     def run_with_protocols(self, meth, *args, **kwargs):
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
diff --git a/numba_cuda/numba/cuda/typing/context.py b/numba_cuda/numba/cuda/typing/context.py
@@ -290,11 +290,9 @@ def core(typ):
     def find_matching_getattr_template(self, typ, attr):
         templates = list(self._get_attribute_templates(typ))
 
-        # get the order in which to try templates
-        from numba.core.target_extension import get_local_target
+        from numba.cuda.descriptor import cuda_target
 
-        target_hw = get_local_target(self)
-        order = order_by_target_specificity(target_hw, templates, fnkey=attr)
+        order = order_by_target_specificity(cuda_target, templates, fnkey=attr)
 
         for template in order:
             return_type = template.resolve(typ, attr)
@@ -446,13 +444,6 @@ def install_registry(self, registry, external_defs_only=False):
             loader = templates.RegistryLoader(registry)
             self._registries[registry] = loader
 
-        from numba.core.target_extension import (
-            get_local_target,
-            resolve_target_str,
-        )
-
-        current_target = get_local_target(self)
-
         def is_for_this_target(ftcls):
             metadata = getattr(ftcls, "metadata", None)
             if metadata is None:
@@ -462,31 +453,11 @@ def is_for_this_target(ftcls):
             if target_str is None:
                 return True
 
-            # There may be pending registrations for nonexistent targets.
-            # Ideally it would be impossible to leave a registration pending
-            # for an invalid target, but in practice this is exceedingly
-            # difficult to guard against - many things are registered at import
-            # time, and eagerly reporting an error when registering for invalid
-            # targets would require that all target registration code is
-            # executed prior to all typing registrations during the import
-            # process; attempting to enforce this would impose constraints on
-            # execution order during import that would be very difficult to
-            # resolve and maintain in the presence of typical code maintenance.
-            # Furthermore, these constraints would be imposed not only on
-            # Numba internals, but also on its dependents.
-            #
-            # Instead of that enforcement, we simply catch any occurrences of
-            # registrations for targets that don't exist, and report that
-            # they're not for this target. They will then not be encountered
-            # again during future typing context refreshes (because the
-            # loader's new registrations are a stream_list that doesn't yield
-            # previously-yielded items).
-            try:
-                ft_target = resolve_target_str(target_str)
-            except errors.NonexistentTargetError:
-                return False
+            # Accept both "cuda" and "generic" targets
+            if target_str in ("cuda", "generic"):
+                return True
 
-            return current_target.inherits_from(ft_target)
+            return False
 
         def is_external(obj):
             """Check if obj is from outside numba.* namespace."""
diff --git a/numba_cuda/numba/cuda/typing/templates.py b/numba_cuda/numba/cuda/typing/templates.py
@@ -778,37 +778,9 @@ def _get_impl(self, args, kws):
 
     def _get_jit_decorator(self):
         """Gets a jit decorator suitable for the current target"""
+        from numba.cuda.decorators import jit
 
-        from numba.core.target_extension import (
-            target_registry,
-            get_local_target,
-            jit_registry,
-        )
-
-        jitter_str = self.metadata.get("target", "generic")
-        jitter = jit_registry.get(jitter_str, None)
-
-        if jitter is None:
-            # No JIT known for target string, see if something is
-            # registered for the string and report if not.
-            target_class = target_registry.get(jitter_str, None)
-            if target_class is None:
-                msg = ("Unknown target '{}', has it been ", "registered?")
-                raise ValueError(msg.format(jitter_str))
-
-            target_hw = get_local_target(self.context)
-
-            # check that the requested target is in the hierarchy for the
-            # current frame's target.
-            if not issubclass(target_hw, target_class):
-                msg = "No overloads exist for the requested target: {}."
-
-            jitter = jit_registry[target_hw]
-
-        if jitter is None:
-            raise ValueError("Cannot find a suitable jit decorator")
-
-        return jitter
+        return jit  # always the CUDA jit; template metadata is not consulted
 
     def _build_impl(self, cache_key, args, kws):
         """Build and cache the implementation.
@@ -988,16 +960,9 @@ def _get_target_registry(self, reason):
         -------
         reg : a registry suitable for the current target.
         """
-        from numba.core.target_extension import (
-            _get_local_target_checked,
-            dispatcher_registry,
-        )
+        from numba.cuda.descriptor import cuda_target
 
-        hwstr = self.metadata.get("target", "generic")
-        target_hw = _get_local_target_checked(self.context, hwstr, reason)
-        # Get registry for the current hardware
-        disp = dispatcher_registry[target_hw]
-        tgtctx = disp.targetdescr.target_context
+        tgtctx = cuda_target.target_context
 
         # ---------------------------------------------------------------------
         # XXX: In upstream Numba, this function would prefer the builtin
diff --git a/numba_cuda/numba/cuda/utils.py b/numba_cuda/numba/cuda/utils.py
@@ -320,8 +320,6 @@ def order_by_target_specificity(target, templates, fnkey=""):
     if templates == []:
         return []
 
-    from numba.core.target_extension import target_registry
-
     # fish out templates that are specific to the target if a target is
     # specified
     DEFAULT_TARGET = "generic"
@@ -331,13 +329,15 @@ def order_by_target_specificity(target, templates, fnkey=""):
         md = getattr(temp_cls, "metadata", {})
         hw = md.get("target", DEFAULT_TARGET)
         if hw is not None:
-            hw_clazz = target_registry[hw]
-            if target.inherits_from(hw_clazz):
-                usable.append((temp_cls, hw_clazz, ix))
+            if hw in ("generic", "cuda"):
+                usable.append((temp_cls, ix))
 
     # sort templates based on target specificity
+    # cuda-specific templates get priority before generic ones
     def key(x):
-        return target.__mro__.index(x[1])
+        md = getattr(x[0], "metadata", {})
+        hw = md.get("target", DEFAULT_TARGET)
+        return (0 if hw == "cuda" else 1, x[1])
 
     order = [x[0] for x in sorted(usable, key=key)]
 

Original file line number	Diff line number	Diff line change
`@@ -314,14 +314,10 @@ def get_call_type(self, context, args, kws):`
`314`	`314`	`context, self, args, kws, depth=self._depth`
`315`	`315`	`)`
`316`	`316`
`317`		`- # get the order in which to try templates`
`318`		`- from numba.core.target_extension import (`
`319`		`- get_local_target,`
`320`		`- ) # circular`
	`317`	`+ from numba.cuda.descriptor import cuda_target`
`321`	`318`
`322`		`- target_hw = get_local_target(context)`
`323`	`319`	`order = utils.order_by_target_specificity(`
`324`		`- target_hw, self.templates, fnkey=self.key[0]`
	`320`	`+ cuda_target, self.templates, fnkey=self.key[0]`
`325`	`321`	`)`
`326`	`322`
`327`	`323`	`self._depth += 1`