Remove dependencies on target_extension for CUDA target (#555)

VijayKandiah · gmarkall · web-flow · commit 5aeb63cf2623 · 2025-10-30T08:06:27.000Z
This PR removes the dependency on `numba.core.target_extension`. This import
was primarily used to get the local target, which is `CUDA` in our case.

---------

Co-authored-by: Graham Markall <gmarkall@nvidia.com>
diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py
@@ -741,10 +741,7 @@ def compile_cuda(
     flags.max_registers = max_registers
     flags.lto = lto
 
-    # Run compilation pipeline
-    from numba.core.target_extension import target_override
-
-    with target_override("cuda"):
+    with utils.numba_target_override():
         cres = compile_extra(
             typingctx=typingctx,
             targetctx=targetctx,
diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py
@@ -3,6 +3,7 @@
 
 from collections import defaultdict
 import copy
+import importlib
 import sys
 from itertools import permutations, takewhile
 from contextlib import contextmanager
@@ -212,10 +213,15 @@ def enable_boundscheck(self, value):
     def __init__(self, typing_context, target):
         self.address_size = utils.MACHINE_BITS
         self.typing_context = typing_context
-        from numba.core.target_extension import target_registry
-
         self.target_name = target
-        self.target = target_registry[target]
+
+        if importlib.util.find_spec("numba"):
+            from numba.core.target_extension import CUDA
+
+            # Used only in Numba's target_extension implementation.
+            # Numba-CUDA has the target_extension implementation removed, and
+            # references to it hardcoded to values specific to the CUDA target.
+            self.target = CUDA
 
         # A mapping of installed registries to their loaders
         self._registries = {}
diff --git a/numba_cuda/numba/cuda/dispatcher.py b/numba_cuda/numba/cuda/dispatcher.py
@@ -726,12 +726,8 @@ class CUDACache(Cache):
     _impl_class = CUDACacheImpl
 
     def load_overload(self, sig, target_context):
-        # Loading an overload refreshes the context to ensure it is
-        # initialized. To initialize the correct (i.e. CUDA) target, we need to
-        # enforce that the current target is the CUDA target.
-        from numba.core.target_extension import target_override
-
-        with target_override("cuda"):
+        # Loading an overload refreshes the context to ensure it is initialized.
+        with utils.numba_target_override():
             return super().load_overload(sig, target_context)
 
 
diff --git a/numba_cuda/numba/cuda/initialize.py b/numba_cuda/numba/cuda/initialize.py
@@ -5,15 +5,3 @@
 def initialize_all():
     # Import models to register them with the data model manager
     import numba.cuda.models  # noqa: F401
-
-    from numba.cuda.decorators import jit
-    from numba.cuda.dispatcher import CUDADispatcher
-    from numba.core.target_extension import (
-        target_registry,
-        dispatcher_registry,
-        jit_registry,
-    )
-
-    cuda_target = target_registry["cuda"]
-    jit_registry[cuda_target] = jit
-    dispatcher_registry[cuda_target] = CUDADispatcher
diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py
@@ -1239,12 +1239,9 @@ def _lower_call_normal(self, fnty, expr, signature):
             )
         tname = expr.target
         if tname is not None:
-            from numba.core.target_extension import (
-                resolve_dispatcher_from_str,
-            )
+            from numba.cuda.descriptor import cuda_target
 
-            disp = resolve_dispatcher_from_str(tname)
-            hw_ctx = disp.targetdescr.target_context
+            hw_ctx = cuda_target.target_context
             impl = hw_ctx.get_function(fnty, signature)
         else:
             impl = self.context.get_function(fnty, signature)
diff --git a/numba_cuda/numba/cuda/tests/core/test_serialize.py b/numba_cuda/numba/cuda/tests/core/test_serialize.py
@@ -13,10 +13,18 @@
 import numba
 from numba.core.errors import TypingError
 from numba.cuda.tests.support import TestCase
-from numba.core.target_extension import resolve_dispatcher_from_str
 from numba.cuda.cloudpickle import dumps, loads
 
+try:
+    from numba.core.target_extension import resolve_dispatcher_from_str
+except ImportError:
+    resolve_dispatcher_from_str = None
 
+
+@unittest.skipIf(
+    resolve_dispatcher_from_str is None,
+    "numba.core.target_extension not available",
+)
 class TestDispatcherPickling(TestCase):
     def run_with_protocols(self, meth, *args, **kwargs):
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
diff --git a/numba_cuda/numba/cuda/types/cuda_functions.py b/numba_cuda/numba/cuda/types/cuda_functions.py
@@ -314,14 +314,8 @@ def get_call_type(self, context, args, kws):
             context, self, args, kws, depth=self._depth
         )
 
-        # get the order in which to try templates
-        from numba.core.target_extension import (
-            get_local_target,
-        )  # circular
-
-        target_hw = get_local_target(context)
         order = utils.order_by_target_specificity(
-            target_hw, self.templates, fnkey=self.key[0]
+            self.templates, fnkey=self.key[0]
         )
 
         self._depth += 1
diff --git a/numba_cuda/numba/cuda/typing/context.py b/numba_cuda/numba/cuda/typing/context.py
@@ -290,11 +290,7 @@ def core(typ):
     def find_matching_getattr_template(self, typ, attr):
         templates = list(self._get_attribute_templates(typ))
 
-        # get the order in which to try templates
-        from numba.core.target_extension import get_local_target
-
-        target_hw = get_local_target(self)
-        order = order_by_target_specificity(target_hw, templates, fnkey=attr)
+        order = order_by_target_specificity(templates, fnkey=attr)
 
         for template in order:
             return_type = template.resolve(typ, attr)
@@ -446,13 +442,6 @@ def install_registry(self, registry, external_defs_only=False):
             loader = templates.RegistryLoader(registry)
             self._registries[registry] = loader
 
-        from numba.core.target_extension import (
-            get_local_target,
-            resolve_target_str,
-        )
-
-        current_target = get_local_target(self)
-
         def is_for_this_target(ftcls):
             metadata = getattr(ftcls, "metadata", None)
             if metadata is None:
@@ -462,31 +451,11 @@ def is_for_this_target(ftcls):
             if target_str is None:
                 return True
 
-            # There may be pending registrations for nonexistent targets.
-            # Ideally it would be impossible to leave a registration pending
-            # for an invalid target, but in practice this is exceedingly
-            # difficult to guard against - many things are registered at import
-            # time, and eagerly reporting an error when registering for invalid
-            # targets would require that all target registration code is
-            # executed prior to all typing registrations during the import
-            # process; attempting to enforce this would impose constraints on
-            # execution order during import that would be very difficult to
-            # resolve and maintain in the presence of typical code maintenance.
-            # Furthermore, these constraints would be imposed not only on
-            # Numba internals, but also on its dependents.
-            #
-            # Instead of that enforcement, we simply catch any occurrences of
-            # registrations for targets that don't exist, and report that
-            # they're not for this target. They will then not be encountered
-            # again during future typing context refreshes (because the
-            # loader's new registrations are a stream_list that doesn't yield
-            # previously-yielded items).
-            try:
-                ft_target = resolve_target_str(target_str)
-            except errors.NonexistentTargetError:
-                return False
+            # Accept both "cuda" and "generic" targets
+            if target_str in ("cuda", "generic"):
+                return True
 
-            return current_target.inherits_from(ft_target)
+            return False
 
         def is_external(obj):
             """Check if obj is from outside numba.* namespace."""
diff --git a/numba_cuda/numba/cuda/typing/templates.py b/numba_cuda/numba/cuda/typing/templates.py
@@ -778,37 +778,9 @@ def _get_impl(self, args, kws):
 
     def _get_jit_decorator(self):
         """Gets a jit decorator suitable for the current target"""
+        from numba.cuda.decorators import jit
 
-        from numba.core.target_extension import (
-            target_registry,
-            get_local_target,
-            jit_registry,
-        )
-
-        jitter_str = self.metadata.get("target", "generic")
-        jitter = jit_registry.get(jitter_str, None)
-
-        if jitter is None:
-            # No JIT known for target string, see if something is
-            # registered for the string and report if not.
-            target_class = target_registry.get(jitter_str, None)
-            if target_class is None:
-                msg = ("Unknown target '{}', has it been ", "registered?")
-                raise ValueError(msg.format(jitter_str))
-
-            target_hw = get_local_target(self.context)
-
-            # check that the requested target is in the hierarchy for the
-            # current frame's target.
-            if not issubclass(target_hw, target_class):
-                msg = "No overloads exist for the requested target: {}."
-
-            jitter = jit_registry[target_hw]
-
-        if jitter is None:
-            raise ValueError("Cannot find a suitable jit decorator")
-
-        return jitter
+        return jit
 
     def _build_impl(self, cache_key, args, kws):
         """Build and cache the implementation.
@@ -988,16 +960,9 @@ def _get_target_registry(self, reason):
         -------
         reg : a registry suitable for the current target.
         """
-        from numba.core.target_extension import (
-            _get_local_target_checked,
-            dispatcher_registry,
-        )
+        from numba.cuda.descriptor import cuda_target
 
-        hwstr = self.metadata.get("target", "generic")
-        target_hw = _get_local_target_checked(self.context, hwstr, reason)
-        # Get registry for the current hardware
-        disp = dispatcher_registry[target_hw]
-        tgtctx = disp.targetdescr.target_context
+        tgtctx = cuda_target.target_context
 
         # ---------------------------------------------------------------------
         # XXX: In upstream Numba, this function would prefer the builtin
diff --git a/numba_cuda/numba/cuda/utils.py b/numba_cuda/numba/cuda/utils.py
@@ -8,6 +8,7 @@
 
 import atexit
 import builtins
+import importlib
 import inspect
 import operator
 import timeit
@@ -311,7 +312,7 @@ def __hash__(self):
         return hash(tuple(sorted(self._values.items())))
 
 
-def order_by_target_specificity(target, templates, fnkey=""):
+def order_by_target_specificity(templates, fnkey=""):
     """This orders the given templates from most to least specific against the
     current "target". "fnkey" is an indicative typing key for use in the
     exception message in the case that there's no usable templates for the
@@ -321,8 +322,6 @@ def order_by_target_specificity(target, templates, fnkey=""):
     if templates == []:
         return []
 
-    from numba.core.target_extension import target_registry
-
     # fish out templates that are specific to the target if a target is
     # specified
     DEFAULT_TARGET = "generic"
@@ -332,20 +331,22 @@ def order_by_target_specificity(target, templates, fnkey=""):
         md = getattr(temp_cls, "metadata", {})
         hw = md.get("target", DEFAULT_TARGET)
         if hw is not None:
-            hw_clazz = target_registry[hw]
-            if target.inherits_from(hw_clazz):
-                usable.append((temp_cls, hw_clazz, ix))
+            if hw in ("generic", "cuda"):
+                usable.append((temp_cls, ix))
 
     # sort templates based on target specificity
+    # cuda-specific templates get priority before generic ones
     def key(x):
-        return target.__mro__.index(x[1])
+        md = getattr(x[0], "metadata", {})
+        hw = md.get("target", DEFAULT_TARGET)
+        return (0 if hw == "cuda" else 1, x[1])
 
     order = [x[0] for x in sorted(usable, key=key)]
 
     if not order:
         msg = (
             f"Function resolution cannot find any matches for function "
-            f"'{fnkey}' for the current target: '{target}'."
+            f"'{fnkey}'."
         )
         from numba.core.errors import UnsupportedError
 
@@ -710,3 +711,14 @@ def _readenv(name, ctor, default):
 def cached_file_read(filepath, how="r"):
     with open(filepath, how) as f:
         return f.read()
+
+
+@contextlib.contextmanager
+def numba_target_override():
+    if importlib.util.find_spec("numba"):
+        from numba.core.target_extension import target_override
+
+        with target_override("cuda"):
+            yield
+    else:
+        yield

Original file line number	Diff line number	Diff line change
`@@ -314,14 +314,8 @@ def get_call_type(self, context, args, kws):`
`314`	`314`	`context, self, args, kws, depth=self._depth`
`315`	`315`	`)`
`316`	`316`
`317`		`- # get the order in which to try templates`
`318`		`- from numba.core.target_extension import (`
`319`		`- get_local_target,`
`320`		`- ) # circular`
`321`		`-`
`322`		`- target_hw = get_local_target(context)`
`323`	`317`	`order = utils.order_by_target_specificity(`
`324`		`- target_hw, self.templates, fnkey=self.key[0]`
	`318`	`+ self.templates, fnkey=self.key[0]`
`325`	`319`	`)`
`326`	`320`
`327`	`321`	`self._depth += 1`