Vendor in _helperlib cext for CUDA-specific changes (#512)

VijayKandiah · gmarkall · web-flow · commit c83f37963c01 · 2025-10-10T14:56:55.000Z
This PR vendors in the necessary functionality of the `_helperlib` C
extension from Numba to allow CUDA-specific changes and future
expansion. It also removes the dependency on the `_dynfunc` C extension,
as it is not needed for the CUDA target.

Co-authored-by: Graham Markall <gmarkall@nvidia.com>
diff --git a/numba_cuda/numba/cuda/cext/_helperlib.c b/numba_cuda/numba/cuda/cext/_helperlib.c
@@ -0,0 +1,71 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: BSD-2-Clause
+
+/*
+ * Helper functions used by Numba CUDA at runtime.
+ * This C file is meant to be included after defining the
+ * NUMBA_EXPORT_FUNC() and NUMBA_EXPORT_DATA() macros.
+ */
+
+#include "_pymodule.h"
+#include <stddef.h>
+
+/*
+ * Unicode helpers
+ */
+
+/* Developer note:
+ *
+ * The hash value of unicode objects is obtained via:
+ * ((PyASCIIObject *)(obj))->hash;
+ * The use comes from this definition:
+ * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L119-L120
+ * and it's used extensively throughout the `cpython/Objects/unicodeobject.c`
+ * source, not least in `unicode_hash` itself:
+ * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L11662-L11679
+ *
+ * The Unicode string struct layouts are described here:
+ * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Include/cpython/unicodeobject.h#L82-L161
+ * essentially, all the unicode string layouts start with a `PyASCIIObject` at
+ * offset 0 (as of commit 6d43f6f081023b680d9db4542d19b9e382149f0a, somewhere
+ * in the 3.8 development cycle).
+ *
+ * For safety against future CPython internal changes, the code checks that the
+ * _base members of the unicode structs are what is expected in 3.7, and that
+ * their offset is 0. It then walks the struct to the hash location to make sure
+ * the offset is indeed the same as PyASCIIObject->hash.
+ * Note: The large condition in the if should evaluate to a compile time
+ * constant.
+ */
+
+#define MEMBER_SIZE(structure, member) sizeof(((structure *)0)->member)
+
+/*
+ * Extract the raw pieces of a unicode object.
+ *
+ * On success, returns the pointer given by PyUnicode_DATA(obj) and fills in:
+ *   *length - PyUnicode_GET_LENGTH(obj)
+ *   *kind   - PyUnicode_KIND(obj)
+ *   *ascii  - 1 if the maximum character value is 0x7f, otherwise 0
+ *   *hash   - the hash field stored in the object (read, never computed)
+ *
+ * Returns NULL if PyUnicode_READY() reports a non-zero result, or if the
+ * unicode struct layouts do not permit reading the hash through a
+ * PyASCIIObject pointer. In the latter case *length, *kind and *ascii have
+ * already been written, but *hash has not.
+ */
+NUMBA_EXPORT_FUNC(void *)
+numba_extract_unicode(PyObject *obj, Py_ssize_t *length, int *kind,
+                      unsigned int *ascii, Py_ssize_t *hash) {
+    /* A non-zero result from PyUnicode_READY() is treated as failure. */
+    if (PyUnicode_READY(obj)) {
+        return NULL;
+    }
+
+    *length = PyUnicode_GET_LENGTH(obj);
+    *kind = PyUnicode_KIND(obj);
+    /* PyUnicode_IS_ASCII would also work, but it is not advertised in the
+     * public docs at https://docs.python.org/3/c-api/unicode.html */
+    *ascii = (unsigned int)(PyUnicode_MAX_CHAR_VALUE(obj) == (0x7f));
+
+    /* Crude layout check: every unicode struct must begin with a
+     * PyASCIIObject so that the hash member sits at the same offset in all
+     * of them. Every operand is a sizeof/offsetof expression, so the whole
+     * test should fold to a compile-time constant. */
+    if (MEMBER_SIZE(PyCompactUnicodeObject, _base) != sizeof(PyASCIIObject)            ||
+        MEMBER_SIZE(PyUnicodeObject, _base) != sizeof(PyCompactUnicodeObject)          ||
+        offsetof(PyCompactUnicodeObject, _base) != 0                                   ||
+        offsetof(PyUnicodeObject, _base) != 0                                          ||
+        offsetof(PyCompactUnicodeObject, _base.hash) != offsetof(PyASCIIObject, hash)  ||
+        offsetof(PyUnicodeObject, _base._base.hash) != offsetof(PyASCIIObject, hash)) {
+        /* the cast below would not be safe, fail */
+        return NULL;
+    }
+
+    /* Take the stored hash as-is; do not compute it here. */
+    *hash = ((PyASCIIObject *)(obj))->hash;
+    return PyUnicode_DATA(obj);
+}
diff --git a/numba_cuda/numba/cuda/cext/_helpermod.c b/numba_cuda/numba/cuda/cext/_helpermod.c
@@ -0,0 +1,82 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: BSD-2-Clause
+
+/*
+ * Expose all functions as pointers in a dedicated C extension.
+ */
+
+/* Import _pymodule.h first, for a recent _POSIX_C_SOURCE */
+#include "_pymodule.h"
+
+/* Visibility control macros */
+#if defined(_WIN32) || defined(_WIN64)
+    #define VISIBILITY_HIDDEN
+    #define VISIBILITY_GLOBAL __declspec(dllexport)
+#else
+    #define VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
+    #define VISIBILITY_GLOBAL __attribute__((visibility("default")))
+#endif
+
+/* Define all runtime-required symbols in this C module, but do not
+   export them outside the shared library if possible. */
+#define NUMBA_EXPORT_FUNC(_rettype) VISIBILITY_HIDDEN _rettype
+#define NUMBA_EXPORT_DATA(_vartype) VISIBILITY_HIDDEN _vartype
+
+/* Numba CUDA C helpers */
+#include "_helperlib.c"
+
+/*
+ * Build a dict mapping helper names (without the "numba_" prefix) to the
+ * addresses of the corresponding C helper functions, stored as Python ints.
+ *
+ * Returns a new reference to the dict, or NULL if creating the dict, creating
+ * an address object, or inserting an entry fails.
+ */
+static PyObject *
+build_c_helpers_dict(void)
+{
+    PyObject *dct = PyDict_New();
+    if (dct == NULL)
+        goto error;
+
+/* Insert `value` (a pointer) under key `name`; the temporary int object is
+   released on both the success and the failure path. */
+#define _declpointer(name, value) do {                 \
+    PyObject *o = PyLong_FromVoidPtr(value);           \
+    if (o == NULL) goto error;                         \
+    if (PyDict_SetItemString(dct, name, o)) {          \
+        Py_DECREF(o);                                  \
+        goto error;                                    \
+    }                                                  \
+    Py_DECREF(o);                                      \
+} while (0)
+
+/* Register numba_<func> under the key "<func>". The explicit cast makes the
+   function-pointer to void* conversion visible instead of implicit. */
+#define declmethod(func) _declpointer(#func, (void *)&numba_##func)
+
+    /* Unicode string support */
+    declmethod(extract_unicode);
+
+/* Both macros are local to this function; do not leak them into the rest of
+   the translation unit. */
+#undef declmethod
+#undef _declpointer
+    return dct;
+error:
+    Py_XDECREF(dct);
+    return NULL;
+}
+
+/* Method table handed to MOD_DEF: it contains no callable entries, only the
+ * terminating sentinel. */
+static PyMethodDef ext_methods[] = {
+    { NULL, NULL, 0, NULL },
+};
+
+/*
+ * Module initialisation for `_helperlib`.
+ *
+ * Populates the module with:
+ *   c_helpers             - dict produced by build_c_helpers_dict()
+ *   long_min / long_max   - LONG_MIN and LONG_MAX
+ *   py_buffer_size        - sizeof(Py_buffer)
+ *   py_gil_state_size     - sizeof(PyGILState_STATE)
+ *   py_unicode_*_kind     - the PyUnicode_*_KIND constants
+ *
+ * Any failure while populating the module releases the module object and
+ * returns MOD_ERROR_VAL, so a half-initialised module is never handed back.
+ */
+MOD_INIT(_helperlib) {
+    PyObject *m;
+    PyObject *c_helpers;
+
+    MOD_DEF(m, "_helperlib", "No docs", ext_methods)
+    if (m == NULL)
+        return MOD_ERROR_VAL;
+
+    c_helpers = build_c_helpers_dict();
+    if (c_helpers == NULL)
+        goto error;
+    /* PyModule_AddObject() only steals the reference on success, so the dict
+       must be released explicitly when it fails. */
+    if (PyModule_AddObject(m, "c_helpers", c_helpers) < 0) {
+        Py_DECREF(c_helpers);
+        goto error;
+    }
+
+    if (PyModule_AddIntConstant(m, "long_min", LONG_MIN) < 0 ||
+        PyModule_AddIntConstant(m, "long_max", LONG_MAX) < 0 ||
+        PyModule_AddIntConstant(m, "py_buffer_size",
+                                (long)sizeof(Py_buffer)) < 0 ||
+        PyModule_AddIntConstant(m, "py_gil_state_size",
+                                (long)sizeof(PyGILState_STATE)) < 0 ||
+        PyModule_AddIntConstant(m, "py_unicode_1byte_kind",
+                                PyUnicode_1BYTE_KIND) < 0 ||
+        PyModule_AddIntConstant(m, "py_unicode_2byte_kind",
+                                PyUnicode_2BYTE_KIND) < 0 ||
+        PyModule_AddIntConstant(m, "py_unicode_4byte_kind",
+                                PyUnicode_4BYTE_KIND) < 0)
+        goto error;
+
+    /* The wchar kind constant is only exported for Python 3.10 and 3.11. */
+#if (PY_MAJOR_VERSION == 3)
+#if ((PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
+    if (PyModule_AddIntConstant(m, "py_unicode_wchar_kind",
+                                PyUnicode_WCHAR_KIND) < 0)
+        goto error;
+#endif
+#endif
+
+    return MOD_SUCCESS_VAL(m);
+
+error:
+    Py_DECREF(m);
+    return MOD_ERROR_VAL;
+}
diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py
@@ -10,7 +10,6 @@
 
 from llvmlite import ir as llvmir
 from llvmlite.ir import Constant
-import llvmlite.binding as ll
 
 from numba.core import (
     types,
@@ -21,7 +20,6 @@
 from numba.cuda import cgutils, debuginfo, utils
 from numba.core import errors
 from numba.cuda.core import targetconfig, funcdesc
-from numba import _dynfunc, _helperlib
 from numba.core.compiler_lock import global_compiler_lock
 from numba.cuda.core.pythonapi import PythonAPI
 from numba.core.imputils import (
@@ -158,26 +156,6 @@ def append(self, value, sig):
         self._cache.clear()
 
 
-@utils.runonce
-def _load_global_helpers():
-    """
-    Execute once to install special symbols into the LLVM symbol table.
-    """
-    # This is Py_None's real C name
-    ll.add_symbol("_Py_NoneStruct", id(None))
-
-    # Add Numba C helper functions
-    for c_helpers in (_helperlib.c_helpers, _dynfunc.c_helpers):
-        for py_name, c_address in c_helpers.items():
-            c_name = "numba_" + py_name
-            ll.add_symbol(c_name, c_address)
-
-    # Add all built-in exception classes
-    for obj in utils.builtins.__dict__.values():
-        if isinstance(obj, type) and issubclass(obj, BaseException):
-            ll.add_symbol("PyExc_%s" % (obj.__name__), id(obj))
-
-
 class BaseContext(object):
     """
 
@@ -238,8 +216,6 @@ def enable_boundscheck(self, value):
     fndesc = None
 
     def __init__(self, typing_context, target):
-        _load_global_helpers()
-
         self.address_size = utils.MACHINE_BITS
         self.typing_context = typing_context
         from numba.core.target_extension import target_registry
diff --git a/numba_cuda/numba/cuda/core/environment.py b/numba_cuda/numba/cuda/core/environment.py
@@ -4,10 +4,8 @@
 import weakref
 import importlib
 
-from numba import _dynfunc
 
-
-class Environment(_dynfunc.Environment):
+class Environment:
     """Stores globals and constant pyobjects for runtime.
 
     It is often needed to convert b/w nopython objects and pyobjects.
@@ -24,7 +22,7 @@ def from_fndesc(cls, fndesc):
             # Avoid creating new Env
             return cls._memo[fndesc.env_name]
         except KeyError:
-            inst = cls(fndesc.lookup_globals())
+            inst = cls()
             inst.env_name = fndesc.env_name
             cls._memo[fndesc.env_name] = inst
             return inst
diff --git a/numba_cuda/numba/cuda/core/pythonapi.py b/numba_cuda/numba/cuda/core/pythonapi.py
@@ -10,7 +10,7 @@
 from llvmlite.ir import Constant
 
 import ctypes
-from numba import _helperlib
+from numba.cuda.cext import _helperlib
 from numba.core import (
     errors,
     types,
diff --git a/numba_cuda/numba/cuda/cpython/unicode.py b/numba_cuda/numba/cuda/cpython/unicode.py
@@ -33,7 +33,7 @@
     PY_UNICODE_2BYTE_KIND,
     PY_UNICODE_4BYTE_KIND,
 )
-from numba._helperlib import c_helpers
+from numba.cuda.cext._helperlib import c_helpers
 from numba.cuda.core.unsafe.bytes import memcpy_region
 from numba.core.errors import TypingError
 from numba.cuda.cpython.unicode_support import (
diff --git a/numba_cuda/numba/cuda/extending.py b/numba_cuda/numba/cuda/extending.py
@@ -27,7 +27,6 @@
     lower_cast,
 )  # noqa: F401
 from numba.cuda.core.pythonapi import box, unbox, reflect, NativeValue  # noqa: F401
-from numba._helperlib import _import_cython_function  # noqa: F401
 from numba.cuda.serialize import ReduceMixin
 from numba.core.datamodel import models as core_models  # noqa: F401
 
diff --git a/setup.py b/setup.py
@@ -91,7 +91,23 @@ def get_ext_modules():
     # Append our cext dir to include_dirs
     ext_dispatcher.include_dirs.append("numba_cuda/numba/cuda/cext")
 
-    return [ext_dispatcher, ext_typeconv, ext_mviewbuf, ext_devicearray]
+    ext_helperlib = Extension(
+        name="numba_cuda.numba.cuda.cext._helperlib",
+        sources=["numba_cuda/numba/cuda/cext/_helpermod.c"],
+        depends=[
+            "numba_cuda/numba/cuda/cext/_pymodule.h",
+            "numba_cuda/numba/cuda/cext/_helperlib.c",
+        ],
+        include_dirs=["numba_cuda/numba/cuda/cext"],
+    )
+
+    return [
+        ext_dispatcher,
+        ext_typeconv,
+        ext_helperlib,
+        ext_mviewbuf,
+        ext_devicearray,
+    ]
 
 
 def is_building():

Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@`
`33`	`33`	`PY_UNICODE_2BYTE_KIND,`
`34`	`34`	`PY_UNICODE_4BYTE_KIND,`
`35`	`35`	`)`
`36`		`-from numba._helperlib import c_helpers`
	`36`	`+from numba.cuda.cext._helperlib import c_helpers`
`37`	`37`	`from numba.cuda.core.unsafe.bytes import memcpy_region`
`38`	`38`	`from numba.core.errors import TypingError`
`39`	`39`	`from numba.cuda.cpython.unicode_support import (`