Skip to content

Commit 9eb9e15

Browse files
Vendor in the CPU module for CUDA refactoring (#447)
The CPU module has no standalone tests and is likely well-covered by existing tests to the extent it is used. It will be used by the registry module once that is vendored in. Depends-on: - #440 --------- Co-authored-by: Graham Markall <[email protected]>
1 parent 24967a0 commit 9eb9e15

File tree

1 file changed

+370
-0
lines changed
  • numba_cuda/numba/cuda/core

1 file changed

+370
-0
lines changed

numba_cuda/numba/cuda/core/cpu.py

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: BSD-2-Clause
3+
4+
import platform
5+
6+
import llvmlite.binding as ll
7+
from llvmlite import ir
8+
9+
from numba import _dynfunc
10+
from numba.core.callwrapper import PyCallWrapper
11+
from numba.core.base import BaseContext
12+
from numba.core import (
13+
utils,
14+
types,
15+
config,
16+
cgutils,
17+
callconv,
18+
codegen,
19+
externals,
20+
fastmathpass,
21+
intrinsics,
22+
)
23+
from numba.core.options import TargetOptions, include_default_options
24+
from numba.core.runtime import rtsys
25+
from numba.core.compiler_lock import global_compiler_lock
26+
import numba.core.entrypoints
27+
28+
# Re-export these options, they are used from the cpu module throughout the code
29+
# base.
30+
from numba.cuda.core.options import (
31+
ParallelOptions, # noqa F401
32+
FastMathOptions, # noqa F401
33+
InlineOptions, # noqa F401
34+
) # noqa F401
35+
from numba.np import ufunc_db
36+
37+
# Keep those structures in sync with _dynfunc.c.
38+
39+
40+
class ClosureBody(cgutils.Structure):
    """Structured view of a closure body, exposing its single ``env`` field.

    The field layout must be kept in sync with ``_dynfunc.c``.
    """

    _fields = [("env", types.pyobject)]
42+
43+
44+
class EnvBody(cgutils.Structure):
    """Structured view of an environment body (``globals`` and ``consts``).

    The field layout must be kept in sync with ``_dynfunc.c``.
    """

    _fields = [
        ("globals", types.pyobject),
        ("consts", types.pyobject),
    ]
49+
50+
51+
class CPUContext(BaseContext):
    """
    Changes BaseContext calling convention
    """

    allow_dynamic_globals = True

    def __init__(self, typingctx, target="cpu"):
        """Create the context for *typingctx*, defaulting to the "cpu" target."""
        super().__init__(typingctx, target)

    # Overrides
    def create_module(self, name):
        """Return a new empty module called *name* from the internal codegen."""
        return self._internal_codegen._create_empty_module(name)

    @global_compiler_lock
    def init(self):
        """Set up the JIT codegen and install the external C math functions.

        Runs under the global compiler lock.
        """
        self.is32bit = utils.MACHINE_BITS == 32
        self._internal_codegen = codegen.JITCPUCodegen("numba.exec")

        # Add ARM ABI functions from libgcc_s
        if platform.machine() == "armv7l":
            ll.load_library_permanently("libgcc_s.so.1")

        # Map external C functions.
        externals.c_math_functions.install(self)

    def load_additional_registries(self):
        """Initialize the NRT and install the CPU implementation registries.

        Several modules are imported only for their import-time side effects
        (hence the ``noqa F401`` markers); the target-specific registries are
        then installed explicitly, followed by third-party entry points.
        """
        # Only initialize the NRT once something is about to be compiled. The
        # "initialized" state doesn't need to be threadsafe, there's a lock
        # around the internal compilation and the rtsys.initialize call can be
        # made multiple times, worst case init just gets called a bit more
        # often than optimal.
        rtsys.initialize(self)

        # Add implementations that work via import
        from numba.cpython import (
            enumimpl,  # noqa F401
            iterators,  # noqa F401
            rangeobj,  # noqa F401
            tupleobj,  # noqa F401
        )  # noqa F401
        from numba.core import optional, inline_closurecall  # noqa F401
        from numba.misc import gdb_hook, literal  # noqa F401
        from numba.np import linalg, arraymath, arrayobj  # noqa F401
        from numba.np.random import generator_core, generator_methods  # noqa F401
        from numba.np.polynomial import polynomial_core, polynomial_functions  # noqa F401
        from numba.typed import typeddict, dictimpl  # noqa F401
        from numba.typed import typedlist, listobject  # noqa F401
        from numba.experimental import jitclass, function_type  # noqa F401
        from numba.np import npdatetime  # noqa F401

        # Add target specific implementations
        from numba.np import npyimpl
        from numba.cpython import cmathimpl, mathimpl, printimpl, randomimpl
        from numba.misc import cffiimpl
        from numba.experimental.jitclass.base import (
            ClassBuilder as jitclassimpl,
        )

        self.install_registry(cmathimpl.registry)
        self.install_registry(cffiimpl.registry)
        self.install_registry(mathimpl.registry)
        self.install_registry(npyimpl.registry)
        self.install_registry(printimpl.registry)
        self.install_registry(randomimpl.registry)
        self.install_registry(jitclassimpl.class_impl_registry)

        # load 3rd party extensions
        numba.core.entrypoints.init_all()

        # fix for #8940
        from numba.np.unsafe import ndarray  # noqa F401

    @property
    def target_data(self):
        """The ``target_data`` of the internal codegen."""
        return self._internal_codegen.target_data

    def with_aot_codegen(self, name, **aot_options):
        """Return a subtarget of this context that uses an AOT codegen.

        *name* and *aot_options* are forwarded to ``codegen.AOTCPUCodegen``;
        the subtarget is created with ``aot_mode=True``.
        """
        aot_codegen = codegen.AOTCPUCodegen(name, **aot_options)
        return self.subtarget(_internal_codegen=aot_codegen, aot_mode=True)

    def codegen(self):
        """Return the internal codegen object used by this context."""
        return self._internal_codegen

    @property
    def call_conv(self):
        """A ``callconv.CPUCallConv`` bound to this context.

        A new instance is constructed on every access.
        """
        return callconv.CPUCallConv(self)

    def get_env_body(self, builder, envptr):
        """
        From the given *envptr* (a pointer to a _dynfunc.Environment object),
        get a EnvBody allowing structured access to environment fields.
        """
        body_ptr = cgutils.pointer_add(
            builder, envptr, _dynfunc._impl_info["offsetof_env_body"]
        )
        return EnvBody(self, builder, ref=body_ptr, cast_ref=True)

    def get_env_manager(self, builder, return_pyobject=False):
        """Return the environment manager for the function being compiled.

        Loads the environment global declared for ``self.fndesc`` in the
        builder's module, emits the environment sentry through the Python
        API (forwarding *return_pyobject*), and wraps the environment body
        in the manager returned by ``pyapi.get_env_manager``.
        """
        envgv = self.declare_env_global(
            builder.module, self.get_env_name(self.fndesc)
        )
        envarg = builder.load(envgv)
        pyapi = self.get_python_api(builder)
        pyapi.emit_environment_sentry(
            envarg,
            return_pyobject=return_pyobject,
            debug_msg=self.fndesc.env_name,
        )
        env_body = self.get_env_body(builder, envarg)
        return pyapi.get_env_manager(self.environment, env_body, envarg)

    def get_generator_state(self, builder, genptr, return_type):
        """
        From the given *genptr* (a pointer to a _dynfunc.Generator object),
        get a pointer to its state area.
        """
        return cgutils.pointer_add(
            builder,
            genptr,
            _dynfunc._impl_info["offsetof_generator_state"],
            return_type=return_type,
        )

    def build_list(self, builder, list_type, items):
        """
        Build a list from the Numba *list_type* and its initial *items*.
        """
        # Imported lazily, at call time rather than at module import.
        from numba.cpython import listobj

        return listobj.build_list(self, builder, list_type, items)

    def build_set(self, builder, set_type, items):
        """
        Build a set from the Numba *set_type* and its initial *items*.
        """
        # Imported lazily, at call time rather than at module import.
        from numba.cpython import setobj

        return setobj.build_set(self, builder, set_type, items)

    def build_map(self, builder, dict_type, item_types, items):
        """
        Build a map from the Numba *dict_type*, its *item_types* and its
        initial *items*.
        """
        # Imported lazily, at call time rather than at module import.
        from numba.typed import dictobject

        return dictobject.build_map(self, builder, dict_type, item_types, items)

    def post_lowering(self, mod, library):
        """Post-process the lowered module *mod* and link the runtime library.

        Applies the fastmath rewrite when ``self.fastmath`` is set, fixes up
        64-bit div/rem on 32-bit machines, and adds ``rtsys.library`` as a
        linking library of *library*.
        """
        if self.fastmath:
            fastmathpass.rewrite_module(mod, self.fastmath)

        if self.is32bit:
            # 32-bit machine needs to replace all 64-bit div/rem to avoid
            # calls to compiler-rt
            intrinsics.fix_divmod(mod)

        library.add_linking_library(rtsys.library)

    def create_cpython_wrapper(
        self, library, fndesc, env, call_helper, release_gil=False
    ):
        """Build the CPython wrapper for *fndesc* and add it to *library*.

        The wrapper is generated by ``PyCallWrapper`` in a fresh module named
        "wrapper"; *release_gil* is forwarded to the wrapper builder.
        """
        wrapper_module = self.create_module("wrapper")
        fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
        wrapper_callee = ir.Function(
            wrapper_module, fnty, fndesc.llvm_func_name
        )
        builder = PyCallWrapper(
            self,
            wrapper_module,
            wrapper_callee,
            fndesc,
            env,
            call_helper=call_helper,
            release_gil=release_gil,
        )
        builder.build()
        library.add_ir_module(wrapper_module)

    def create_cfunc_wrapper(self, library, fndesc, env, call_helper):
        """Build the cfunc wrapper for *fndesc* and add it to *library*.

        The wrapper, named ``fndesc.llvm_cfunc_wrapper_name``, takes and
        returns plain LLVM value types and calls the compiled function via
        this context's calling convention. If the call reports an error, the
        exception is raised and written out as unraisable while holding the
        GIL. *env* and *call_helper* are not used by this method.
        """
        wrapper_module = self.create_module("cfunc_wrapper")
        fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
        wrapper_callee = ir.Function(
            wrapper_module, fnty, fndesc.llvm_func_name
        )

        ll_argtypes = [self.get_value_type(ty) for ty in fndesc.argtypes]
        ll_return_type = self.get_value_type(fndesc.restype)
        wrapty = ir.FunctionType(ll_return_type, ll_argtypes)
        wrapfn = ir.Function(
            wrapper_module, wrapty, fndesc.llvm_cfunc_wrapper_name
        )
        builder = ir.IRBuilder(wrapfn.append_basic_block("entry"))

        status, out = self.call_conv.call_function(
            builder,
            wrapper_callee,
            fndesc.restype,
            fndesc.argtypes,
            wrapfn.args,
            attrs=("noinline",),
        )

        with builder.if_then(status.is_error, likely=False):
            # If (and only if) an error occurred, acquire the GIL
            # and use the interpreter to write out the exception.
            pyapi = self.get_python_api(builder)
            gil_state = pyapi.gil_ensure()
            self.call_conv.raise_error(builder, pyapi, status)
            cstr = self.insert_const_string(builder.module, repr(self))
            strobj = pyapi.string_from_string(cstr)
            pyapi.err_write_unraisable(strobj)
            pyapi.decref(strobj)
            pyapi.gil_release(gil_state)

        builder.ret(out)
        library.add_ir_module(wrapper_module)

    def get_executable(self, library, fndesc, env):
        """
        Create the Python callable for a compiled function.

        Parameters
        ----------
        library
            code library providing the compiled CPython wrapper; it is kept
            alive together with the returned function
        fndesc
            function descriptor of the compiled function
        env
            an execution environment (from _dynfunc)

        Returns
        -------
        cfunc
            callable function created by ``_dynfunc.make_function``

        As a side effect, *env* is registered with the library's codegen
        under the environment name of *fndesc*.
        """
        # Code generation
        fnptr = library.get_pointer_to_function(
            fndesc.llvm_cpython_wrapper_name
        )

        # Note: we avoid reusing the original docstring to avoid encoding
        # issues on Python 2, see issue #1908
        doc = "compiled wrapper for %r" % (fndesc.qualname,)
        cfunc = _dynfunc.make_function(
            fndesc.lookup_module(),
            fndesc.qualname.split(".")[-1],
            doc,
            fnptr,
            env,
            # objects to keepalive with the function
            (library,),
        )
        library.codegen.set_env(self.get_env_name(fndesc), env)
        return cfunc

    def calc_array_sizeof(self, ndim):
        """
        Calculate the size of an array struct on the CPU target
        """
        # The size is computed for an int32 array with layout "A".
        aryty = types.Array(types.int32, ndim, "A")
        return self.get_abi_sizeof(self.get_value_type(aryty))

    # Overrides
    def get_ufunc_info(self, ufunc_key):
        """Return the ``ufunc_db`` entry for *ufunc_key*."""
        return ufunc_db.get_ufunc_info(ufunc_key)
309+
310+
311+
# ----------------------------------------------------------------------------
312+
# TargetOptions
313+
314+
# Mixin built from the default options accepted by the CPU target; used as a
# base class of CPUTargetOptions.
_options_mixin = include_default_options(
    "nopython",
    "forceobj",
    "looplift",
    "_nrt",
    "debug",
    "boundscheck",
    "nogil",
    "no_rewrites",
    "no_cpython_wrapper",
    "no_cfunc_wrapper",
    "parallel",
    "fastmath",
    "error_model",
    "inline",
    "forceinline",
    "_dbg_extend_lifetimes",
    "_dbg_optnone",
)
333+
334+
335+
class CPUTargetOptions(_options_mixin, TargetOptions):
    """Target options for the CPU target."""

    def finalize(self, flags, options):
        """Fill in CPU defaults for any compilation flags left unset.

        Parameters
        ----------
        flags
            flags object, updated in place
        options
            not used by this implementation
        """
        if not flags.is_set("enable_pyobject"):
            flags.enable_pyobject = True

        if not flags.is_set("enable_looplift"):
            flags.enable_looplift = True

        flags.inherit_if_not_set("nrt", default=True)

        if not flags.is_set("debuginfo"):
            flags.debuginfo = config.DEBUGINFO_DEFAULT

        if not flags.is_set("dbg_extend_lifetimes"):
            if flags.debuginfo:
                # auto turn on extend-lifetimes if debuginfo is on and
                # dbg_extend_lifetimes is not set
                flags.dbg_extend_lifetimes = True
            else:
                # set flag using env-var config
                flags.dbg_extend_lifetimes = config.EXTEND_VARIABLE_LIFETIMES

        if not flags.is_set("boundscheck"):
            # Bounds checking defaults to following the debuginfo setting.
            flags.boundscheck = flags.debuginfo

        # Set unconditionally, unlike the flags above.
        flags.enable_pyobject_looplift = True

        flags.inherit_if_not_set("fastmath")

        flags.inherit_if_not_set("error_model", default="python")

        flags.inherit_if_not_set("forceinline")

        if flags.forceinline:
            # forceinline turns off optnone, just like clang.
            flags.dbg_optnone = False

0 commit comments

Comments
 (0)