|
| 1 | +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 2 | +# SPDX-License-Identifier: BSD-2-Clause |
| 3 | + |
| 4 | +import platform |
| 5 | + |
| 6 | +import llvmlite.binding as ll |
| 7 | +from llvmlite import ir |
| 8 | + |
| 9 | +from numba import _dynfunc |
| 10 | +from numba.core.callwrapper import PyCallWrapper |
| 11 | +from numba.core.base import BaseContext |
| 12 | +from numba.core import ( |
| 13 | + utils, |
| 14 | + types, |
| 15 | + config, |
| 16 | + cgutils, |
| 17 | + callconv, |
| 18 | + codegen, |
| 19 | + externals, |
| 20 | + fastmathpass, |
| 21 | + intrinsics, |
| 22 | +) |
| 23 | +from numba.core.options import TargetOptions, include_default_options |
| 24 | +from numba.core.runtime import rtsys |
| 25 | +from numba.core.compiler_lock import global_compiler_lock |
| 26 | +import numba.core.entrypoints |
| 27 | + |
| 28 | +# Re-export these options; they are used from the cpu module throughout the |
| 29 | +# code base. |
| 30 | +from numba.cuda.core.options import ( |
| 31 | + ParallelOptions, # noqa F401 |
| 32 | + FastMathOptions, # noqa F401 |
| 33 | + InlineOptions, # noqa F401 |
| 34 | +) # noqa F401 |
| 35 | +from numba.np import ufunc_db |
| 36 | + |
| 37 | +# Keep those structures in sync with _dynfunc.c. |
| 38 | + |
| 39 | + |
class ClosureBody(cgutils.Structure):
    """
    Structured accessor with a single ``env`` field typed as a pyobject.

    The field layout has to stay in sync with ``_dynfunc.c``.
    """

    _fields = [
        ("env", types.pyobject),
    ]
| 42 | + |
| 43 | + |
class EnvBody(cgutils.Structure):
    """
    Structured accessor exposing the ``globals`` and ``consts`` pyobject
    fields of an environment body.

    The field layout has to stay in sync with ``_dynfunc.c``.
    """

    _fields = [("globals", types.pyobject), ("consts", types.pyobject)]
| 49 | + |
| 50 | + |
class CPUContext(BaseContext):
    """
    BaseContext specialisation that changes the calling convention to the
    CPU one and drives code generation through a CPU codegen object.
    """

    allow_dynamic_globals = True

    def __init__(self, typingctx, target="cpu"):
        """Forward to BaseContext, defaulting *target* to ``"cpu"``."""
        super().__init__(typingctx, target)

    # Overrides
    def create_module(self, name):
        """Return a new empty module called *name* from the internal codegen."""
        internal_codegen = self._internal_codegen
        return internal_codegen._create_empty_module(name)

    @global_compiler_lock
    def init(self):
        """
        Set up per-context state under the global compiler lock: the 32-bit
        flag, the JIT codegen, and the external C math functions.
        """
        self.is32bit = utils.MACHINE_BITS == 32
        self._internal_codegen = codegen.JITCPUCodegen("numba.exec")

        # On armv7l the ARM ABI functions come from libgcc_s.
        machine = platform.machine()
        if machine == "armv7l":
            ll.load_library_permanently("libgcc_s.so.1")

        # Map external C functions.
        externals.c_math_functions.install(self)

    def load_additional_registries(self):
        """
        Initialise the runtime, import the modules that register
        implementations as a side effect of being imported, install the
        target-specific registries and finally load third-party extensions.
        """
        # The NRT is only initialised once something is about to be compiled.
        # The "initialized" state does not need to be threadsafe: there is a
        # lock around the internal compilation and rtsys.initialize may be
        # called multiple times; worst case, init runs a bit more often than
        # optimal.
        rtsys.initialize(self)

        # Implementations that work via import.
        from numba.cpython import (
            enumimpl,  # noqa F401
            iterators,  # noqa F401
            rangeobj,  # noqa F401
            tupleobj,  # noqa F401
        )  # noqa F401
        from numba.core import optional, inline_closurecall  # noqa F401
        from numba.misc import gdb_hook, literal  # noqa F401
        from numba.np import linalg, arraymath, arrayobj  # noqa F401
        from numba.np.random import generator_core, generator_methods  # noqa F401
        from numba.np.polynomial import polynomial_core, polynomial_functions  # noqa F401
        from numba.typed import typeddict, dictimpl  # noqa F401
        from numba.typed import typedlist, listobject  # noqa F401
        from numba.experimental import jitclass, function_type  # noqa F401
        from numba.np import npdatetime  # noqa F401

        # Target specific implementations.
        from numba.np import npyimpl
        from numba.cpython import cmathimpl, mathimpl, printimpl, randomimpl
        from numba.misc import cffiimpl
        from numba.experimental.jitclass.base import (
            ClassBuilder as jitclassimpl,
        )

        # Installation order is kept exactly as listed.
        target_registries = (
            cmathimpl.registry,
            cffiimpl.registry,
            mathimpl.registry,
            npyimpl.registry,
            printimpl.registry,
            randomimpl.registry,
            jitclassimpl.class_impl_registry,
        )
        for registry in target_registries:
            self.install_registry(registry)

        # load 3rd party extensions
        numba.core.entrypoints.init_all()

        # fix for #8940
        from numba.np.unsafe import ndarray  # noqa F401

    @property
    def target_data(self):
        """Target data object of the internal codegen."""
        internal_codegen = self._internal_codegen
        return internal_codegen.target_data

    def with_aot_codegen(self, name, **aot_options):
        """
        Return a subtarget of this context that uses an ``AOTCPUCodegen``
        built from *name* and *aot_options*, with ``aot_mode`` switched on.
        """
        return self.subtarget(
            _internal_codegen=codegen.AOTCPUCodegen(name, **aot_options),
            aot_mode=True,
        )

    def codegen(self):
        """Return the codegen object currently attached to this context."""
        return self._internal_codegen

    @property
    def call_conv(self):
        """A new ``CPUCallConv`` bound to this context (built on each access)."""
        return callconv.CPUCallConv(self)

    def get_env_body(self, builder, envptr):
        """
        Given *envptr* (a pointer to a _dynfunc.Environment object), return
        an EnvBody giving structured access to the environment fields.
        """
        body_offset = _dynfunc._impl_info["offsetof_env_body"]
        body_ptr = cgutils.pointer_add(builder, envptr, body_offset)
        return EnvBody(self, builder, ref=body_ptr, cast_ref=True)

    def get_env_manager(self, builder, return_pyobject=False):
        """
        Load the environment global of the current function descriptor, emit
        an environment sentry for it, and return the Python API's environment
        manager built from this context's environment.
        """
        env_name = self.get_env_name(self.fndesc)
        env_global = self.declare_env_global(builder.module, env_name)
        env_value = builder.load(env_global)

        pyapi = self.get_python_api(builder)
        pyapi.emit_environment_sentry(
            env_value,
            return_pyobject=return_pyobject,
            debug_msg=self.fndesc.env_name,
        )

        body = self.get_env_body(builder, env_value)
        return pyapi.get_env_manager(self.environment, body, env_value)

    def get_generator_state(self, builder, genptr, return_type):
        """
        Given *genptr* (a pointer to a _dynfunc.Generator object), return a
        pointer to its state area, typed as *return_type*.
        """
        state_offset = _dynfunc._impl_info["offsetof_generator_state"]
        return cgutils.pointer_add(
            builder, genptr, state_offset, return_type=return_type
        )

    def build_list(self, builder, list_type, items):
        """
        Create a list of Numba type *list_type* initialised with *items*.
        """
        from numba.cpython import listobj

        built = listobj.build_list(self, builder, list_type, items)
        return built

    def build_set(self, builder, set_type, items):
        """
        Create a set of Numba type *set_type* initialised with *items*.
        """
        from numba.cpython import setobj

        built = setobj.build_set(self, builder, set_type, items)
        return built

    def build_map(self, builder, dict_type, item_types, items):
        """
        Create a map of Numba type *dict_type* from *items*, delegating to
        ``numba.typed.dictobject.build_map``.
        """
        from numba.typed import dictobject

        built = dictobject.build_map(
            self, builder, dict_type, item_types, items
        )
        return built

    def post_lowering(self, mod, library):
        """
        Post-process the lowered module *mod* and link the runtime library
        into *library*.
        """
        fastmath = self.fastmath
        if fastmath:
            fastmathpass.rewrite_module(mod, fastmath)

        # 32-bit machines need all 64-bit div/rem replaced to avoid calls to
        # compiler-rt.
        if self.is32bit:
            intrinsics.fix_divmod(mod)

        library.add_linking_library(rtsys.library)

    def create_cpython_wrapper(
        self, library, fndesc, env, call_helper, release_gil=False
    ):
        """
        Build the CPython wrapper for *fndesc* in a fresh ``"wrapper"`` module
        and add that module to *library*.
        """
        module = self.create_module("wrapper")
        callee_type = self.call_conv.get_function_type(
            fndesc.restype, fndesc.argtypes
        )
        callee = ir.Function(module, callee_type, fndesc.llvm_func_name)

        wrapper_builder = PyCallWrapper(
            self,
            module,
            callee,
            fndesc,
            env,
            call_helper=call_helper,
            release_gil=release_gil,
        )
        wrapper_builder.build()

        library.add_ir_module(module)

    def create_cfunc_wrapper(self, library, fndesc, env, call_helper):
        """
        Build the cfunc wrapper for *fndesc* in a fresh ``"cfunc_wrapper"``
        module and add that module to *library*.

        The wrapper calls the compiled function through this context's
        calling convention; on an error status it takes the GIL, raises the
        error via the Python API, writes it out as unraisable and releases
        the GIL again before returning.
        """
        module = self.create_module("cfunc_wrapper")
        callee_type = self.call_conv.get_function_type(
            fndesc.restype, fndesc.argtypes
        )
        callee = ir.Function(module, callee_type, fndesc.llvm_func_name)

        # The wrapper itself is declared with the plain value types.
        arg_value_types = []
        for argty in fndesc.argtypes:
            arg_value_types.append(self.get_value_type(argty))
        ret_value_type = self.get_value_type(fndesc.restype)
        wrapper_type = ir.FunctionType(ret_value_type, arg_value_types)
        wrapper_fn = ir.Function(
            module, wrapper_type, fndesc.llvm_cfunc_wrapper_name
        )
        builder = ir.IRBuilder(wrapper_fn.append_basic_block("entry"))

        status, result = self.call_conv.call_function(
            builder,
            callee,
            fndesc.restype,
            fndesc.argtypes,
            wrapper_fn.args,
            attrs=("noinline",),
        )

        with builder.if_then(status.is_error, likely=False):
            # If (and only if) an error occurred, acquire the GIL and use the
            # interpreter to write out the exception.
            pyapi = self.get_python_api(builder)
            gil_state = pyapi.gil_ensure()
            self.call_conv.raise_error(builder, pyapi, status)
            name_cstr = self.insert_const_string(builder.module, repr(self))
            name_obj = pyapi.string_from_string(name_cstr)
            pyapi.err_write_unraisable(name_obj)
            pyapi.decref(name_obj)
            pyapi.gil_release(gil_state)

        builder.ret(result)
        library.add_ir_module(module)

    def get_executable(self, library, fndesc, env):
        """
        Create the Python-callable function object for *fndesc*.

        The address of the CPython wrapper is looked up in *library* and
        handed to ``_dynfunc.make_function`` together with *env* (an
        execution environment from _dynfunc). *env* is also registered on the
        library's codegen under the environment name of *fndesc*.

        Returns
        -------
        cfunc
            the function object produced by ``_dynfunc.make_function``
        """
        # Code generation
        wrapper_addr = library.get_pointer_to_function(
            fndesc.llvm_cpython_wrapper_name
        )

        # Note: the original docstring is deliberately not reused, to avoid
        # encoding issues on Python 2, see issue #1908
        wrapper_doc = f"compiled wrapper for {fndesc.qualname!r}"
        short_name = fndesc.qualname.rpartition(".")[2]
        # The library is passed along so it is kept alive with the function.
        keepalive = (library,)
        cfunc = _dynfunc.make_function(
            fndesc.lookup_module(),
            short_name,
            wrapper_doc,
            wrapper_addr,
            env,
            keepalive,
        )
        library.codegen.set_env(self.get_env_name(fndesc), env)
        return cfunc

    def calc_array_sizeof(self, ndim):
        """
        Return the ABI size of an array struct with *ndim* dimensions on the
        CPU target.
        """
        array_type = types.Array(types.int32, ndim, "A")
        llvm_type = self.get_value_type(array_type)
        return self.get_abi_sizeof(llvm_type)

    # Overrides
    def get_ufunc_info(self, ufunc_key):
        """Look up *ufunc_key* in ``numba.np.ufunc_db``."""
        return ufunc_db.get_ufunc_info(ufunc_key)
| 309 | + |
| 310 | + |
| 311 | +# ---------------------------------------------------------------------------- |
| 312 | +# TargetOptions |
| 313 | + |
# Mixin built from the default options accepted by the CPU target. The names
# are passed in the same order as before.
_options_mixin = include_default_options(
    *(
        "nopython",
        "forceobj",
        "looplift",
        "_nrt",
        "debug",
        "boundscheck",
        "nogil",
        "no_rewrites",
        "no_cpython_wrapper",
        "no_cfunc_wrapper",
        "parallel",
        "fastmath",
        "error_model",
        "inline",
        "forceinline",
        "_dbg_extend_lifetimes",
        "_dbg_optnone",
    )
)
| 333 | + |
| 334 | + |
class CPUTargetOptions(_options_mixin, TargetOptions):
    """Target options for the CPU target."""

    def finalize(self, flags, options):
        """
        Fill in every flag on *flags* that has not been set explicitly.

        *options* is accepted but not read here.
        """
        # Flags that are simply switched on when left unset.
        for flag_name in ("enable_pyobject", "enable_looplift"):
            if not flags.is_set(flag_name):
                setattr(flags, flag_name, True)

        flags.inherit_if_not_set("nrt", default=True)

        if not flags.is_set("debuginfo"):
            flags.debuginfo = config.DEBUGINFO_DEFAULT

        if not flags.is_set("dbg_extend_lifetimes"):
            # Extend-lifetimes is turned on automatically when debuginfo is
            # on; otherwise the env-var backed config value decides.
            flags.dbg_extend_lifetimes = (
                True if flags.debuginfo else config.EXTEND_VARIABLE_LIFETIMES
            )

        if not flags.is_set("boundscheck"):
            flags.boundscheck = flags.debuginfo

        flags.enable_pyobject_looplift = True

        flags.inherit_if_not_set("fastmath")
        flags.inherit_if_not_set("error_model", default="python")
        flags.inherit_if_not_set("forceinline")

        # forceinline turns off optnone, just like clang.
        if flags.forceinline:
            flags.dbg_optnone = False
0 commit comments