|
3 | 3 | import llvmlite.binding as ll |
4 | 4 | from llvmlite import ir |
5 | 5 |
|
6 | | -from numba.core import cgutils, config, itanium_mangler, types, typing, utils |
| 6 | +from numba.core import cgutils, config, itanium_mangler, types, typing |
7 | 7 | from numba.core.dispatcher import Dispatcher |
8 | 8 | from numba.core.base import BaseContext |
9 | 9 | from numba.core.callconv import BaseCallConv, MinimalCallConv |
10 | 10 | from numba.core.typing import cmathdecl |
11 | 11 | from numba.core import datamodel |
12 | 12 |
|
13 | 13 | from .cudadrv import nvvm |
14 | | -from numba.cuda import codegen, nvvmutils, ufuncs |
| 14 | +from numba.cuda import codegen, ufuncs |
15 | 15 | from numba.cuda.debuginfo import CUDADIBuilder |
16 | 16 | from numba.cuda.models import cuda_data_manager |
17 | 17 |
|
@@ -150,136 +150,6 @@ def mangler(self, name, argtypes, *, abi_tags=(), uid=None): |
150 | 150 | return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, |
151 | 151 | uid=uid) |
152 | 152 |
|
153 | | - def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo, |
154 | | - nvvm_options, filename, linenum, |
155 | | - max_registers=None, lto=False): |
156 | | - """ |
157 | | - Adapt a code library ``codelib`` with the numba compiled CUDA kernel |
158 | | - with name ``fname`` and arguments ``argtypes`` for NVVM. |
159 | | - A new library is created with a wrapper function that can be used as |
160 | | - the kernel entry point for the given kernel. |
161 | | -
|
162 | | - Returns the new code library and the wrapper function. |
163 | | -
|
164 | | - Parameters: |
165 | | -
|
166 | | - codelib: The CodeLibrary containing the device function to wrap |
167 | | - in a kernel call. |
168 | | - fndesc: The FunctionDescriptor of the source function. |
169 | | - debug: Whether to compile with debug. |
170 | | - lineinfo: Whether to emit line info. |
171 | | - nvvm_options: Dict of NVVM options used when compiling the new library. |
172 | | - filename: The source filename that the function is contained in. |
173 | | - linenum: The source line that the function is on. |
174 | | - max_registers: The max_registers argument for the code library. |
175 | | - """ |
176 | | - kernel_name = itanium_mangler.prepend_namespace( |
177 | | - fndesc.llvm_func_name, ns='cudapy', |
178 | | - ) |
179 | | - library = self.codegen().create_library(f'{codelib.name}_kernel_', |
180 | | - entry_name=kernel_name, |
181 | | - nvvm_options=nvvm_options, |
182 | | - max_registers=max_registers, |
183 | | - lto=lto |
184 | | - ) |
185 | | - library.add_linking_library(codelib) |
186 | | - wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name, |
187 | | - debug, lineinfo, filename, |
188 | | - linenum) |
189 | | - return library, wrapper |
190 | | - |
191 | | - def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug, |
192 | | - lineinfo, filename, linenum): |
193 | | - """ |
194 | | - Generate the kernel wrapper in the given ``library``. |
195 | | - The function being wrapped is described by ``fndesc``. |
196 | | - The wrapper function is returned. |
197 | | - """ |
198 | | - |
199 | | - argtypes = fndesc.argtypes |
200 | | - arginfo = self.get_arg_packer(argtypes) |
201 | | - argtys = list(arginfo.argument_types) |
202 | | - wrapfnty = ir.FunctionType(ir.VoidType(), argtys) |
203 | | - wrapper_module = self.create_module("cuda.kernel.wrapper") |
204 | | - fnty = ir.FunctionType(ir.IntType(32), |
205 | | - [self.call_conv.get_return_type(types.pyobject)] |
206 | | - + argtys) |
207 | | - func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name) |
208 | | - |
209 | | - prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy') |
210 | | - wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed) |
211 | | - builder = ir.IRBuilder(wrapfn.append_basic_block('')) |
212 | | - |
213 | | - if debug or lineinfo: |
214 | | - directives_only = lineinfo and not debug |
215 | | - debuginfo = self.DIBuilder(module=wrapper_module, |
216 | | - filepath=filename, |
217 | | - cgctx=self, |
218 | | - directives_only=directives_only) |
219 | | - debuginfo.mark_subprogram( |
220 | | - wrapfn, kernel_name, fndesc.args, argtypes, linenum, |
221 | | - ) |
222 | | - debuginfo.mark_location(builder, linenum) |
223 | | - |
224 | | - # Define error handling variable |
225 | | - def define_error_gv(postfix): |
226 | | - name = wrapfn.name + postfix |
227 | | - gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32), |
228 | | - name) |
229 | | - gv.initializer = ir.Constant(gv.type.pointee, None) |
230 | | - return gv |
231 | | - |
232 | | - gv_exc = define_error_gv("__errcode__") |
233 | | - gv_tid = [] |
234 | | - gv_ctaid = [] |
235 | | - for i in 'xyz': |
236 | | - gv_tid.append(define_error_gv("__tid%s__" % i)) |
237 | | - gv_ctaid.append(define_error_gv("__ctaid%s__" % i)) |
238 | | - |
239 | | - callargs = arginfo.from_arguments(builder, wrapfn.args) |
240 | | - status, _ = self.call_conv.call_function( |
241 | | - builder, func, types.void, argtypes, callargs) |
242 | | - |
243 | | - if debug: |
244 | | - # Check error status |
245 | | - with cgutils.if_likely(builder, status.is_ok): |
246 | | - builder.ret_void() |
247 | | - |
248 | | - with builder.if_then(builder.not_(status.is_python_exc)): |
249 | | - # User exception raised |
250 | | - old = ir.Constant(gv_exc.type.pointee, None) |
251 | | - |
252 | | - # Use atomic cmpxchg to prevent rewriting the error status |
253 | | - # Only the first error is recorded |
254 | | - |
255 | | - xchg = builder.cmpxchg(gv_exc, old, status.code, |
256 | | - 'monotonic', 'monotonic') |
257 | | - changed = builder.extract_value(xchg, 1) |
258 | | - |
259 | | - # If the xchange is successful, save the thread ID. |
260 | | - sreg = nvvmutils.SRegBuilder(builder) |
261 | | - with builder.if_then(changed): |
262 | | - for dim, ptr, in zip("xyz", gv_tid): |
263 | | - val = sreg.tid(dim) |
264 | | - builder.store(val, ptr) |
265 | | - |
266 | | - for dim, ptr, in zip("xyz", gv_ctaid): |
267 | | - val = sreg.ctaid(dim) |
268 | | - builder.store(val, ptr) |
269 | | - |
270 | | - builder.ret_void() |
271 | | - |
272 | | - nvvm.set_cuda_kernel(wrapfn) |
273 | | - library.add_ir_module(wrapper_module) |
274 | | - if debug or lineinfo: |
275 | | - debuginfo.finalize() |
276 | | - library.finalize() |
277 | | - |
278 | | - if config.DUMP_LLVM: |
279 | | - utils.dump_llvm(fndesc, wrapper_module) |
280 | | - |
281 | | - return library.get_function(wrapfn.name) |
282 | | - |
283 | 153 | def make_constant_array(self, builder, aryty, arr): |
284 | 154 | """ |
285 | 155 | Unlike the parent version. This returns a a pointer in the constant |
|
0 commit comments