Skip to content

Commit 2fa741c

Browse files
authored
Fix lineinfo generation when compile_internal used (#271) (#287)
Lineinfo generation is broken by function implementations that generate code via `context.compile_internal()`. The root cause is that the implementation eventually reaches upstream Numba's `BaseContext._compile_subroutine_no_cache()` method, which ignores the flags in the context stack and creates new ones. The outcome of this is that the debug info kind is forgotten, leading to a default debug info kind of `"FullDebug"` being emitted, which then enables the PTX `debug` target, leading to deoptimized code. This change works around the issue (pending a fix upstream) by overriding the `_compile_subroutine_no_cache()` implementation to use flags from the context stack when they are otherwise not provided. The fix to upstream will look like a similar modification of the method. The `CUDAFlags` class is moved to its own module to avoid circular import dependencies between `compiler.py` and `target.py`. Fixes #271.
1 parent 55379b1 commit 2fa741c

File tree

4 files changed

+110
-39
lines changed

4 files changed

+110
-39
lines changed

numba_cuda/numba/cuda/compiler.py

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
sanitize_compile_result_entries,
1515
CompilerBase,
1616
DefaultPassBuilder,
17-
Flags,
18-
Option,
1917
CompileResult,
2018
)
2119
from numba.core.compiler_lock import global_compiler_lock
@@ -39,45 +37,11 @@
3937
from numba.cuda.codegen import ExternalCodeLibrary
4038
from numba.cuda.cudadrv import nvvm
4139
from numba.cuda.descriptor import cuda_target
40+
from numba.cuda.flags import CUDAFlags
4241
from numba.cuda.target import CUDACABICallConv
4342
from numba.cuda import lowering
4443

4544

46-
def _nvvm_options_type(x):
47-
if x is None:
48-
return None
49-
50-
else:
51-
assert isinstance(x, dict)
52-
return x
53-
54-
55-
def _optional_int_type(x):
56-
if x is None:
57-
return None
58-
59-
else:
60-
assert isinstance(x, int)
61-
return x
62-
63-
64-
class CUDAFlags(Flags):
65-
nvvm_options = Option(
66-
type=_nvvm_options_type,
67-
default=None,
68-
doc="NVVM options",
69-
)
70-
compute_capability = Option(
71-
type=tuple,
72-
default=None,
73-
doc="Compute Capability",
74-
)
75-
max_registers = Option(
76-
type=_optional_int_type, default=None, doc="Max registers"
77-
)
78-
lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
79-
80-
8145
# The CUDACompileResult (CCR) has a specially-defined entry point equal to its
8246
# id. This is because the entry point is used as a key into a dict of
8347
# overloads by the base dispatcher. The id of the CCR is the only small and

numba_cuda/numba/cuda/flags.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from numba.core.compiler import Flags, Option
2+
3+
4+
def _nvvm_options_type(x):
5+
if x is None:
6+
return None
7+
8+
else:
9+
assert isinstance(x, dict)
10+
return x
11+
12+
13+
def _optional_int_type(x):
14+
if x is None:
15+
return None
16+
17+
else:
18+
assert isinstance(x, int)
19+
return x
20+
21+
22+
class CUDAFlags(Flags):
    """Compiler flags for the CUDA target.

    Extends ``Flags`` with the CUDA-specific options declared below.
    """

    # Either None or a dict (validated by _nvvm_options_type).
    nvvm_options = Option(
        type=_nvvm_options_type,
        default=None,
        doc="NVVM options",
    )

    # Converted with tuple(); unset (None) by default.
    compute_capability = Option(
        type=tuple,
        default=None,
        doc="Compute Capability",
    )

    # Either None or an int (validated by _optional_int_type).
    max_registers = Option(
        type=_optional_int_type,
        default=None,
        doc="Max registers",
    )

    # Off unless explicitly enabled.
    lto = Option(
        type=bool,
        default=False,
        doc="Enable Link-time Optimization",
    )

numba_cuda/numba/cuda/target.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,20 @@
22
from functools import cached_property
33
import llvmlite.binding as ll
44
from llvmlite import ir
5-
6-
from numba.core import cgutils, config, itanium_mangler, types, typing
5+
import warnings
6+
7+
from numba.core import (
8+
cgutils,
9+
compiler,
10+
config,
11+
itanium_mangler,
12+
targetconfig,
13+
types,
14+
typing,
15+
)
16+
from numba.core.compiler_lock import global_compiler_lock
717
from numba.core.dispatcher import Dispatcher
18+
from numba.core.errors import NumbaWarning
819
from numba.core.base import BaseContext
920
from numba.core.callconv import BaseCallConv, MinimalCallConv
1021
from numba.core.typing import cmathdecl
@@ -13,6 +24,7 @@
1324
from .cudadrv import nvvm
1425
from numba.cuda import codegen, ufuncs
1526
from numba.cuda.debuginfo import CUDADIBuilder
27+
from numba.cuda.flags import CUDAFlags
1628
from numba.cuda.models import cuda_data_manager
1729

1830
# -----------------------------------------------------------------------------
@@ -288,6 +300,47 @@ def optimize_function(self, func):
288300
def get_ufunc_info(self, ufunc_key):
289301
return ufuncs.get_ufunc_info(ufunc_key)
290302

303+
def _compile_subroutine_no_cache(
    self, builder, impl, sig, locals=None, flags=None
):
    """Compile ``impl`` for ``sig`` as an internal subroutine, uncached.

    Overrides numba.core.base.BaseContext._compile_subroutine_no_cache().
    Modified to use flags from the context stack if they are not provided
    (pending a fix in Numba upstream).

    ``builder`` is accepted for signature compatibility but is not used in
    this body. ``locals`` defaults to an empty dict. When ``flags`` is
    None, a copy of the flags at the top of the config stack is used; if
    the stack is empty a ``NumbaWarning`` is issued and default
    ``CUDAFlags`` are used instead. Flags passed in by the caller are
    modified in place by the ``no_*`` assignments below.

    Returns the compile result, after registering its library as a linking
    library of the active code library.
    """
    local_types = {} if locals is None else locals

    with global_compiler_lock:
        library = self.codegen().create_library(impl.__name__)

        if flags is None:
            config_stack = targetconfig.ConfigStack()
            if not config_stack:
                # No enclosing compilation context was found; fall back to
                # default flags, but make the unexpected state visible.
                warnings.warn(
                    "There should always be a context stack; none found.",
                    NumbaWarning,
                )
                flags = CUDAFlags()
            else:
                # Work on a copy so the flags on the stack stay untouched.
                flags = config_stack.top().copy()

        flags.no_compile = True
        flags.no_cpython_wrapper = True
        flags.no_cfunc_wrapper = True

        cres = compiler.compile_internal(
            self.typing_context,
            self,
            library,
            impl,
            sig.args,
            sig.return_type,
            flags,
            locals=local_types,
        )

        # Linking the subroutine's library into the active one allows the
        # function to be inlined inside its callers.
        self.active_code_library.add_linking_library(cres.library)
        return cres
343+
291344

292345
class CUDACallConv(MinimalCallConv):
293346
def decorate_function(self, fn, args, fe_argtypes, noalias=False):

numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,24 @@ def f():
198198
"debug and lineinfo are mutually exclusive", str(w[0].message)
199199
)
200200

201+
def test_lineinfo_with_compile_internal(self):
    """Lineinfo must not turn into full debug info via compile_internal."""
    # A kernel compiled with lineinfo=True that calls a function implemented
    # using compile_internal should not have full debug info generation
    # enabled. See Numba-CUDA Issue #271,
    # https://github.com/NVIDIA/numba-cuda/issues/271

    @cuda.jit("void(complex128[::1], complex128[::1])", lineinfo=True)
    def complex_abs_use(r, x):
        r[0] = abs(x[0])

    compute_capability = cuda.get_current_device().compute_capability
    first_signature = complex_abs_use.signatures[0]
    overload = complex_abs_use.overloads[first_signature]
    ptx = overload.inspect_asm(compute_capability)

    # The PTX must name the plain target, without the ", debug" suffix.
    target = ".target sm_%s%s" % compute_capability
    target_debug = target + ", debug"
    self.assertIn(target, ptx)
    self.assertNotIn(target_debug, ptx)
218+
201219

202220
if __name__ == "__main__":
203221
unittest.main()

0 commit comments

Comments
 (0)