
Commit 173e000

dlee992 authored and gmarkall committed
Optimization and debuginfo flag fixes
The default optimization level now follows the `config.OPT` setting: optimizations are enabled by default for any non-zero value of `config.OPT`. The default for debuginfo generation now likewise follows the `config.CUDA_DEBUGINFO_DEFAULT` setting. Test cases that violated the requirement to disable optimization when generating debuginfo have been fixed, so the test suite no longer emits warnings about this.
1 parent cea934b commit 173e000
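
For illustration, a minimal sketch of the new behaviour (the kernel is hypothetical; NUMBA_OPT and NUMBA_CUDA_DEBUGINFO are the standard environment variables backing config.OPT and config.CUDA_DEBUGINFO_DEFAULT):

# With NUMBA_OPT=0 and NUMBA_CUDA_DEBUGINFO=1 set in the environment, a kernel
# compiled without explicit flags now gets debug info with optimization off,
# and no NumbaInvalidConfigWarning is emitted at decoration time.
from numba import cuda

@cuda.jit
def kernel(x):
    x[0] = 1.0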

File tree

9 files changed: 69 additions, 24 deletions


numba_cuda/numba/cuda/compiler.py

Lines changed: 13 additions & 10 deletions
@@ -253,8 +253,8 @@ def cabi_wrap_function(context, lib, fndesc, wrapper_function_name,
 
 
 @global_compiler_lock
-def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
-            fastmath=False, cc=None, opt=True, abi="c", abi_info=None,
+def compile(pyfunc, sig, debug=None, lineinfo=False, device=True,
+            fastmath=False, cc=None, opt=None, abi="c", abi_info=None,
             output='ptx'):
     """Compile a Python function to PTX or LTO-IR for a given set of argument
     types.
@@ -283,7 +283,7 @@ def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
     :param cc: Compute capability to compile for, as a tuple
                ``(MAJOR, MINOR)``. Defaults to ``(5, 0)``.
     :type cc: tuple
-    :param opt: Enable optimizations. Defaults to ``True``.
+    :param opt: Whether to enable optimizations in the compiled code.
     :type opt: bool
     :param abi: The ABI for a compiled function - either ``"numba"`` or
                 ``"c"``. Note that the Numba ABI is not considered stable.
@@ -307,8 +307,11 @@ def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
     if output not in ("ptx", "ltoir"):
         raise NotImplementedError(f'Unsupported output type: {output}')
 
+    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
+    opt = (config.OPT != 0) if opt is None else opt
+
     if debug and opt:
-        msg = ("debug=True with opt=True (the default) "
+        msg = ("debug=True with opt=True "
                "is not supported by CUDA. This may result in a crash"
                " - set debug=False or opt=False.")
         warn(NumbaInvalidConfigWarning(msg))
@@ -359,8 +362,8 @@ def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
     return code, resty
 
 
-def compile_for_current_device(pyfunc, sig, debug=False, lineinfo=False,
-                               device=True, fastmath=False, opt=True,
+def compile_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
+                               device=True, fastmath=False, opt=None,
                                abi="c", abi_info=None, output='ptx'):
     """Compile a Python function to PTX or LTO-IR for a given signature for the
     current device's compute capabilility. This calls :func:`compile` with an
@@ -371,8 +374,8 @@ def compile_for_current_device(pyfunc, sig, debug=False, lineinfo=False,
                    abi_info=abi_info, output=output)
 
 
-def compile_ptx(pyfunc, sig, debug=False, lineinfo=False, device=False,
-                fastmath=False, cc=None, opt=True, abi="numba", abi_info=None):
+def compile_ptx(pyfunc, sig, debug=None, lineinfo=False, device=False,
+                fastmath=False, cc=None, opt=None, abi="numba", abi_info=None):
     """Compile a Python function to PTX for a given signature. See
     :func:`compile`. The defaults for this function are to compile a kernel
     with the Numba ABI, rather than :func:`compile`'s default of compiling a
@@ -382,8 +385,8 @@ def compile_ptx(pyfunc, sig, debug=False, lineinfo=False, device=False,
                    abi_info=abi_info, output='ptx')
 
 
-def compile_ptx_for_current_device(pyfunc, sig, debug=False, lineinfo=False,
-                                   device=False, fastmath=False, opt=True,
+def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
+                                   device=False, fastmath=False, opt=None,
                                    abi="numba", abi_info=None):
     """Compile a Python function to PTX for a given signature for the current
     device's compute capabilility. See :func:`compile_ptx`."""
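
As a usage sketch of the revised compile_ptx signature (the axpy function is hypothetical; explicit flags behave as before, while omitted ones now defer to the configuration):

from numba import float32
from numba.cuda import compile_ptx

def axpy(r, a, x, y):
    r[0] = a * x[0] + y[0]

sig = (float32[::1], float32, float32[::1], float32[::1])

# Explicit flags: debug with optimization disabled is the supported combination.
ptx, resty = compile_ptx(axpy, sig, debug=True, opt=False)

# Omitting debug and opt defers to config.CUDA_DEBUGINFO_DEFAULT and config.OPT.
ptx, resty = compile_ptx(axpy, sig)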

numba_cuda/numba/cuda/decorators.py

Lines changed: 6 additions & 5 deletions
@@ -12,7 +12,7 @@
 
 
 def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
-        opt=True, lineinfo=False, cache=False, **kws):
+        opt=None, lineinfo=False, cache=False, **kws):
     """
     JIT compile a Python function for CUDA GPUs.
 
@@ -42,9 +42,9 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
        this number of registers per thread. The limit may not be respected if
       the ABI requires a greater number of registers than that requested.
       Useful for increasing occupancy.
-    :param opt: Whether to compile from LLVM IR to PTX with optimization
-                enabled. When ``True``, ``-opt=3`` is passed to NVVM. When
-                ``False``, ``-opt=0`` is passed to NVVM. Defaults to ``True``.
+    :param opt: Whether to compile with optimization enabled. If unspecified,
+                this is determined by the ``OPT`` configuration variable (set
+                via ``NUMBA_OPT``); all non-zero values enable optimization.
     :type opt: bool
     :param lineinfo: If True, generate a line mapping between source code and
       assembly code. This enables inspection of the source code in NVIDIA
@@ -71,11 +71,12 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
         raise DeprecationError(msg)
 
     debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
+    opt = (config.OPT != 0) if opt is None else opt
     fastmath = kws.get('fastmath', False)
     extensions = kws.get('extensions', [])
 
     if debug and opt:
-        msg = ("debug=True with opt=True (the default) "
+        msg = ("debug=True with opt=True "
                "is not supported by CUDA. This may result in a crash"
                " - set debug=False or opt=False.")
         warn(NumbaInvalidConfigWarning(msg))
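
A minimal sketch of the warning path above, assuming config.OPT is non-zero (its usual default), so an unspecified opt resolves to True:

import warnings

from numba import cuda

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter('always')

    @cuda.jit(debug=True)  # opt is unspecified, so it follows config.OPT
    def kernel():
        pass

# A NumbaInvalidConfigWarning about debug=True with opt=True is expected here.
assert any('not supported by CUDA' in str(x.message) for x in w)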

numba_cuda/numba/cuda/simulator/api.py

Lines changed: 5 additions & 2 deletions
@@ -9,6 +9,7 @@
 from .cudadrv.devices import require_context, reset, gpus  # noqa: F401
 from .kernel import FakeCUDAKernel
 from numba.core.sigutils import is_signature
+from numba.core import config
 from warnings import warn
 from ..args import In, Out, InOut  # noqa: F401
 
@@ -80,9 +81,9 @@ def elapsed_time(self, event):
 event = Event
 
 
-def jit(func_or_sig=None, device=False, debug=False, argtypes=None,
+def jit(func_or_sig=None, device=False, debug=None, argtypes=None,
         inline=False, restype=None, fastmath=False, link=None,
-        boundscheck=None, opt=True, cache=None
+        boundscheck=None, opt=None, cache=None
         ):
     # Here for API compatibility
     if boundscheck:
@@ -91,6 +92,8 @@ def jit(func_or_sig=None, device=False, debug=False, argtypes=None,
     if link is not None:
         raise NotImplementedError('Cannot link PTX in the simulator')
 
+    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
+
     # Check for first argument specifying types - in that case the
     # decorator is not being passed a function
     if (func_or_sig is None or is_signature(func_or_sig)
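
A small sketch of the simulator behaviour after this change, assuming the simulator is active (e.g. NUMBA_ENABLE_CUDASIM=1 set before importing Numba):

import numpy as np

from numba import cuda

# debug is resolved from config.CUDA_DEBUGINFO_DEFAULT when omitted; explicit
# keyword arguments still take precedence over the config defaults.
@cuda.jit(debug=True, opt=False)
def f(x):
    x[0] = 42

a = np.zeros(1)
f[1, 1](a)  # launch with one block of one thread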

numba_cuda/numba/cuda/tests/cudapy/test_compiler.py

Lines changed: 2 additions & 2 deletions
@@ -101,15 +101,15 @@ def test_device_function_with_debug(self):
         def f():
             pass
 
-        ptx, resty = compile_ptx(f, (), device=True, debug=True)
+        ptx, resty = compile_ptx(f, (), device=True, debug=True, opt=False)
         self.check_debug_info(ptx)
 
     def test_kernel_with_debug(self):
         # Inspired by (but not originally affected by) Issue #6719
         def f():
             pass
 
-        ptx, resty = compile_ptx(f, (), debug=True)
+        ptx, resty = compile_ptx(f, (), debug=True, opt=False)
         self.check_debug_info(ptx)
 
     def check_line_info(self, ptx):

numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def test_jit_debug_simulator(self):
         # Ensure that the jit decorator accepts the debug kwarg when the
         # simulator is in use - see Issue #6615.
         with override_config('ENABLE_CUDASIM', 1):
-            @cuda.jit(debug=True)
+            @cuda.jit(debug=True, opt=False)
             def f(x):
                 pass
 

numba_cuda/numba/cuda/tests/cudapy/test_exception.py

Lines changed: 1 addition & 1 deletion
@@ -160,7 +160,7 @@ def test_raise_in_device_function(self):
         def f():
             raise ValueError(msg)
 
-        @cuda.jit(debug=True)
+        @cuda.jit(debug=True, opt=False)
         def kernel():
             f()
 

numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py

Lines changed: 2 additions & 2 deletions
@@ -198,8 +198,8 @@ def f10(r, x, y):
             r[0] = x / y
 
         sig = (float32[::1], float32, float32)
-        fastver = cuda.jit(sig, fastmath=True, debug=True)(f10)
-        precver = cuda.jit(sig, debug=True)(f10)
+        fastver = cuda.jit(sig, fastmath=True, debug=True, opt=False)(f10)
+        precver = cuda.jit(sig, debug=True, opt=False)(f10)
         nelem = 10
         ary = np.empty(nelem, dtype=np.float32)
         with self.assertRaises(ZeroDivisionError):

numba_cuda/numba/cuda/tests/cudapy/test_userexc.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def setUp(self):
         self.skip_if_lto("Exceptions not supported with LTO")
 
     def test_user_exception(self):
-        @cuda.jit("void(int32)", debug=True)
+        @cuda.jit("void(int32)", debug=True, opt=False)
         def test_exc(x):
             if x == 1:
                 raise MyError

numba_cuda/numba/cuda/tests/cudapy/test_warning.py

Lines changed: 38 additions & 0 deletions
@@ -3,6 +3,7 @@
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
 from numba.tests.support import linux_only, override_config
 from numba.core.errors import NumbaPerformanceWarning
+from numba.core import config
 import warnings
 
 
@@ -134,6 +135,43 @@ def test_no_warn_with_no_debug_and_opt_kwargs(self):
 
         self.assertEqual(len(w), 0)
 
+    def test_no_warn_on_debug_and_opt_with_config(self):
+        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
+            with override_config('OPT', config._OptLevel(0)):
+                with warnings.catch_warnings(record=True) as w:
+                    cuda.jit()
+
+                self.assertEqual(len(w), 0)
+
+            with warnings.catch_warnings(record=True) as w:
+                cuda.jit(opt=False)
+
+            self.assertEqual(len(w), 0)
+
+        with override_config('OPT', config._OptLevel(0)):
+            with warnings.catch_warnings(record=True) as w:
+                cuda.jit(debug=True)
+
+            self.assertEqual(len(w), 0)
+
+    def test_warn_on_debug_and_opt_with_config(self):
+        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
+            for opt in (1, 2, 3, 'max'):
+                with override_config('OPT', config._OptLevel(opt)):
+                    with warnings.catch_warnings(record=True) as w:
+                        cuda.jit()
+
+                    self.assertEqual(len(w), 1)
+                    self.assertIn('not supported by CUDA', str(w[0].message))
+
+        for opt in (1, 2, 3, 'max'):
+            with override_config('OPT', config._OptLevel(opt)):
+                with warnings.catch_warnings(record=True) as w:
+                    cuda.jit(debug=True)
+
+                self.assertEqual(len(w), 1)
+                self.assertIn('not supported by CUDA', str(w[0].message))
+
 
 if __name__ == '__main__':
     unittest.main()
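
For context on the override pattern used in these tests: config._OptLevel is Numba's internal wrapper type for optimization levels, including the special 'max' level; the tests construct it directly. A minimal sketch, assuming only what the tests themselves exercise:

from numba.core import config
from numba.tests.support import override_config

with override_config('OPT', config._OptLevel(0)):
    assert (config.OPT != 0) is False  # opt would default to disabled

with override_config('OPT', config._OptLevel('max')):
    assert config.OPT != 0  # any non-zero level enables optimization by default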
