
Commit 173e000

dlee992 authored and gmarkall committed
Optimization and debuginfo flag fixes
The default optimization level now follows the `config.OPT` setting: optimizations are enabled by default for any non-zero value of `config.OPT`. The default for debuginfo generation now likewise follows the `config.CUDA_DEBUGINFO_DEFAULT` setting. Test cases that violated the requirement to disable optimization when generating debuginfo have been fixed, so the test suite no longer emits warnings about this.
1 parent cea934b commit 173e000
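
For illustration, a minimal sketch of the new behaviour (the kernel is hypothetical; NUMBA_OPT and NUMBA_CUDA_DEBUGINFO are the standard environment variables backing config.OPT and config.CUDA_DEBUGINFO_DEFAULT):

# With NUMBA_OPT=0 and NUMBA_CUDA_DEBUGINFO=1 set in the environment, a kernel
# compiled without explicit flags now gets debug info with optimization off,
# and no NumbaInvalidConfigWarning is emitted at decoration time.
from numba import cuda

@cuda.jit
def kernel(x):
    x[0] = 1.0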

File tree

9 files changed: 69 additions, 24 deletions


numba_cuda/numba/cuda/compiler.py

Lines changed: 13 additions & 10 deletions
@@ -253,8 +253,8 @@ def cabi_wrap_function(context, lib, fndesc, wrapper_function_name,
 
 
 @global_compiler_lock
-def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
-            fastmath=False, cc=None, opt=True, abi="c", abi_info=None,
+def compile(pyfunc, sig, debug=None, lineinfo=False, device=True,
+            fastmath=False, cc=None, opt=None, abi="c", abi_info=None,
             output='ptx'):
     """Compile a Python function to PTX or LTO-IR for a given set of argument
     types.
@@ -283,7 +283,7 @@ def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
     :param cc: Compute capability to compile for, as a tuple
                ``(MAJOR, MINOR)``. Defaults to ``(5, 0)``.
     :type cc: tuple
-    :param opt: Enable optimizations. Defaults to ``True``.
+    :param opt: Whether to enable optimizations in the compiled code.
     :type opt: bool
     :param abi: The ABI for a compiled function - either ``"numba"`` or
                 ``"c"``. Note that the Numba ABI is not considered stable.
@@ -307,8 +307,11 @@ def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
     if output not in ("ptx", "ltoir"):
         raise NotImplementedError(f'Unsupported output type: {output}')
 
+    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
+    opt = (config.OPT != 0) if opt is None else opt
+
     if debug and opt:
-        msg = ("debug=True with opt=True (the default) "
+        msg = ("debug=True with opt=True "
                "is not supported by CUDA. This may result in a crash"
                " - set debug=False or opt=False.")
         warn(NumbaInvalidConfigWarning(msg))
@@ -359,8 +362,8 @@ def compile(pyfunc, sig, debug=False, lineinfo=False, device=True,
     return code, resty
 
 
-def compile_for_current_device(pyfunc, sig, debug=False, lineinfo=False,
-                               device=True, fastmath=False, opt=True,
+def compile_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
+                               device=True, fastmath=False, opt=None,
                                abi="c", abi_info=None, output='ptx'):
     """Compile a Python function to PTX or LTO-IR for a given signature for the
     current device's compute capabilility. This calls :func:`compile` with an
@@ -371,8 +374,8 @@ def compile_for_current_device(pyfunc, sig, debug=False, lineinfo=False,
                    abi_info=abi_info, output=output)
 
 
-def compile_ptx(pyfunc, sig, debug=False, lineinfo=False, device=False,
-                fastmath=False, cc=None, opt=True, abi="numba", abi_info=None):
+def compile_ptx(pyfunc, sig, debug=None, lineinfo=False, device=False,
+                fastmath=False, cc=None, opt=None, abi="numba", abi_info=None):
     """Compile a Python function to PTX for a given signature. See
     :func:`compile`. The defaults for this function are to compile a kernel
     with the Numba ABI, rather than :func:`compile`'s default of compiling a
@@ -382,8 +385,8 @@ def compile_ptx(pyfunc, sig, debug=False, lineinfo=False, device=False,
                    abi_info=abi_info, output='ptx')
 
 
-def compile_ptx_for_current_device(pyfunc, sig, debug=False, lineinfo=False,
-                                   device=False, fastmath=False, opt=True,
+def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
+                                   device=False, fastmath=False, opt=None,
                                    abi="numba", abi_info=None):
     """Compile a Python function to PTX for a given signature for the current
     device's compute capabilility. See :func:`compile_ptx`."""
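
As a usage sketch of the revised compile_ptx signature (the axpy function is hypothetical; explicit flags behave as before, while omitted ones now defer to the configuration):

from numba import float32
from numba.cuda import compile_ptx

def axpy(r, a, x, y):
    r[0] = a * x[0] + y[0]

sig = (float32[::1], float32, float32[::1], float32[::1])

# Explicit flags: debug with optimization disabled is the supported combination.
ptx, resty = compile_ptx(axpy, sig, debug=True, opt=False)

# Omitting debug and opt defers to config.CUDA_DEBUGINFO_DEFAULT and config.OPT.
ptx, resty = compile_ptx(axpy, sig)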

numba_cuda/numba/cuda/decorators.py

Lines changed: 6 additions & 5 deletions
@@ -12,7 +12,7 @@
 
 
 def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
-        opt=True, lineinfo=False, cache=False, **kws):
+        opt=None, lineinfo=False, cache=False, **kws):
     """
     JIT compile a Python function for CUDA GPUs.
 
@@ -42,9 +42,9 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
        this number of registers per thread. The limit may not be respected if
       the ABI requires a greater number of registers than that requested.
       Useful for increasing occupancy.
-    :param opt: Whether to compile from LLVM IR to PTX with optimization
-                enabled. When ``True``, ``-opt=3`` is passed to NVVM. When
-                ``False``, ``-opt=0`` is passed to NVVM. Defaults to ``True``.
+    :param opt: Whether to compile with optimization enabled. If unspecified,
+                this is determined by the ``OPT`` configuration variable (set
+                via ``NUMBA_OPT``); all non-zero values enable optimization.
     :type opt: bool
     :param lineinfo: If True, generate a line mapping between source code and
       assembly code. This enables inspection of the source code in NVIDIA
@@ -71,11 +71,12 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
         raise DeprecationError(msg)
 
     debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
+    opt = (config.OPT != 0) if opt is None else opt
     fastmath = kws.get('fastmath', False)
     extensions = kws.get('extensions', [])
 
     if debug and opt:
-        msg = ("debug=True with opt=True (the default) "
+        msg = ("debug=True with opt=True "
                "is not supported by CUDA. This may result in a crash"
                " - set debug=False or opt=False.")
         warn(NumbaInvalidConfigWarning(msg))
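
A minimal sketch of the warning path above, assuming config.OPT is non-zero (its usual default), so an unspecified opt resolves to True:

import warnings

from numba import cuda

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter('always')

    @cuda.jit(debug=True)  # opt is unspecified, so it follows config.OPT
    def kernel():
        pass

# A NumbaInvalidConfigWarning about debug=True with opt=True is expected here.
assert any('not supported by CUDA' in str(x.message) for x in w)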

numba_cuda/numba/cuda/simulator/api.py

Lines changed: 5 additions & 2 deletions
@@ -9,6 +9,7 @@
 from .cudadrv.devices import require_context, reset, gpus  # noqa: F401
 from .kernel import FakeCUDAKernel
 from numba.core.sigutils import is_signature
+from numba.core import config
 from warnings import warn
 from ..args import In, Out, InOut  # noqa: F401
 
@@ -80,9 +81,9 @@ def elapsed_time(self, event):
 event = Event
 
 
-def jit(func_or_sig=None, device=False, debug=False, argtypes=None,
+def jit(func_or_sig=None, device=False, debug=None, argtypes=None,
         inline=False, restype=None, fastmath=False, link=None,
-        boundscheck=None, opt=True, cache=None
+        boundscheck=None, opt=None, cache=None
         ):
     # Here for API compatibility
     if boundscheck:
@@ -91,6 +92,8 @@ def jit(func_or_sig=None, device=False, debug=False, argtypes=None,
     if link is not None:
         raise NotImplementedError('Cannot link PTX in the simulator')
 
+    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
+
     # Check for first argument specifying types - in that case the
     # decorator is not being passed a function
     if (func_or_sig is None or is_signature(func_or_sig)
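
A small sketch of the simulator behaviour after this change, assuming the simulator is active (e.g. NUMBA_ENABLE_CUDASIM=1 set before importing Numba):

import numpy as np

from numba import cuda

# debug is resolved from config.CUDA_DEBUGINFO_DEFAULT when omitted; explicit
# keyword arguments still take precedence over the config defaults.
@cuda.jit(debug=True, opt=False)
def f(x):
    x[0] = 42

a = np.zeros(1)
f[1, 1](a)  # launch with one block of one thread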

numba_cuda/numba/cuda/tests/cudapy/test_compiler.py

Lines changed: 2 additions & 2 deletions
@@ -101,15 +101,15 @@ def test_device_function_with_debug(self):
         def f():
             pass
 
-        ptx, resty = compile_ptx(f, (), device=True, debug=True)
+        ptx, resty = compile_ptx(f, (), device=True, debug=True, opt=False)
         self.check_debug_info(ptx)
 
     def test_kernel_with_debug(self):
         # Inspired by (but not originally affected by) Issue #6719
         def f():
             pass
 
-        ptx, resty = compile_ptx(f, (), debug=True)
+        ptx, resty = compile_ptx(f, (), debug=True, opt=False)
         self.check_debug_info(ptx)
 
     def check_line_info(self, ptx):

numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def test_jit_debug_simulator(self):
         # Ensure that the jit decorator accepts the debug kwarg when the
         # simulator is in use - see Issue #6615.
         with override_config('ENABLE_CUDASIM', 1):
-            @cuda.jit(debug=True)
+            @cuda.jit(debug=True, opt=False)
             def f(x):
                 pass
 

numba_cuda/numba/cuda/tests/cudapy/test_exception.py

Lines changed: 1 addition & 1 deletion
@@ -160,7 +160,7 @@ def test_raise_in_device_function(self):
         def f():
             raise ValueError(msg)
 
-        @cuda.jit(debug=True)
+        @cuda.jit(debug=True, opt=False)
         def kernel():
             f()
 

numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py

Lines changed: 2 additions & 2 deletions
@@ -198,8 +198,8 @@ def f10(r, x, y):
             r[0] = x / y
 
         sig = (float32[::1], float32, float32)
-        fastver = cuda.jit(sig, fastmath=True, debug=True)(f10)
-        precver = cuda.jit(sig, debug=True)(f10)
+        fastver = cuda.jit(sig, fastmath=True, debug=True, opt=False)(f10)
+        precver = cuda.jit(sig, debug=True, opt=False)(f10)
         nelem = 10
         ary = np.empty(nelem, dtype=np.float32)
         with self.assertRaises(ZeroDivisionError):

numba_cuda/numba/cuda/tests/cudapy/test_userexc.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def setUp(self):
         self.skip_if_lto("Exceptions not supported with LTO")
 
     def test_user_exception(self):
-        @cuda.jit("void(int32)", debug=True)
+        @cuda.jit("void(int32)", debug=True, opt=False)
         def test_exc(x):
             if x == 1:
                 raise MyError

numba_cuda/numba/cuda/tests/cudapy/test_warning.py

Lines changed: 38 additions & 0 deletions
@@ -3,6 +3,7 @@
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
 from numba.tests.support import linux_only, override_config
 from numba.core.errors import NumbaPerformanceWarning
+from numba.core import config
 import warnings
 
 
@@ -134,6 +135,43 @@ def test_no_warn_with_no_debug_and_opt_kwargs(self):
 
         self.assertEqual(len(w), 0)
 
+    def test_no_warn_on_debug_and_opt_with_config(self):
+        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
+            with override_config('OPT', config._OptLevel(0)):
+                with warnings.catch_warnings(record=True) as w:
+                    cuda.jit()
+
+                self.assertEqual(len(w), 0)
+
+            with warnings.catch_warnings(record=True) as w:
+                cuda.jit(opt=False)
+
+            self.assertEqual(len(w), 0)
+
+        with override_config('OPT', config._OptLevel(0)):
+            with warnings.catch_warnings(record=True) as w:
+                cuda.jit(debug=True)
+
+            self.assertEqual(len(w), 0)
+
+    def test_warn_on_debug_and_opt_with_config(self):
+        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
+            for opt in (1, 2, 3, 'max'):
+                with override_config('OPT', config._OptLevel(opt)):
+                    with warnings.catch_warnings(record=True) as w:
+                        cuda.jit()
+
+                    self.assertEqual(len(w), 1)
+                    self.assertIn('not supported by CUDA', str(w[0].message))
+
+        for opt in (1, 2, 3, 'max'):
+            with override_config('OPT', config._OptLevel(opt)):
+                with warnings.catch_warnings(record=True) as w:
+                    cuda.jit(debug=True)
+
+                self.assertEqual(len(w), 1)
+                self.assertIn('not supported by CUDA', str(w[0].message))
+
 
 if __name__ == '__main__':
     unittest.main()
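
For context on the override pattern used in these tests: config._OptLevel is Numba's internal wrapper type for optimization levels, including the special 'max' level; the tests construct it directly. A minimal sketch, assuming only what the tests themselves exercise:

from numba.core import config
from numba.tests.support import override_config

with override_config('OPT', config._OptLevel(0)):
    assert (config.OPT != 0) is False  # opt would default to disabled

with override_config('OPT', config._OptLevel('max')):
    assert config.OPT != 0  # any non-zero level enables optimization by default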
