Add support for default overload values

ZzEeKkAa · ZzEeKkAa · commit ddfda6d5a9eb · 2025-03-14T06:06:07.000-07:00
diff --git a/numba_cuda/numba/cuda/dispatcher.py b/numba_cuda/numba/cuda/dispatcher.py
@@ -968,6 +968,10 @@ def get_call_template(self, args, kws):
 
         A (template, pysig, args, kws) tuple is returned.
         """
+        # Fold keyword arguments and resolve default values
+        pysig, args = self._compiler.fold_argument_types(args, kws)
+        kws = {}
+
         # Ensure an exactly-matching overload is available if we can
         # compile. We proceed with the typing even if we can't compile
         # because we may be able to force a cast on the caller side.
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_nested_calls.py b/numba_cuda/numba/cuda/tests/cudapy/test_nested_calls.py
@@ -0,0 +1,55 @@
+"""
+Test problems in nested calls.
+These are usually due to invalid type conversion across function boundaries.
+"""
+
+
+from numba import cuda
+from numba.core import types
+from numba.cuda.testing import CUDATestCase
+from numba.extending import overload
+import unittest
+import numpy as np
+
+
+def generated_inner(out, x, y=5, z=6):
+    # Pure-Python implementation, provided for the simulator.
+    # out[0] receives x + y for complex x, otherwise x - y; out[1] receives z.
+    first = x + y if isinstance(x, complex) else x - y
+    out[0] = first
+    out[1] = z
+
+
+@overload(generated_inner)
+def ol_generated_inner(out, x, y=5, z=6):
+    # Pick the implementation from the type of x: Complex adds y, else subtracts.
+    def impl_add(out, x, y=5, z=6):
+        out[0], out[1] = x + y, z
+
+    def impl_sub(out, x, y=5, z=6):
+        out[0], out[1] = x - y, z
+    return impl_add if isinstance(x, types.Complex) else impl_sub
+
+
+def call_generated(a, b, out):
+    generated_inner(out, a, z=b)  # y keeps its default; z is set by keyword
+
+
+class TestNestedCall(CUDATestCase):
+    def test_call_generated(self):
+        """Call an overload with keyword and default args from a kernel."""
+        kernel = cuda.jit(call_generated)
+
+        int_out = np.empty(2, dtype=np.int64)
+        kernel[1, 1](1, 2, int_out)
+        self.assertPreciseEqual(tuple(int_out), (-4, 2))
+
+        cplx_out = np.empty(2, dtype=np.complex64)
+        kernel[1, 1](1j, 2, cplx_out)
+        got = tuple(complex(v) for v in cplx_out)
+        # Default y=5 is added to 1j; z=2 is stored as a complex value.
+        self.assertPreciseEqual(got, (5 + 1j, 2 + 0j))
+
+
+if __name__ == '__main__':
+    unittest.main()