Skip to content

Commit 8805e99

Browse files
committed
Passing no return test
1 parent 81118de commit 8805e99

File tree

3 files changed

+29
-23
lines changed

3 files changed

+29
-23
lines changed

numba_cuda/numba/cuda/dispatcher.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -341,8 +341,6 @@ def launch(self, args, griddim, blockdim, stream=0, sharedmem=0):
341341
# Prepare kernel
342342
cufunc = self._codelibrary.get_cufunc()
343343

344-
rtsys.allocate()
345-
346344
if self.debug:
347345
excname = cufunc.name + "__errcode__"
348346
excmem, excsz = cufunc.module.get_global_symbol(excname)
@@ -364,8 +362,11 @@ def launch(self, args, griddim, blockdim, stream=0, sharedmem=0):
364362

365363
stream_handle = stream and stream.handle or zero_stream
366364

365+
rtsys.allocate(stream_handle)
367366
rtsys.set_memsys_to_module(cufunc.module, stream_handle)
368367
rtsys.initialize(stream_handle)
368+
rtsys.enable(stream_handle)
369+
rtsys.print_memsys(0)
369370

370371
# Invoke kernel
371372
driver.launch_kernel(cufunc.handle,

numba_cuda/numba/cuda/runtime/nrt.py

Lines changed: 23 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,13 @@ def _compile_memsys_module(self):
3838

3939
self._memsys_module = module
4040

41-
def _ensure_allocate(self):
41+
def _ensure_allocate(self, stream):
4242
if self._memsys is not None:
4343
return
4444

45-
self.allocate()
45+
self.allocate(stream)
4646

47-
def allocate(self):
47+
def allocate(self, stream):
4848
from numba.cuda import device_array
4949

5050
if self._memsys_module is None:
@@ -53,7 +53,10 @@ def allocate(self):
5353
if self._memsys is None:
5454
# Allocate space for NRT_MemSys
5555
# TODO: determine the size of NRT_MemSys at runtime
56-
self._memsys = device_array((40,), dtype="i1")
56+
self._memsys = device_array((40,), dtype="i1", stream=stream)
57+
# TODO: Memsys module needs a stream that's consistent with the
58+
# system's stream.
59+
self.set_memsys_to_module(self._memsys_module, stream=stream)
5760

5861
def _single_thread_launch(self, module, stream, name, params=()):
5962
func = module.get_function(name)
@@ -84,18 +87,17 @@ def initialize(self, stream):
8487

8588
def enable(self, stream):
8689
self._single_thread_launch(
87-
self._memsys_module, stream, "NR_MemSys_enable")
90+
self._memsys_module, stream, "NRT_MemSys_enable")
8891

8992
def disable(self, stream):
9093
self._single_thread_launch(
91-
self._memsys_module, stream, "NR_MemSys_disable")
94+
self._memsys_module, stream, "NRT_MemSys_disable")
9295

9396
def _copy_memsys_to_host(self, stream=0):
94-
self._ensure_allocate()
97+
self._ensure_allocate(stream)
9598
self._ensure_initialize(stream)
9699

97100
# Q: What stream should we execute this on?
98-
# read the stats
99101
dt = np.dtype([
100102
('alloc', np.uint64),
101103
('free', np.uint64),
@@ -116,33 +118,34 @@ def _copy_memsys_to_host(self, stream=0):
116118
return stats_for_read[0]
117119

118120
def get_allocation_stats(self):
119-
# This is commented out to test the 700 error code from cuda.
120-
# if self._memsys is None or (not self._initialized):
121-
# return _nrt_mstats(
122-
# alloc=0,
123-
# free=0,
124-
# mi_alloc=0,
125-
# mi_free=0
126-
# )
127121
memsys = self._copy_memsys_to_host()
128122
return _nrt_mstats(
129-
alloc=memsys.alloc,
130-
free=memsys.free,
131-
mi_alloc=memsys.mi_alloc,
132-
mi_free=memsys.mi_free
123+
alloc=memsys["alloc"],
124+
free=memsys["free"],
125+
mi_alloc=memsys["mi_alloc"],
126+
mi_free=memsys["mi_free"]
133127
)
134128

135129
def set_memsys_to_module(self, module, stream):
136130
if self._memsys is None:
137131
raise RuntimeError(
138132
"Please allocate NRT Memsys first before initializing.")
139133

134+
print(f"Setting {self._memsys.device_ctypes_pointer} to {module}")
140135
self._single_thread_launch(
141136
module,
142137
stream,
143138
"NRT_MemSys_set",
144139
[self._memsys.device_ctypes_pointer,]
145140
)
146141

142+
def print_memsys(self, stream):
143+
cuda.synchronize()
144+
self._single_thread_launch(
145+
self._memsys_module,
146+
stream,
147+
"NRT_MemSys_print"
148+
)
149+
147150

148151
rtsys = _Runtime()

numba_cuda/numba/cuda/tests/nrt/test_nrt.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def test_no_return(self):
2525
"""
2626
n = 10
2727

28-
@cuda.jit
28+
@cuda.jit(debug=True)
2929
def kernel():
3030
for i in range(n):
3131
temp = cuda_empty(2, np.float64) # noqa: F841
@@ -35,6 +35,8 @@ def kernel():
3535

3636
with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
3737
kernel[1,1]()
38+
print("After kernel launch...")
39+
rtsys.print_memsys(0)
3840
cur_stats = rtsys.get_allocation_stats()
3941
self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
4042
self.assertEqual(cur_stats.free - init_stats.free, n)

0 commit comments

Comments
 (0)