@@ -38,13 +38,13 @@ def _compile_memsys_module(self):
3838
3939 self ._memsys_module = module
4040
def _ensure_allocate(self, stream):
    """Allocate the NRT memsys on ``stream`` unless one already exists.

    Does nothing when ``self._memsys`` is already set; otherwise the
    work is delegated to ``self.allocate(stream)``.
    """
    if self._memsys is None:
        self.allocate(stream)
4646
47- def allocate (self ):
47+ def allocate (self , stream ):
4848 from numba .cuda import device_array
4949
5050 if self ._memsys_module is None :
@@ -53,7 +53,10 @@ def allocate(self):
5353 if self ._memsys is None :
5454 # Allocate space for NRT_MemSys
5555 # TODO: determine the size of NRT_MemSys at runtime
56- self ._memsys = device_array ((40 ,), dtype = "i1" )
56+ self ._memsys = device_array ((40 ,), dtype = "i1" , stream = stream )
57+ # TODO: Memsys module needs a stream that's consistent with the
58+ # system's stream.
59+ self .set_memsys_to_module (self ._memsys_module , stream = stream )
5760
5861 def _single_thread_launch (self , module , stream , name , params = ()):
5962 func = module .get_function (name )
@@ -84,18 +87,17 @@ def initialize(self, stream):
8487
def enable(self, stream):
    """Launch the ``NRT_MemSys_enable`` kernel of the memsys module.

    The launch goes through ``self._single_thread_launch`` on the given
    ``stream``. The kernel name must match the symbol exactly, so it is
    spelled without any surrounding whitespace.
    """
    self._single_thread_launch(
        self._memsys_module, stream, "NRT_MemSys_enable")
8891
def disable(self, stream):
    """Launch the ``NRT_MemSys_disable`` kernel of the memsys module.

    The launch goes through ``self._single_thread_launch`` on the given
    ``stream``. The kernel name must match the symbol exactly, so it is
    spelled without any surrounding whitespace.
    """
    self._single_thread_launch(
        self._memsys_module, stream, "NRT_MemSys_disable")
9295
9396 def _copy_memsys_to_host (self , stream = 0 ):
94- self ._ensure_allocate ()
97+ self ._ensure_allocate (stream )
9598 self ._ensure_initialize (stream )
9699
97100 # Q: What stream should we execute this on?
98- # read the stats
99101 dt = np .dtype ([
100102 ('alloc' , np .uint64 ),
101103 ('free' , np .uint64 ),
@@ -116,33 +118,34 @@ def _copy_memsys_to_host(self, stream=0):
116118 return stats_for_read [0 ]
117119
def get_allocation_stats(self):
    """Return the memsys counters as an ``_nrt_mstats`` record.

    The record comes from ``self._copy_memsys_to_host()``, called with its
    default stream. The four counters are read by field name, so the keys
    must match the field names exactly.
    """
    memsys = self._copy_memsys_to_host()
    return _nrt_mstats(
        alloc=memsys["alloc"],
        free=memsys["free"],
        mi_alloc=memsys["mi_alloc"],
        mi_free=memsys["mi_free"],
    )
134128
def set_memsys_to_module(self, module, stream):
    """Point ``module`` at this runtime's memsys allocation.

    Launches the ``NRT_MemSys_set`` kernel from ``module`` on ``stream``
    via ``self._single_thread_launch``, passing the device pointer of
    ``self._memsys`` as the only kernel parameter.

    Raises:
        RuntimeError: if ``self._memsys`` has not been allocated yet.
    """
    import logging

    if self._memsys is None:
        raise RuntimeError(
            "Please allocate NRT Memsys first before initializing.")

    memsys_ptr = self._memsys.device_ctypes_pointer
    # Diagnostic only: emitted at DEBUG level instead of printed, so normal
    # runs do not write to stdout.
    logging.getLogger(__name__).debug(
        "Setting %s to %s", memsys_ptr, module)
    self._single_thread_launch(
        module,
        stream,
        "NRT_MemSys_set",
        [memsys_ptr],
    )
146141
def print_memsys(self, stream):
    """Launch the ``NRT_MemSys_print`` kernel of the memsys module.

    ``cuda.synchronize()`` is called first, then the kernel is launched on
    ``stream`` through ``self._single_thread_launch``.
    """
    cuda.synchronize()
    self._single_thread_launch(
        self._memsys_module,
        stream,
        "NRT_MemSys_print",
    )
149+
147150
# Module-level runtime instance.
rtsys = _Runtime()
0 commit comments