Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion numba_cuda/numba/cuda/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
gpus = devices.gpus


@require_context
def from_cuda_array_interface(desc, owner=None, sync=True):
"""Create a DeviceNDArray from a cuda-array-interface description.
The ``owner`` is the owner of the underlying memory.
Expand All @@ -47,7 +48,9 @@ def from_cuda_array_interface(desc, owner=None, sync=True):

cudevptr_class = driver.binding.CUdeviceptr
devptr = cudevptr_class(desc["data"][0])
data = driver.MemoryPointer(devptr, size=size, owner=owner)
data = driver.MemoryPointer(
current_context(), devptr, size=size, owner=owner
)
stream_ptr = desc.get("stream", None)
if stream_ptr is not None:
stream = external_stream(stream_ptr)
Expand Down
4 changes: 3 additions & 1 deletion numba_cuda/numba/cuda/cudadrv/devicearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
else:
# Make NULL pointer for empty allocation
null = _driver.binding.CUdeviceptr(0)
gpu_data = _driver.MemoryPointer(pointer=null, size=0)
gpu_data = _driver.MemoryPointer(
context=devices.get_context(), pointer=null, size=0
)
self.alloc_size = 0

self.gpu_data = gpu_data
Expand Down
52 changes: 34 additions & 18 deletions numba_cuda/numba/cuda/cudadrv/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,13 +814,14 @@ def allocator():
alloc_key = pointer

finalizer = _hostalloc_finalizer(self, pointer, alloc_key, size, mapped)
ctx = weakref.proxy(self.context)

if mapped:
mem = MappedMemory(pointer, size, finalizer=finalizer)
mem = MappedMemory(ctx, pointer, size, finalizer=finalizer)
self.allocations[alloc_key] = mem
return mem.own()
else:
return PinnedMemory(pointer, size, finalizer=finalizer)
return PinnedMemory(ctx, pointer, size, finalizer=finalizer)

def mempin(self, owner, pointer, size, mapped=False):
"""Implements the pinning of host memory.
Expand All @@ -847,13 +848,18 @@ def allocator():
allocator()

finalizer = _pin_finalizer(self, pointer, alloc_key, mapped)
ctx = weakref.proxy(self.context)

if mapped:
mem = MappedMemory(pointer, size, owner=owner, finalizer=finalizer)
mem = MappedMemory(
ctx, pointer, size, owner=owner, finalizer=finalizer
)
self.allocations[alloc_key] = mem
return mem.own()
else:
return PinnedMemory(pointer, size, owner=owner, finalizer=finalizer)
return PinnedMemory(
ctx, pointer, size, owner=owner, finalizer=finalizer
)

def memallocmanaged(self, size, attach_global):
def allocator():
Expand All @@ -871,7 +877,8 @@ def allocator():
alloc_key = ptr

finalizer = _alloc_finalizer(self, ptr, alloc_key, size)
mem = ManagedMemory(ptr, size, finalizer=finalizer)
ctx = weakref.proxy(self.context)
mem = ManagedMemory(ctx, ptr, size, finalizer=finalizer)
self.allocations[alloc_key] = mem
return mem.own()

Expand Down Expand Up @@ -934,7 +941,8 @@ def allocator():
alloc_key = ptr

finalizer = _alloc_finalizer(self, ptr, alloc_key, size)
mem = AutoFreePointer(ptr, size, finalizer=finalizer)
ctx = weakref.proxy(self.context)
mem = AutoFreePointer(ctx, ptr, size, finalizer=finalizer)
self.allocations[alloc_key] = mem
return mem.own()

Expand Down Expand Up @@ -1265,7 +1273,9 @@ def open_ipc_handle(self, handle, size):
dptr = driver.cuIpcOpenMemHandle(handle, flags)

# wrap it
return MemoryPointer(pointer=dptr, size=size)
return MemoryPointer(
context=weakref.proxy(self), pointer=dptr, size=size
)

def enable_peer_access(self, peer_context, flags=0):
"""Enable peer access between the current context and the peer context"""
Expand Down Expand Up @@ -1751,7 +1761,7 @@ def _rebuild(cls, handle_ary, size, source_info, offset):
)


class MemoryPointer:
class MemoryPointer(object):
"""A memory pointer that owns a buffer, with an optional finalizer. Memory
pointers provide reference counting, and instances are initialized with a
reference count of 1.
Expand All @@ -1767,6 +1777,8 @@ class MemoryPointer:
tie the buffer lifetime to the reference count, so that the buffer is freed
when there are no more references.

:param context: The context in which the pointer was allocated.
:type context: Context
:param pointer: The address of the buffer.
:type pointer: ctypes.c_void_p
:param size: The size of the allocation in bytes.
Expand All @@ -1783,10 +1795,11 @@ class MemoryPointer:

__cuda_memory__ = True

def __init__(self, pointer, size, owner=None, finalizer=None):
def __init__(self, context, pointer, size, owner=None, finalizer=None):
if isinstance(pointer, ctypes.c_void_p):
pointer = binding.CUdeviceptr(pointer.value)

self.context = context
self.device_pointer = pointer
self.size = size
self._cuda_memsize_ = size
Expand Down Expand Up @@ -1842,7 +1855,7 @@ def view(self, start, stop=None):
pointer = binding.CUdeviceptr()
ctypes_ptr = drvapi.cu_device_ptr.from_address(pointer.getPtr())
ctypes_ptr.value = base
view = MemoryPointer(pointer, size, owner=self.owner)
view = MemoryPointer(self.context, pointer, size, owner=self.owner)

if isinstance(self.owner, (MemoryPointer, OwnedPointer)):
# Owned by a numba-managed memory segment, take an owned reference
Expand Down Expand Up @@ -1871,7 +1884,7 @@ class AutoFreePointer(MemoryPointer):

def __init__(self, *args, **kwargs):
super(AutoFreePointer, self).__init__(*args, **kwargs)
# Release the self reference to the buffer, so that the finalizer
# Releease the self reference to the buffer, so that the finalizer
# is invoked if all the derived pointers are gone.
self.refct -= 1

Expand All @@ -1898,15 +1911,17 @@ class MappedMemory(AutoFreePointer):

__cuda_memory__ = True

def __init__(self, pointer, size, owner=None, finalizer=None):
def __init__(self, context, pointer, size, owner=None, finalizer=None):
self.owned = owner
self.host_pointer = pointer

devptr = driver.cuMemHostGetDevicePointer(pointer, 0)
self._bufptr_ = self.host_pointer

self.device_pointer = devptr
super(MappedMemory, self).__init__(devptr, size, finalizer=finalizer)
super(MappedMemory, self).__init__(
context, devptr, size, finalizer=finalizer
)
self.handle = self.host_pointer

# For buffer interface
Expand Down Expand Up @@ -1935,7 +1950,8 @@ class PinnedMemory(mviewbuf.MemAlloc):
:type finalizer: function
"""

def __init__(self, pointer, size, owner=None, finalizer=None):
def __init__(self, context, pointer, size, owner=None, finalizer=None):
self.context = context
self.owned = owner
self.size = size
self.host_pointer = pointer
Expand Down Expand Up @@ -1975,10 +1991,10 @@ class ManagedMemory(AutoFreePointer):

__cuda_memory__ = True

def __init__(self, pointer, size, owner=None, finalizer=None):
def __init__(self, context, pointer, size, owner=None, finalizer=None):
self.owned = owner
devptr = pointer
super().__init__(devptr, size, finalizer=finalizer)
super().__init__(context, devptr, size, finalizer=finalizer)

# For buffer interface
self._buflen_ = self.size
Expand Down Expand Up @@ -2302,7 +2318,7 @@ def get_global_symbol(self, name):
driver.cuModuleGetGlobal(
byref(ptr), byref(size), self.handle, name.encode("utf8")
)
return MemoryPointer(ptr, size), size.value
return MemoryPointer(self.context, ptr, size), size.value


class CudaPythonModule(Module):
Expand All @@ -2312,7 +2328,7 @@ def get_function(self, name):

def get_global_symbol(self, name):
ptr, size = driver.cuModuleGetGlobal(self.handle, name.encode("utf8"))
return MemoryPointer(ptr, size), size
return MemoryPointer(self.context, ptr, size), size


FuncAttr = namedtuple(
Expand Down
8 changes: 6 additions & 2 deletions numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,17 @@ def dtor():
dtor_invoked[0] += 1

# Ensure finalizer is called when pointer is deleted
ptr = driver.MemoryPointer(pointer=fake_ptr, size=40, finalizer=dtor)
ptr = driver.MemoryPointer(
context=self.context, pointer=fake_ptr, size=40, finalizer=dtor
)
self.assertEqual(dtor_invoked[0], 0)
del ptr
self.assertEqual(dtor_invoked[0], 1)

# Ensure removing derived pointer doesn't call finalizer
ptr = driver.MemoryPointer(pointer=fake_ptr, size=40, finalizer=dtor)
ptr = driver.MemoryPointer(
context=self.context, pointer=fake_ptr, size=40, finalizer=dtor
)
owned = ptr.own()
del owned
self.assertEqual(dtor_invoked[0], 1)
Expand Down
4 changes: 3 additions & 1 deletion numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import ctypes
import numpy as np
import weakref

from numba import cuda
from numba.cuda.core import config
Expand Down Expand Up @@ -57,9 +58,10 @@ def finalizer():

# We use an AutoFreePointer so that the finalizer will be run when
# the reference count drops to zero.
ctx = weakref.proxy(self.context)
ptr = ctypes.c_void_p(alloc_count)
return cuda.cudadrv.driver.AutoFreePointer(
ptr, size, finalizer=finalizer
ctx, ptr, size, finalizer=finalizer
)

def initialize(self):
Expand Down
Loading