Skip to content

Commit ae7b3bf

Browse files
authored
Revert #536 "perf: remove context threading in various pointer abstractions" (#611)
This reverts commit 9a56516.

That commit changed the public API of `MemoryPointer` and related classes, and the context that they held was used by Arrow (see apache/arrow#48259 (comment)):

> Numba interop tests fail with:
>
> ```
> arrow-dev/lib/python3.12/site-packages/pyarrow/tests/test_cuda_numba_interop.py:233:
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> >   ???
> E   TypeError: MemoryPointer.__init__() got multiple values for argument 'pointer'
> ```

This commit reverts the change, as it was intended to improve performance without changing functionality, but has had a functional change as a side effect. Following the merge of this PR, we should be able to remove some of the `@require_context` decorators with some more targeted changes.
1 parent 835845c commit ae7b3bf

File tree

5 files changed

+50
-23
lines changed

5 files changed

+50
-23
lines changed

numba_cuda/numba/cuda/api.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
gpus = devices.gpus
2222

2323

24+
@require_context
2425
def from_cuda_array_interface(desc, owner=None, sync=True):
2526
"""Create a DeviceNDArray from a cuda-array-interface description.
2627
The ``owner`` is the owner of the underlying memory.
@@ -47,7 +48,9 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
4748

4849
cudevptr_class = driver.binding.CUdeviceptr
4950
devptr = cudevptr_class(desc["data"][0])
50-
data = driver.MemoryPointer(devptr, size=size, owner=owner)
51+
data = driver.MemoryPointer(
52+
current_context(), devptr, size=size, owner=owner
53+
)
5154
stream_ptr = desc.get("stream", None)
5255
if stream_ptr is not None:
5356
stream = external_stream(stream_ptr)

numba_cuda/numba/cuda/cudadrv/devicearray.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
108108
else:
109109
# Make NULL pointer for empty allocation
110110
null = _driver.binding.CUdeviceptr(0)
111-
gpu_data = _driver.MemoryPointer(pointer=null, size=0)
111+
gpu_data = _driver.MemoryPointer(
112+
context=devices.get_context(), pointer=null, size=0
113+
)
112114
self.alloc_size = 0
113115

114116
self.gpu_data = gpu_data

numba_cuda/numba/cuda/cudadrv/driver.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -814,13 +814,14 @@ def allocator():
814814
alloc_key = pointer
815815

816816
finalizer = _hostalloc_finalizer(self, pointer, alloc_key, size, mapped)
817+
ctx = weakref.proxy(self.context)
817818

818819
if mapped:
819-
mem = MappedMemory(pointer, size, finalizer=finalizer)
820+
mem = MappedMemory(ctx, pointer, size, finalizer=finalizer)
820821
self.allocations[alloc_key] = mem
821822
return mem.own()
822823
else:
823-
return PinnedMemory(pointer, size, finalizer=finalizer)
824+
return PinnedMemory(ctx, pointer, size, finalizer=finalizer)
824825

825826
def mempin(self, owner, pointer, size, mapped=False):
826827
"""Implements the pinning of host memory.
@@ -847,13 +848,18 @@ def allocator():
847848
allocator()
848849

849850
finalizer = _pin_finalizer(self, pointer, alloc_key, mapped)
851+
ctx = weakref.proxy(self.context)
850852

851853
if mapped:
852-
mem = MappedMemory(pointer, size, owner=owner, finalizer=finalizer)
854+
mem = MappedMemory(
855+
ctx, pointer, size, owner=owner, finalizer=finalizer
856+
)
853857
self.allocations[alloc_key] = mem
854858
return mem.own()
855859
else:
856-
return PinnedMemory(pointer, size, owner=owner, finalizer=finalizer)
860+
return PinnedMemory(
861+
ctx, pointer, size, owner=owner, finalizer=finalizer
862+
)
857863

858864
def memallocmanaged(self, size, attach_global):
859865
def allocator():
@@ -871,7 +877,8 @@ def allocator():
871877
alloc_key = ptr
872878

873879
finalizer = _alloc_finalizer(self, ptr, alloc_key, size)
874-
mem = ManagedMemory(ptr, size, finalizer=finalizer)
880+
ctx = weakref.proxy(self.context)
881+
mem = ManagedMemory(ctx, ptr, size, finalizer=finalizer)
875882
self.allocations[alloc_key] = mem
876883
return mem.own()
877884

@@ -934,7 +941,8 @@ def allocator():
934941
alloc_key = ptr
935942

936943
finalizer = _alloc_finalizer(self, ptr, alloc_key, size)
937-
mem = AutoFreePointer(ptr, size, finalizer=finalizer)
944+
ctx = weakref.proxy(self.context)
945+
mem = AutoFreePointer(ctx, ptr, size, finalizer=finalizer)
938946
self.allocations[alloc_key] = mem
939947
return mem.own()
940948

@@ -1265,7 +1273,9 @@ def open_ipc_handle(self, handle, size):
12651273
dptr = driver.cuIpcOpenMemHandle(handle, flags)
12661274

12671275
# wrap it
1268-
return MemoryPointer(pointer=dptr, size=size)
1276+
return MemoryPointer(
1277+
context=weakref.proxy(self), pointer=dptr, size=size
1278+
)
12691279

12701280
def enable_peer_access(self, peer_context, flags=0):
12711281
"""Enable peer access between the current context and the peer context"""
@@ -1751,7 +1761,7 @@ def _rebuild(cls, handle_ary, size, source_info, offset):
17511761
)
17521762

17531763

1754-
class MemoryPointer:
1764+
class MemoryPointer(object):
17551765
"""A memory pointer that owns a buffer, with an optional finalizer. Memory
17561766
pointers provide reference counting, and instances are initialized with a
17571767
reference count of 1.
@@ -1767,6 +1777,8 @@ class MemoryPointer:
17671777
tie the buffer lifetime to the reference count, so that the buffer is freed
17681778
when there are no more references.
17691779
1780+
:param context: The context in which the pointer was allocated.
1781+
:type context: Context
17701782
:param pointer: The address of the buffer.
17711783
:type pointer: ctypes.c_void_p
17721784
:param size: The size of the allocation in bytes.
@@ -1783,10 +1795,11 @@ class MemoryPointer:
17831795

17841796
__cuda_memory__ = True
17851797

1786-
def __init__(self, pointer, size, owner=None, finalizer=None):
1798+
def __init__(self, context, pointer, size, owner=None, finalizer=None):
17871799
if isinstance(pointer, ctypes.c_void_p):
17881800
pointer = binding.CUdeviceptr(pointer.value)
17891801

1802+
self.context = context
17901803
self.device_pointer = pointer
17911804
self.size = size
17921805
self._cuda_memsize_ = size
@@ -1842,7 +1855,7 @@ def view(self, start, stop=None):
18421855
pointer = binding.CUdeviceptr()
18431856
ctypes_ptr = drvapi.cu_device_ptr.from_address(pointer.getPtr())
18441857
ctypes_ptr.value = base
1845-
view = MemoryPointer(pointer, size, owner=self.owner)
1858+
view = MemoryPointer(self.context, pointer, size, owner=self.owner)
18461859

18471860
if isinstance(self.owner, (MemoryPointer, OwnedPointer)):
18481861
# Owned by a numba-managed memory segment, take an owned reference
@@ -1871,7 +1884,7 @@ class AutoFreePointer(MemoryPointer):
18711884

18721885
def __init__(self, *args, **kwargs):
18731886
super(AutoFreePointer, self).__init__(*args, **kwargs)
1874-
# Release the self reference to the buffer, so that the finalizer
1887+
# Release the self reference to the buffer, so that the finalizer
18751888
# is invoked if all the derived pointers are gone.
18761889
self.refct -= 1
18771890

@@ -1898,15 +1911,17 @@ class MappedMemory(AutoFreePointer):
18981911

18991912
__cuda_memory__ = True
19001913

1901-
def __init__(self, pointer, size, owner=None, finalizer=None):
1914+
def __init__(self, context, pointer, size, owner=None, finalizer=None):
19021915
self.owned = owner
19031916
self.host_pointer = pointer
19041917

19051918
devptr = driver.cuMemHostGetDevicePointer(pointer, 0)
19061919
self._bufptr_ = self.host_pointer
19071920

19081921
self.device_pointer = devptr
1909-
super(MappedMemory, self).__init__(devptr, size, finalizer=finalizer)
1922+
super(MappedMemory, self).__init__(
1923+
context, devptr, size, finalizer=finalizer
1924+
)
19101925
self.handle = self.host_pointer
19111926

19121927
# For buffer interface
@@ -1935,7 +1950,8 @@ class PinnedMemory(mviewbuf.MemAlloc):
19351950
:type finalizer: function
19361951
"""
19371952

1938-
def __init__(self, pointer, size, owner=None, finalizer=None):
1953+
def __init__(self, context, pointer, size, owner=None, finalizer=None):
1954+
self.context = context
19391955
self.owned = owner
19401956
self.size = size
19411957
self.host_pointer = pointer
@@ -1975,10 +1991,10 @@ class ManagedMemory(AutoFreePointer):
19751991

19761992
__cuda_memory__ = True
19771993

1978-
def __init__(self, pointer, size, owner=None, finalizer=None):
1994+
def __init__(self, context, pointer, size, owner=None, finalizer=None):
19791995
self.owned = owner
19801996
devptr = pointer
1981-
super().__init__(devptr, size, finalizer=finalizer)
1997+
super().__init__(context, devptr, size, finalizer=finalizer)
19821998

19831999
# For buffer interface
19842000
self._buflen_ = self.size
@@ -2302,7 +2318,7 @@ def get_global_symbol(self, name):
23022318
driver.cuModuleGetGlobal(
23032319
byref(ptr), byref(size), self.handle, name.encode("utf8")
23042320
)
2305-
return MemoryPointer(ptr, size), size.value
2321+
return MemoryPointer(self.context, ptr, size), size.value
23062322

23072323

23082324
class CudaPythonModule(Module):
@@ -2312,7 +2328,7 @@ def get_function(self, name):
23122328

23132329
def get_global_symbol(self, name):
23142330
ptr, size = driver.cuModuleGetGlobal(self.handle, name.encode("utf8"))
2315-
return MemoryPointer(ptr, size), size
2331+
return MemoryPointer(self.context, ptr, size), size
23162332

23172333

23182334
FuncAttr = namedtuple(

numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,17 @@ def dtor():
8787
dtor_invoked[0] += 1
8888

8989
# Ensure finalizer is called when pointer is deleted
90-
ptr = driver.MemoryPointer(pointer=fake_ptr, size=40, finalizer=dtor)
90+
ptr = driver.MemoryPointer(
91+
context=self.context, pointer=fake_ptr, size=40, finalizer=dtor
92+
)
9193
self.assertEqual(dtor_invoked[0], 0)
9294
del ptr
9395
self.assertEqual(dtor_invoked[0], 1)
9496

9597
# Ensure removing derived pointer doesn't call finalizer
96-
ptr = driver.MemoryPointer(pointer=fake_ptr, size=40, finalizer=dtor)
98+
ptr = driver.MemoryPointer(
99+
context=self.context, pointer=fake_ptr, size=40, finalizer=dtor
100+
)
97101
owned = ptr.own()
98102
del owned
99103
self.assertEqual(dtor_invoked[0], 1)

numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import ctypes
55
import numpy as np
6+
import weakref
67

78
from numba import cuda
89
from numba.cuda.core import config
@@ -57,9 +58,10 @@ def finalizer():
5758

5859
# We use an AutoFreePointer so that the finalizer will be run when
5960
# the reference count drops to zero.
61+
ctx = weakref.proxy(self.context)
6062
ptr = ctypes.c_void_p(alloc_count)
6163
return cuda.cudadrv.driver.AutoFreePointer(
62-
ptr, size, finalizer=finalizer
64+
ctx, ptr, size, finalizer=finalizer
6365
)
6466

6567
def initialize(self):

0 commit comments

Comments
 (0)