Skip to content

Commit b9e9109

Browse files
authored
Merge branch 'main' into vk/types
2 parents 0b6601c + 2567b28 commit b9e9109

File tree

13 files changed

+3051
-429
lines changed

13 files changed

+3051
-429
lines changed

numba_cuda/numba/cuda/api.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
gpus = devices.gpus
2222

2323

24-
@require_context
2524
def from_cuda_array_interface(desc, owner=None, sync=True):
2625
"""Create a DeviceNDArray from a cuda-array-interface description.
2726
The ``owner`` is the owner of the underlying memory.
@@ -49,9 +48,7 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
4948

5049
cudevptr_class = driver.binding.CUdeviceptr
5150
devptr = cudevptr_class(desc["data"][0])
52-
data = driver.MemoryPointer(
53-
current_context(), devptr, size=size, owner=owner
54-
)
51+
data = driver.MemoryPointer(devptr, size=size, owner=owner)
5552
stream_ptr = desc.get("stream", None)
5653
if stream_ptr is not None:
5754
stream = external_stream(stream_ptr)
@@ -75,12 +72,11 @@ def as_cuda_array(obj, sync=True):
7572
If ``sync`` is ``True``, then the imported stream (if present) will be
7673
synchronized.
7774
"""
78-
if not is_cuda_array(obj):
79-
raise TypeError("*obj* doesn't implement the cuda array interface.")
80-
else:
81-
return from_cuda_array_interface(
82-
obj.__cuda_array_interface__, owner=obj, sync=sync
83-
)
75+
if (
76+
interface := getattr(obj, "__cuda_array_interface__", None)
77+
) is not None:
78+
return from_cuda_array_interface(interface, owner=obj, sync=sync)
79+
raise TypeError("*obj* doesn't implement the cuda array interface.")
8480

8581

8682
def is_cuda_array(obj):

numba_cuda/numba/cuda/args.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,13 @@
66
memory transfers before & after the kernel call.
77
"""
88

9-
import abc
10-
119
from numba.cuda.typing.typeof import typeof, Purpose
1210

1311

14-
class ArgHint(metaclass=abc.ABCMeta):
12+
class ArgHint:
1513
def __init__(self, value):
1614
self.value = value
1715

18-
@abc.abstractmethod
1916
def to_device(self, retr, stream=0):
2017
"""
2118
:param stream: a stream to use when copying data
@@ -25,7 +22,6 @@ def to_device(self, retr, stream=0):
2522
:return: a value (usually an `DeviceNDArray`) to be passed to
2623
the kernel
2724
"""
28-
pass
2925

3026
@property
3127
def _numba_type_(self):

numba_cuda/numba/cuda/cudadrv/devicearray.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
import numpy as np
1717

18-
import numba
1918
from numba.cuda.cext import _devicearray
2019
from numba.cuda.cudadrv import devices, dummyarray
2120
from numba.cuda.cudadrv import driver as _driver
@@ -90,32 +89,31 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
9089
if isinstance(strides, int):
9190
strides = (strides,)
9291
dtype = np.dtype(dtype)
93-
self.ndim = len(shape)
94-
if len(strides) != self.ndim:
92+
itemsize = dtype.itemsize
93+
self.ndim = ndim = len(shape)
94+
if len(strides) != ndim:
9595
raise ValueError("strides not match ndim")
96-
self._dummy = dummyarray.Array.from_desc(
97-
0, shape, strides, dtype.itemsize
96+
self._dummy = dummy = dummyarray.Array.from_desc(
97+
0, shape, strides, itemsize
9898
)
9999
# confirm that all elements of shape are ints
100100
if not all(isinstance(dim, (int, np.integer)) for dim in shape):
101101
raise TypeError("all elements of shape must be ints")
102-
self.shape = tuple(shape)
103-
self.strides = tuple(strides)
102+
self.shape = shape = dummy.shape
103+
self.strides = strides = dummy.strides
104104
self.dtype = dtype
105-
self.size = int(functools.reduce(operator.mul, self.shape, 1))
105+
self.size = size = dummy.size
106106
# prepare gpu memory
107-
if self.size > 0:
108-
self.alloc_size = _driver.memory_size_from_info(
109-
self.shape, self.strides, self.dtype.itemsize
107+
if size:
108+
self.alloc_size = alloc_size = _driver.memory_size_from_info(
109+
shape, strides, itemsize
110110
)
111111
if gpu_data is None:
112-
gpu_data = devices.get_context().memalloc(self.alloc_size)
112+
gpu_data = devices.get_context().memalloc(alloc_size)
113113
else:
114114
# Make NULL pointer for empty allocation
115115
null = _driver.binding.CUdeviceptr(0)
116-
gpu_data = _driver.MemoryPointer(
117-
context=devices.get_context(), pointer=null, size=0
118-
)
116+
gpu_data = _driver.MemoryPointer(pointer=null, size=0)
119117
self.alloc_size = 0
120118

121119
self.gpu_data = gpu_data
@@ -199,10 +197,11 @@ def _numba_type_(self):
199197
@property
200198
def device_ctypes_pointer(self):
201199
"""Returns the ctypes pointer to the GPU data buffer"""
202-
if self.gpu_data is None:
203-
return c_void_p(0)
204-
else:
200+
try:
201+
# apparently faster in the non-exceptional case
205202
return self.gpu_data.device_ctypes_pointer
203+
except AttributeError:
204+
return c_void_p(0)
206205

207206
@devices.require_context
208207
def copy_to_device(self, ary, stream=0):
@@ -901,8 +900,12 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
901900
"""
902901
if _driver.is_device_memory(obj):
903902
return obj, False
904-
elif hasattr(obj, "__cuda_array_interface__"):
905-
return numba.cuda.as_cuda_array(obj), False
903+
elif (
904+
interface := getattr(obj, "__cuda_array_interface__", None)
905+
) is not None:
906+
from numba.cuda.api import from_cuda_array_interface
907+
908+
return from_cuda_array_interface(interface, owner=obj), False
906909
else:
907910
if isinstance(obj, np.void):
908911
devobj = from_record_like(obj, stream=stream)

numba_cuda/numba/cuda/cudadrv/devices.py

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,14 @@
2121
from .driver import driver
2222

2323

24-
class _DeviceList(object):
25-
def __getattr__(self, attr):
26-
# First time looking at "lst" attribute.
27-
if attr == "lst":
28-
# Device list is not initialized.
29-
# Query all CUDA devices.
30-
numdev = driver.get_device_count()
31-
gpus = [
32-
_DeviceContextManager(driver.get_device(devid))
33-
for devid in range(numdev)
34-
]
35-
# Define "lst" to avoid re-initialization
36-
self.lst = gpus
37-
return gpus
38-
39-
# Other attributes
40-
return super(_DeviceList, self).__getattr__(attr)
24+
class _DeviceList:
25+
@property
26+
@functools.cache
27+
def lst(self):
28+
return [
29+
_DeviceContextManager(driver.get_device(devid))
30+
for devid in range(driver.get_device_count())
31+
]
4132

4233
def __getitem__(self, devnum):
4334
"""
@@ -79,6 +70,9 @@ class _DeviceContextManager(object):
7970

8071
def __init__(self, device):
8172
self._device = device
73+
# Forwarded directly, to avoid the performance overhead of
74+
# `__getattr__` and method lookup for a commonly accessed method
75+
self.get_primary_context = self._device.get_primary_context
8276

8377
def __getattr__(self, item):
8478
return getattr(self._device, item)
@@ -88,10 +82,10 @@ def __enter__(self):
8882

8983
def __exit__(self, exc_type, exc_val, exc_tb):
9084
# this will verify that we are popping the right device context.
91-
self._device.get_primary_context().pop()
85+
self.get_primary_context().pop()
9286

9387
def __str__(self):
94-
return "<Managed Device {self.id}>".format(self=self)
88+
return f"<Managed Device {self.id}>"
9589

9690

9791
class _Runtime(object):
@@ -147,7 +141,8 @@ def get_or_create_context(self, devnum):
147141
return attached_ctx
148142
else:
149143
devnum = int(devnum)
150-
return self._activate_context_for(devnum)
144+
with self._lock:
145+
return self._activate_context_for(devnum)
151146

152147
def _get_or_create_context_uncached(self, devnum):
153148
"""See also ``get_or_create_context(devnum)``.
@@ -166,28 +161,29 @@ def _get_or_create_context_uncached(self, devnum):
166161
ctx_handle = ctx.handle.value
167162
ac_ctx_handle = ac.context_handle.value
168163
if ctx_handle != ac_ctx_handle:
169-
msg = (
164+
raise RuntimeError(
170165
"Numba cannot operate on non-primary"
171-
" CUDA context {:x}"
166+
f" CUDA context {ac_ctx_handle:x}"
172167
)
173-
raise RuntimeError(msg.format(ac_ctx_handle))
174168
# Ensure the context is ready
175169
ctx.prepare_for_use()
176170
return ctx
177171

178172
def _activate_context_for(self, devnum):
179-
with self._lock:
180-
gpu = self.gpus[devnum]
181-
newctx = gpu.get_primary_context()
182-
# Detect unexpected context switch
183-
cached_ctx = self._get_attached_context()
184-
if cached_ctx is not None and cached_ctx is not newctx:
185-
raise RuntimeError("Cannot switch CUDA-context.")
186-
newctx.push()
187-
return newctx
173+
gpu = self.gpus[devnum]
174+
newctx = gpu.get_primary_context()
175+
# Detect unexpected context switch
176+
cached_ctx = self._get_attached_context()
177+
if cached_ctx is not None and cached_ctx is not newctx:
178+
raise RuntimeError("Cannot switch CUDA-context.")
179+
newctx.push()
180+
return newctx
188181

189182
def _get_attached_context(self):
190-
return getattr(self._tls, "attached_context", None)
183+
try:
184+
return self._tls.attached_context
185+
except AttributeError:
186+
return None
191187

192188
def _set_attached_context(self, ctx):
193189
self._tls.attached_context = ctx

0 commit comments

Comments
 (0)