Skip to content

Commit 5d1bbd4

Browse files
gmarkall, kkraus14, bdice
authored
Support CUDA 13, drop support for CUDA 11 (#370)
Required changes: - Test third party libraries with CUDA 12 only - Update matrix: - Drop CUDA 11 - Change 12.8.0 to 12.9.1 - Add CUDA 13 - Only run apt commands on Ubuntu - this was coincidentally handled by CTK 11 vs 12, where 11 ran on Rocky Linux and 12 on Ubuntu - Remove `CTK_CURAND_VMAP` - this is no longer used, and the cuRAND installation is handled as a dependency of the numba-cuda package. - Update documentation to drop references to CTK 11, and add references to CTK 13. - MVC is handled automatically, so most of the documentation for it is dropped. - Remove code relating to CTK 11 only. - Add handling for CUDA 13 paths when using the ctypes binding - Prevent `NVRTC()` objects being created when using the NVIDIA cuda-python bindings, and replace uses of it with [replacement name lost in extraction]. The `NVRTC()` class directly opens the NVRTC DLL / SO, so it should not be used in conjunction with the cuda-python bindings. - In test binaries, the lowest CC we should generate code for is 7.5, because this is the minimum supported by CTK 13. - The CCCL include path has changed in CUDA 13, so we add that to the include path for test binary generation. --------- Co-authored-by: Keith Kraus <[email protected]> Co-authored-by: Bradley Dice <[email protected]>
1 parent c3ca630 commit 5d1bbd4

29 files changed

+140
-13092
lines changed

.github/workflows/pr.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ jobs:
146146
build_type: pull-request
147147
script: "ci/test_thirdparty.sh"
148148
matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
149-
matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
149+
# TODO: Enable for CUDA 13 when a supporting version of cuDF is available
150+
matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
150151
build-docs:
151152
needs:
152153
- build-conda
@@ -162,4 +163,4 @@ jobs:
162163
build_type: pull-request
163164
script: "ci/coverage_report.sh"
164165
matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
165-
matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.8.0" and .PY_VER == "3.13")) | .[0:1]'
166+
matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.11")) | .[0:1]'

ci/matrix.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@ build-matrix:
33
simulator-matrix:
44
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.8.0', LINUX_VER: 'rockylinux8' }
55
test-matrix:
6-
- { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'rockylinux8', GPU: 'l4', DRIVER: 'earliest' }
7-
- { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', GPU: 'l4', DRIVER: 'latest' }
8-
- { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.11', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'latest' }
9-
- { CUDA_VER: '12.2.2', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'latest' }
10-
- { CUDA_VER: '12.8.0', ARCH: 'amd64', PY_VER: '3.13', LINUX_VER: 'ubuntu24.04', GPU: 'l4', DRIVER: 'latest' }
11-
- { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'rockylinux8', GPU: 'a100', DRIVER: 'earliest' }
12-
- { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', GPU: 'a100', DRIVER: 'latest' }
13-
- { CUDA_VER: '12.2.2', ARCH: 'arm64', PY_VER: '3.11', LINUX_VER: 'ubuntu22.04', GPU: 'a100', DRIVER: 'latest' }
14-
- { CUDA_VER: '12.2.2', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu22.04', GPU: 'a100', DRIVER: 'latest' }
15-
- { CUDA_VER: '12.8.0', ARCH: 'arm64', PY_VER: '3.13', LINUX_VER: 'ubuntu24.04', GPU: 'a100', DRIVER: 'latest' }
6+
- { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'rockylinux8', GPU: 'l4', DRIVER: 'earliest' }
7+
- { CUDA_VER: '12.2.2', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'latest' }
8+
- { CUDA_VER: '12.9.1', ARCH: 'amd64', PY_VER: '3.11', LINUX_VER: 'ubuntu24.04', GPU: 'l4', DRIVER: 'latest' }
9+
- { CUDA_VER: '13.0.0', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu22.04', GPU: 'l4', DRIVER: 'latest' }
10+
- { CUDA_VER: '13.0.0', ARCH: 'amd64', PY_VER: '3.13', LINUX_VER: 'ubuntu24.04', GPU: 'l4', DRIVER: 'latest' }
11+
- { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'rockylinux8', GPU: 'a100', DRIVER: 'earliest' }
12+
- { CUDA_VER: '12.2.2', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu22.04', GPU: 'a100', DRIVER: 'latest' }
13+
- { CUDA_VER: '12.9.1', ARCH: 'arm64', PY_VER: '3.11', LINUX_VER: 'ubuntu24.04', GPU: 'a100', DRIVER: 'latest' }
14+
- { CUDA_VER: '13.0.0', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu22.04', GPU: 'a100', DRIVER: 'latest' }
15+
- { CUDA_VER: '13.0.0', ARCH: 'arm64', PY_VER: '3.13', LINUX_VER: 'ubuntu24.04', GPU: 'a100', DRIVER: 'latest' }

ci/test_conda.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@ set -euo pipefail
55

66
. /opt/conda/etc/profile.d/conda.sh
77

8-
if [ "${CUDA_VER%.*.*}" = "11" ]; then
9-
CTK_PACKAGES="cudatoolkit=11"
10-
else
11-
CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc libcurand-dev cuda-cuobjdump"
8+
CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc libcurand-dev cuda-cuobjdump"
9+
10+
DISTRO=`cat /etc/os-release | grep "^ID=" | awk 'BEGIN {FS="="} { print $2 }'`
11+
12+
if [ "$DISTRO" = "ubuntu" ]; then
1213
apt-get update
1314
apt remove --purge `dpkg --get-selections | grep cuda-nvvm | awk '{print $1}'` -y
1415
apt remove --purge `dpkg --get-selections | grep cuda-nvrtc | awk '{print $1}'` -y

ci/test_wheel_deps_wheels.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,8 @@
33

44
set -euo pipefail
55

6-
# cuRAND versions don't follow the toolkit versions - map toolkit versions to
7-
# appropriate cuRAND versions
8-
declare -A CTK_CURAND_VMAP=( ["12.8"]="10.3.9" ["12.9"]="10.3.10")
96
CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}
107
CUDA_VER_MAJOR=${CUDA_VER%.*.*}
11-
CURAND_VER="${CTK_CURAND_VMAP[${CUDA_VER_MAJOR_MINOR}]}"
128

139
rapids-logger "Install wheel with test dependencies"
1410
package=$(realpath wheel/numba_cuda*.whl)

docs/source/reference/envvars.rst

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ target.
8484

8585
.. seealso::
8686

87-
The `Default Stream section
88-
<https://nvidia.github.io/cuda-python/release/11.6.0-notes.html#default-stream>`_
87+
The `Runtime Environment Variables section
88+
<https://nvidia.github.io/cuda-python/cuda-bindings/latest/environment_variables.html#runtime-environment-variables>`_
8989
in the NVIDIA Bindings documentation.
9090

9191
.. envvar:: NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS
@@ -119,13 +119,6 @@ target.
119119
``/usr/local/cuda/include``. On Windows, the default is
120120
``$env:CUDA_PATH\include``.
121121

122-
.. envvar:: NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
123-
124-
Enable minor version compatibility for the CUDA driver. Requires the
125-
``cubinlinker`` and ``ptxcompiler`` packages to be installed. Provides minor
126-
version compatibility for driver versions less than 12.0.
127-
128-
129122
.. envvar:: NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS
130123

131124
A colon separated list of paths that Numba's NVRTC should search for when compiling

docs/source/user/cudapysupported.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,7 @@ The following built-in types support are inherited from CPU nopython mode.
134134
See :ref:`nopython built-in types <pysupported-builtin-types>`.
135135

136136
There is also some very limited support for character sequences (bytes and
137-
unicode strings) used in NumPy arrays. Note that this support can only be used
138-
with CUDA 11.2 onwards.
137+
unicode strings) used in NumPy arrays.
139138

140139
Built-in functions
141140
==================

docs/source/user/installation.rst

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,16 @@ Supported GPUs
1111
--------------
1212

1313
Numba supports all NVIDIA GPUs that are supported by the CUDA Toolkit it uses.
14-
Presently for CUDA 11 this ranges from Compute Capabilities 3.5 to 9.0, and for
15-
CUDA 12 this ranges from 5.0 to 12.1, depending on the exact installed version.
14+
Presently for CUDA 12 this ranges from Compute Capabilities 5.0 to 12.1
15+
depending on the exact installed version, and for CUDA 13 this ranges from 7.5
16+
to 12.1 (the latest as of CUDA 13.0).
1617

1718

1819
Supported CUDA Toolkits
1920
-----------------------
2021

2122
Numba-CUDA aims to support all minor versions of the two most recent CUDA
22-
Toolkit releases. Presently 11 and 12 are supported; CUDA 11.2 is the minimum
23-
required, because older releases (11.0 and 11.1) have a version of NVVM based on
24-
a previous and incompatible LLVM version.
23+
Toolkit releases. Presently 12 and 13 are supported.
2524

2625
For further information about version compatibility between toolkit and driver
2726
versions, refer to :ref:`minor-version-compatibility`.
@@ -30,23 +29,21 @@ versions, refer to :ref:`minor-version-compatibility`.
3029
Installation with a Python package manager
3130
==========================================
3231

33-
Conda users can install the CUDA Toolkit into a conda environment.
32+
Conda users can install the CUDA Toolkit into a conda environment::
3433

35-
For CUDA 12::
34+
$ conda install -c conda-forge numba-cuda "cuda-version=12"
3635

37-
$ conda install -c conda-forge numba-cuda "cuda-version>=12.0"
36+
Or for CUDA 13::
37+
38+
$ conda install -c conda-forge numba-cuda "cuda-version=13"
3839

3940
Alternatively, you can install all CUDA 12 dependencies from PyPI via ``pip``::
4041

4142
$ pip install numba-cuda[cu12]
4243

43-
For CUDA 11, ``cudatoolkit`` is required::
44-
45-
$ conda install -c conda-forge numba-cuda "cuda-version>=11.2,<12.0"
46-
47-
or::
44+
CUDA 13 dependencies can be installed via ``pip`` with::
4845

49-
$ pip install numba-cuda[cu11]
46+
$ pip install numba-cuda[cu13]
5047

5148
If you are not using Conda/pip or if you want to use a different version of CUDA
5249
toolkit, :ref:`cudatoolkit-lookup` describes how Numba searches for a CUDA toolkit.

docs/source/user/minor_version_compatibility.rst

Lines changed: 4 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,68 +7,11 @@ CUDA `Minor Version Compatibility
77
<https://docs.nvidia.com/deploy/cuda-compatibility/index.html#minor-version-compatibility>`_
88
(MVC) enables the use of a newer CUDA Toolkit version than the CUDA version
99
supported by the driver, provided that the Toolkit and driver both have the same
10-
major version. For example, use of CUDA Toolkit 11.5 with CUDA driver 450 (CUDA
11-
version 11.0) is supported through MVC.
12-
13-
Numba supports MVC for CUDA 12 on Linux using the `nvjitlink` library.
14-
15-
Numba supports MVC for CUDA 11 on Linux using the external ``cubinlinker`` and
16-
``ptxcompiler`` packages, subject to the following limitations:
17-
18-
- Linking of archives is unsupported.
19-
- Cooperative Groups are unsupported, because they require an archive to be
20-
linked.
21-
22-
MVC is not supported on Windows.
23-
24-
25-
Installation
26-
------------
27-
28-
CUDA 11
29-
~~~~~~~
30-
31-
To use MVC support, the ``cubinlinker`` and ``ptxcompiler`` compiler packages
32-
must be installed from the appropriate channels. To install using conda, use:
33-
34-
.. code:: bash
35-
36-
conda install -c rapidsai -c conda-forge cubinlinker ptxcompiler
37-
38-
To install with pip, use the NVIDIA package index:
39-
40-
.. code:: bash
41-
42-
pip install --extra-index-url https://pypi.nvidia.com ptxcompiler-cu11 cubinlinker-cu11
43-
44-
CUDA 12
45-
~~~~~~~
46-
47-
For CUDA 12, MVC is provied by default through the ``nvjitlink`` package,
48-
which ``numba-cuda[cu12]`` depends on directly, so no additional installation
49-
steps are required.
50-
51-
Enabling MVC Support
52-
--------------------
53-
54-
CUDA 11
55-
~~~~~~~
56-
57-
MVC support is enabled by setting the environment variable:
58-
59-
.. code:: bash
60-
61-
export NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY=1
62-
63-
or by setting a configuration variable prior to using any CUDA functionality in
64-
Numba:
65-
66-
.. code:: python
67-
68-
from numba import config
69-
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = True
70-
10+
major version. For example, use of CUDA Toolkit 12.9 with CUDA driver 570 (CUDA
11+
version 12.8) is supported through MVC.
7112

13+
Numba supports MVC using the linker in the NVIDIA CUDA Python bindings, which
14+
uses ``nvjitlink`` to provide MVC.
7215

7316

7417
References
@@ -78,5 +21,3 @@ Further information about Minor Version Compatibility may be found in:
7821

7922
- The `CUDA Compatibility Guide
8023
<https://docs.nvidia.com/deploy/cuda-compatibility/index.html>`_.
81-
- The `README for ptxcompiler
82-
<https://github.com/rapidsai/ptxcompiler/blob/main/README.md>`_.

numba_cuda/numba/cuda/api.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from .cudadrv import devicearray, devices, driver
1111
from numba.core import config
1212
from numba.cuda.api_util import prepare_shape_strides_dtype
13-
from numba.cuda.cudadrv.runtime import get_version
1413

1514
# NDarray device helper
1615

@@ -99,13 +98,9 @@ def is_float16_supported():
9998
def is_bfloat16_supported():
10099
"""Whether bfloat16 are supported.
101100
102-
bfloat16 are only supported on devices with compute capability >= 8.0 and cuda version >= 12.0
101+
bfloat16 is only supported on devices with compute capability >= 8.0
103102
"""
104-
cuda_version = get_version()
105-
return current_context().device.supports_bfloat16 and cuda_version >= (
106-
12,
107-
0,
108-
)
103+
return current_context().device.supports_bfloat16
109104

110105

111106
@require_context

numba_cuda/numba/cuda/cuda_paths.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ def get_nvrtc_dso_path():
148148
# Check for each version of the NVRTC DLL, preferring the most
149149
# recent.
150150
versions = (
151-
"112" if IS_WIN32 else "11.2",
152151
"120" if IS_WIN32 else "12",
153152
"130" if IS_WIN32 else "13",
154153
)
@@ -303,16 +302,16 @@ def get_nvidia_nvvm_ctk():
303302

304303
# Assume the existence of NVVM in the conda env implies that a CUDA toolkit
305304
# conda package is installed.
305+
if IS_WIN32:
306+
# The path used on Windows
307+
libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
308+
else:
309+
# The path used on Linux is different to that on Windows
310+
libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
306311

307-
# First, try the location used on Linux and the Windows 11.x packages
308-
libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
309312
if not os.path.exists(libdir) or not os.path.isdir(libdir):
310-
# If that fails, try the location used for Windows 12.x packages
311-
libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
312-
if not os.path.exists(libdir) or not os.path.isdir(libdir):
313-
# If that doesn't exist either, assume we don't have the NVIDIA
314-
# conda package
315-
return
313+
# If the path doesn't exist, we didn't find the NVIDIA conda package
314+
return
316315

317316
paths = find_lib("nvvm", libdir=libdir)
318317
if not paths:
@@ -346,15 +345,8 @@ def get_nvidia_static_cudalib_ctk():
346345
if not nvvm_ctk:
347346
return
348347

349-
if IS_WIN32 and ("Library" not in nvvm_ctk):
350-
# Location specific to CUDA 11.x packages on Windows
351-
dirs = ("Lib", "x64")
352-
else:
353-
# Linux, or Windows with CUDA 12.x packages
354-
dirs = ("lib",)
355-
356348
env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
357-
return os.path.join(env_dir, *dirs)
349+
return os.path.join(env_dir, "lib")
358350

359351

360352
def get_cuda_home(*subdirs):

0 commit comments

Comments
 (0)