Skip to content

Commit 0a0eb35

Browse files
committed
Merge remote-tracking branch 'NVIDIA/main' into arr_reshape
2 parents 7928f2d + c04efe0 commit 0a0eb35

37 files changed

+1614
-78
lines changed

.github/workflows/pr.yaml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ jobs:
1919
- compute-matrix
2020
- build-conda
2121
- test-conda
22+
- test-conda-pynvjitlink
2223
- build-wheels
2324
- test-wheels
25+
- test-wheels-pynvjitlink
2426
- build-docs
2527
secrets: inherit
2628
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
@@ -57,6 +59,16 @@ jobs:
5759
script: "ci/test_conda.sh"
5860
run_codecov: false
5961
matrix_filter: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
62+
test-conda-pynvjitlink:
63+
needs:
64+
- build-conda
65+
- compute-matrix
66+
uses: ./.github/workflows/conda-python-tests.yaml
67+
with:
68+
build_type: pull-request
69+
script: "ci/test_conda_pynvjitlink.sh"
70+
run_codecov: false
71+
matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "12.5.1" and .PY_VER == "3.11"))
6072
build-wheels:
6173
needs:
6274
- compute-matrix
@@ -71,7 +83,15 @@ jobs:
7183
uses: ./.github/workflows/wheels-test.yaml
7284
with:
7385
build_type: pull-request
74-
script: "ci/test_wheel.sh"
86+
script: "ci/test_wheel.sh false"
87+
test-wheels-pynvjitlink:
88+
needs:
89+
- build-wheels
90+
uses: ./.github/workflows/wheels-test.yaml
91+
with:
92+
build_type: pull-request
93+
script: "ci/test_wheel_pynvjitlink.sh"
94+
matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "12.5.1" and .PY_VER == "3.12"))
7595
build-docs:
7696
needs:
7797
- build-conda

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ __pycache__
33
build
44
.*.swp
55
*.so
6+
numba_cuda/numba/cuda/tests/cudadrv/test_device_functions.*
7+
numba_cuda/numba/cuda/tests/cudadrv/undefined_extern.*

ci/test_conda.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ set -euo pipefail
88
if [ "${CUDA_VER%.*.*}" = "11" ]; then
99
CTK_PACKAGES="cudatoolkit"
1010
else
11-
CTK_PACKAGES="cuda-nvcc-impl cuda-nvrtc"
11+
CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc"
1212
fi
1313

1414
rapids-logger "Install testing dependencies"

ci/test_conda_pynvjitlink.sh

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/bin/bash
2+
# Copyright (c) 2024, NVIDIA CORPORATION
3+
4+
set -euo pipefail
5+
6+
. /opt/conda/etc/profile.d/conda.sh
7+
8+
if [ "${CUDA_VER%.*.*}" = "11" ]; then
9+
CTK_PACKAGES="cudatoolkit"
10+
else
11+
CTK_PACKAGES="cuda-nvcc-impl cuda-nvrtc"
12+
fi
13+
14+
rapids-logger "Install testing dependencies"
15+
# TODO: Replace with rapids-dependency-file-generator
16+
rapids-mamba-retry create -n test \
17+
c-compiler \
18+
cxx-compiler \
19+
${CTK_PACKAGES} \
20+
cuda-python \
21+
cuda-version=${CUDA_VER%.*} \
22+
make \
23+
psutil \
24+
pytest \
25+
python=${RAPIDS_PY_VERSION}
26+
27+
# Temporarily allow unbound variables for conda activation.
28+
set +u
29+
conda activate test
30+
set -u
31+
32+
rapids-mamba-retry install -c `pwd`/conda-repo numba-cuda
33+
34+
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
35+
mkdir -p "${RAPIDS_TESTS_DIR}"
36+
pushd "${RAPIDS_TESTS_DIR}"
37+
38+
rapids-print-env
39+
40+
rapids-logger "Check GPU usage"
41+
nvidia-smi
42+
43+
rapids-logger "Show Numba system info"
44+
python -m numba --sysinfo
45+
46+
EXITCODE=0
47+
trap "EXITCODE=1" ERR
48+
set +e
49+
50+
51+
rapids-logger "Install pynvjitlink"
52+
set +u
53+
rapids-mamba-retry install -c rapidsai pynvjitlink
54+
set -u
55+
56+
rapids-logger "Build tests"
57+
58+
PY_SCRIPT="
59+
import numba_cuda
60+
root = numba_cuda.__file__.rstrip('__init__.py')
61+
test_dir = root + \"numba/cuda/tests/test_binary_generation/\"
62+
print(test_dir)
63+
"
64+
65+
NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT")
66+
pushd $NUMBA_CUDA_TEST_BIN_DIR
67+
make
68+
popd
69+
70+
71+
rapids-logger "Run Tests"
72+
NUMBA_CUDA_ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m numba.runtests numba.cuda.tests -v
73+
74+
popd
75+
76+
rapids-logger "Test script exiting with value: $EXITCODE"
77+
exit ${EXITCODE}

ci/test_wheel.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ rapids-logger "Install testing dependencies"
88
python -m pip install \
99
psutil \
1010
cuda-python \
11+
nvidia-cuda-cccl-cu12 \
1112
pytest
1213

1314
rapids-logger "Install wheel"

ci/test_wheel_pynvjitlink.sh

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/bin/bash
2+
# Copyright (c) 2023-2024, NVIDIA CORPORATION
3+
4+
set -euo pipefail
5+
6+
rapids-logger "Install testing dependencies"
7+
# TODO: Replace with rapids-dependency-file-generator
8+
python -m pip install \
9+
psutil \
10+
cuda-python \
11+
pytest
12+
13+
rapids-logger "Install pynvjitlink"
14+
python -m pip install pynvjitlink-cu12
15+
16+
rapids-logger "Build tests"
17+
PY_SCRIPT="
18+
import numba_cuda
19+
root = numba_cuda.__file__.rstrip('__init__.py')
20+
test_dir = root + \"numba/cuda/tests/test_binary_generation/\"
21+
print(test_dir)
22+
"
23+
24+
NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT")
25+
pushd $NUMBA_CUDA_TEST_BIN_DIR
26+
make
27+
popd
28+
29+
rapids-logger "Install wheel"
30+
package=$(realpath wheel/numba_cuda*.whl)
31+
echo "Package path: $package"
32+
python -m pip install $package
33+
34+
rapids-logger "Check GPU usage"
35+
nvidia-smi
36+
37+
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
38+
mkdir -p "${RAPIDS_TESTS_DIR}"
39+
pushd "${RAPIDS_TESTS_DIR}"
40+
41+
rapids-logger "Show Numba system info"
42+
python -m numba --sysinfo
43+
44+
rapids-logger "Run Tests"
45+
NUMBA_CUDA_ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m numba.runtests numba.cuda.tests -v
46+
47+
popd

docs/source/reference/envvars.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,14 @@ target.
120120
``/usr/local/cuda/include``. On Windows, the default is
121121
``$env:CUDA_PATH\include``.
122122

123+
.. envvar:: NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
123124

125+
Enable minor version compatibility for the CUDA driver. Requires the
126+
``cubinlinker`` and ``ptxcompiler`` packages to be installed. Provides minor
127+
version compatibility for driver versions less than 12.0.
128+
129+
.. envvar:: NUMBA_CUDA_ENABLE_PYNVJITLINK
130+
131+
Use ``pynvjitlink`` for minor version compatibility. Requires the ``pynvjitlink``
132+
package to be installed. Provides minor version compatibility for driver versions
133+
12.0 and later.

docs/source/user/minor_version_compatibility.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ MVC support is enabled by setting the environment variable:
6565

6666
.. code:: bash
6767
68-
export NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY=1
68+
export NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY=1 # CUDA 11
69+
export NUMBA_CUDA_ENABLE_PYNVJITLINK=1 # CUDA 12
6970
7071
7172
or by setting a configuration variable prior to using any CUDA functionality in
@@ -74,7 +75,8 @@ Numba:
7475
.. code:: python
7576
7677
from numba import config
77-
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = True
78+
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = True # CUDA 11
79+
config.CUDA_ENABLE_PYNVJITLINK = True # CUDA 12
7880
7981
8082
References

numba_cuda/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.0.17
1+
0.0.18

numba_cuda/numba/cuda/codegen.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
5959
get_cufunc), which may be of different compute capabilities.
6060
"""
6161

62-
def __init__(self, codegen, name, entry_name=None, max_registers=None,
63-
nvvm_options=None):
62+
def __init__(
63+
self,
64+
codegen,
65+
name,
66+
entry_name=None,
67+
max_registers=None,
68+
lto=False,
69+
nvvm_options=None
70+
):
6471
"""
6572
codegen:
6673
Codegen object.
@@ -71,6 +78,8 @@ def __init__(self, codegen, name, entry_name=None, max_registers=None,
7178
kernel and not a device function.
7279
max_registers:
7380
The maximum register usage to aim for when linking.
81+
lto:
82+
Whether to enable link-time optimization.
7483
nvvm_options:
7584
Dict of options to pass to NVVM.
7685
"""
@@ -103,6 +112,7 @@ def __init__(self, codegen, name, entry_name=None, max_registers=None,
103112
self._cufunc_cache = {}
104113

105114
self._max_registers = max_registers
115+
self._lto = lto
106116
if nvvm_options is None:
107117
nvvm_options = {}
108118
self._nvvm_options = nvvm_options
@@ -178,7 +188,9 @@ def get_cubin(self, cc=None):
178188
if cubin:
179189
return cubin
180190

181-
linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
191+
linker = driver.Linker.new(
192+
max_registers=self._max_registers, cc=cc, lto=self._lto
193+
)
182194

183195
if linker.lto:
184196
ltoir = self.get_ltoir(cc=cc)

0 commit comments

Comments
 (0)