Skip to content

Commit 327f908

Browse files
merge/resolve
2 parents 107669e + 9c31f59 commit 327f908

File tree

9 files changed

+89
-38
lines changed

9 files changed

+89
-38
lines changed

.github/actions/compute-matrix/action.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ runs:
2020
"
2121
2222
export TEST_MATRIX="
23-
- { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
24-
- { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
23+
- { CUDA_VER: '11.4.3', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
24+
- { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
2525
- { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
26-
- { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
27-
- { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
28-
- { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
26+
- { CUDA_VER: '12.5.1', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
27+
- { CUDA_VER: '11.4.3', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
28+
- { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
2929
- { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
30-
- { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
30+
- { CUDA_VER: '12.5.1', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
3131
"
3232
3333
echo "BUILD_MATRIX=$(

.github/workflows/conda-python-tests.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ jobs:
134134
run: ${{ inputs.script }}
135135
env:
136136
GH_TOKEN: ${{ github.token }}
137+
CUDA_VER: ${{ matrix.CUDA_VER }}
137138
- name: Generate test report
138139
uses: test-summary/action@v2.3
139140
with:

.github/workflows/pr.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,10 @@ jobs:
6868
test-wheels:
6969
needs:
7070
- build-wheels
71-
- compute-matrix
7271
uses: ./.github/workflows/wheels-test.yaml
7372
with:
7473
build_type: pull-request
7574
script: "ci/test_wheel.sh"
76-
matrix_filter: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
7775
build-docs:
7876
needs:
7977
- build-conda

.github/workflows/wheels-test.yaml

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -79,23 +79,14 @@ jobs:
7979
#
8080
export MATRICES="
8181
pull-request:
82-
# amd64
83-
- { ARCH: 'amd64', PY_VER: '3.9', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'v100', driver: 'latest' }
84-
# arm64
85-
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
86-
nightly:
87-
# amd64
88-
- { ARCH: 'amd64', PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
89-
- { ARCH: 'amd64', PY_VER: '3.9', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'v100', driver: 'latest' }
90-
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu22.04', gpu: 'v100', driver: 'latest' }
91-
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
92-
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
93-
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'v100', driver: 'latest' }
94-
# arm64
95-
- { ARCH: 'arm64', PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
96-
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
97-
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'a100', driver: 'latest' }
98-
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu22.04', gpu: 'a100', driver: 'latest' }
82+
- { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
83+
- { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
84+
- { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
85+
- { CUDA_VER: '12.5.1', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' }
86+
- { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
87+
- { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
88+
- { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
89+
- { CUDA_VER: '12.5.1', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
9990
"
10091
10192
TEST_MATRIX=$(yq -n 'env(MATRICES) | .[strenv(BUILD_TYPE)]')

ci/test_conda.sh

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,20 @@ set -euo pipefail
55

66
. /opt/conda/etc/profile.d/conda.sh
77

8+
if [ "${CUDA_VER%.*.*}" = "11" ]; then
9+
CTK_PACKAGES="cuda-cccl cudatoolkit"
10+
else
11+
CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc"
12+
fi
13+
814
rapids-logger "Install testing dependencies"
915
# TODO: Replace with rapids-dependency-file-generator
1016
rapids-mamba-retry create -n test \
1117
c-compiler \
1218
cxx-compiler \
13-
cuda-cccl \
14-
cuda-nvcc-impl \
15-
cuda-nvrtc \
19+
${CTK_PACKAGES} \
1620
cuda-python \
17-
cuda-version=${RAPIDS_CUDA_VERSION%.*} \
21+
cuda-version=${CUDA_VER%.*} \
1822
make \
1923
psutil \
2024
pytest \

numba_cuda/numba/cuda/printimpl.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from numba.core.errors import NumbaWarning
55
from numba.core.imputils import Registry
66
from numba.cuda import nvvmutils
7+
from numba.cuda.types import Dim3
78
from warnings import warn
89

910
registry = Registry()
@@ -53,6 +54,26 @@ def const_print_impl(ty, context, builder, sigval):
5354
return rawfmt, [val]
5455

5556

57+
@print_item.register(Dim3)
58+
def dim3_print_impl(ty, context, builder, val):
59+
rawfmt = "(%d, %d, %d)"
60+
x = builder.extract_value(val, 0)
61+
y = builder.extract_value(val, 1)
62+
z = builder.extract_value(val, 2)
63+
return rawfmt, [x, y, z]
64+
65+
66+
@print_item.register(types.Boolean)
67+
def bool_print_impl(ty, context, builder, val):
68+
true_string = context.insert_string_const_addrspace(builder, "True")
69+
false_string = context.insert_string_const_addrspace(builder, "False")
70+
res_ptr = cgutils.alloca_once_value(builder, false_string)
71+
with builder.if_then(val):
72+
builder.store(true_string, res_ptr)
73+
rawfmt = "%s"
74+
return rawfmt, [builder.load(res_ptr)]
75+
76+
5677
@lower(print, types.VarArg(types.Any))
5778
def print_varargs(context, builder, sig, args):
5879
"""This function is a generic 'print' wrapper for arbitrary types.

numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ def test_used_list(self):
8787
self.assertIn('section "llvm.metadata"', used_line)
8888

8989
def test_nvvm_ir_verify_fail(self):
90+
if runtime.get_version() >= (12, 5):
91+
self.skipTest("Bad triple doesn't fail verify on CUDA >= 12.5")
9092
m = ir.Module("test_bad_ir")
9193
m.triple = "unknown-unknown-unknown"
9294
m.data_layout = NVVM().data_layout

numba_cuda/numba/cuda/tests/cudapy/test_atomics.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -558,18 +558,12 @@ def assertCorrectFloat64Atomics(self, kernel, shared=True):
558558
# Use the first (and only) definition
559559
asm = next(iter(kernel.inspect_asm().values()))
560560
if cc_X_or_above(6, 0):
561-
if cuda.runtime.get_version() > (12, 1):
562-
# CUDA 12.2 and above generate a more optimized reduction
563-
# instruction, because the result does not need to be
564-
# placed in a register.
565-
inst = 'red'
566-
else:
567-
inst = 'atom'
561+
inst = "(red|atom)"
568562

569563
if shared:
570-
inst = f'{inst}.shared'
564+
inst = f'{inst}\\.shared'
571565

572-
self.assertIn(f'{inst}.add.f64', asm)
566+
self.assertRegex(asm, f'{inst}.add.f64', asm)
573567
else:
574568
if shared:
575569
self.assertIn('atom.shared.cas.b64', asm)

numba_cuda/numba/cuda/tests/cudapy/test_print.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
2+
import numpy as np
23
import subprocess
34
import sys
45
import unittest
@@ -31,6 +32,21 @@ def printfloat():
3132
"""
3233

3334

35+
printbool_usecase = """\
36+
from numba import cuda
37+
38+
@cuda.jit
39+
def printbool(x):
40+
print(True)
41+
print(False)
42+
print(x == 0)
43+
44+
printbool[1, 1](0)
45+
printbool[1, 1](1)
46+
cuda.synchronize()
47+
"""
48+
49+
3450
printstring_usecase = """\
3551
from numba import cuda
3652
@@ -43,6 +59,19 @@ def printstring():
4359
cuda.synchronize()
4460
"""
4561

62+
63+
printdim3_usecase = """\
64+
from numba import cuda
65+
66+
@cuda.jit
67+
def printdim3():
68+
print(cuda.threadIdx)
69+
70+
printdim3[1, (2, 2, 2)]()
71+
cuda.synchronize()
72+
"""
73+
74+
4675
printempty_usecase = """\
4776
from numba import cuda
4877
@@ -95,6 +124,11 @@ def test_printfloat(self):
95124
expected_cases = ["0 23 34.750000 321", "0 23 34.75 321"]
96125
self.assertIn(output.strip(), expected_cases)
97126

127+
def test_bool(self):
128+
output, _ = self.run_code(printbool_usecase)
129+
expected = "True\nFalse\nTrue\nTrue\nFalse\nFalse"
130+
self.assertEqual(output.strip(), expected)
131+
98132
def test_printempty(self):
99133
output, _ = self.run_code(printempty_usecase)
100134
self.assertEqual(output.strip(), "")
@@ -105,6 +139,12 @@ def test_string(self):
105139
expected = ['%d hop! 999' % i for i in range(3)]
106140
self.assertEqual(sorted(lines), expected)
107141

142+
def test_dim3(self):
143+
output, _ = self.run_code(printdim3_usecase)
144+
lines = [line.strip() for line in output.splitlines(True)]
145+
expected = [str(i) for i in np.ndindex(2, 2, 2)]
146+
self.assertEqual(sorted(lines), expected)
147+
108148
@skip_on_cudasim('cudasim can print unlimited output')
109149
def test_too_many_args(self):
110150
# Tests that we emit the format string and warn when there are more

0 commit comments

Comments
 (0)