Skip to content

Commit 931d966

Browse files
authored
Merge branch 'main' into vk/dispatcher
2 parents 3833d50 + 5bf09fa commit 931d966

File tree

8 files changed

+92
-31
lines changed

8 files changed

+92
-31
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,19 @@ they are compiled for the appropriate compute capability.
3737
```
3838
cd testing
3939
# Optionally, build test binaries and point to their location for the test suite
40-
make
40+
make -j $(nproc)
4141
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`
4242
# Execute tests
4343
pytest -n auto -v
4444
```
4545

46+
Alternatively, you can use [pixi](https://pixi.sh/latest/installation/) to wrap all of that up for you:
47+
48+
```
49+
# run tests against CUDA 13
50+
pixi run -e cu13 test -n auto -v
51+
```
52+
4653

4754
Testing should discover the `numba.cuda` module from the `numba_cuda` package. You
4855
can check where `numba.cuda` files are being located by running

ci/coverage_report.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ python -m pip install \
2020
rapids-logger "Build test binaries"
2121
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
2222
pushd $NUMBA_CUDA_TEST_BIN_DIR
23-
make
23+
make -j $(nproc)
2424

2525
rapids-logger "Check GPU usage"
2626
nvidia-smi

ci/test_conda.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ nvidia-smi
6666
rapids-logger "Build test binaries"
6767
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
6868
pushd $NUMBA_CUDA_TEST_BIN_DIR
69-
make
69+
make -j $(nproc)
7070

7171
rapids-logger "Show Numba system info"
7272
python -m numba --sysinfo

ci/test_conda_ctypes_binding.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ nvidia-smi
5252
rapids-logger "Build test binaries"
5353
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
5454
pushd $NUMBA_CUDA_TEST_BIN_DIR
55-
make
55+
make -j $(nproc)
5656

5757
rapids-logger "Show Numba system info"
5858
python -m numba --sysinfo

ci/test_wheel.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ python -m pip install "${DEPENDENCIES[@]}"
2828
rapids-logger "Build tests"
2929
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
3030
pushd $NUMBA_CUDA_TEST_BIN_DIR
31-
make
31+
make -j $(nproc)
3232

3333
rapids-logger "Test importing numba.cuda"
3434
python -c "from numba import cuda"

ci/test_wheel_ctypes_binding.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ python -m pip install \
1919
rapids-logger "Copy and cd into test binaries dir"
2020
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
2121
pushd $NUMBA_CUDA_TEST_BIN_DIR
22-
# make
22+
# make -j $(nproc)
2323

2424
# Prevent the testsuite trying to use the test binaries
2525
unset NUMBA_CUDA_TEST_BIN_DIR

ci/test_wheel_deps_wheels.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ rapids-logger "Build test binaries"
1717

1818
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
1919
pushd $NUMBA_CUDA_TEST_BIN_DIR
20-
make
20+
make -j $(nproc)
2121

2222
rapids-logger "Check GPU usage"
2323
nvidia-smi

testing/Makefile

Lines changed: 78 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: BSD-2-Clause
3+
.PHONY: all nrt_extern test_device_functions clean
4+
5+
.DEFAULT_GOAL := all

# Remove a target whose recipe failed so that a truncated artifact left by an
# aborted nvcc/python invocation is never mistaken for an up-to-date file.
.DELETE_ON_ERROR:
36

47
# Generates the input files used by the nvjitlink tests
58

@@ -45,28 +48,79 @@ OUTPUT_DIR := ./
4548

4649
NRT_INCLUDE_DIR := $(shell python -c "from numba.cuda.memory_management.nrt import get_include; print(get_include())")
4750

48-
all:
49-
@echo "GPU CC: $(GPU_CC)"
50-
@echo "Alternative CC: $(ALT_CC)"
51-
# Compile all test objects
52-
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/undefined_extern.cubin undefined_extern.cu
53-
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.cubin test_device_functions.cu
54-
nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.fatbin test_device_functions.cu
55-
nvcc $(NVCC_FLAGS) $(MULTI_FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions_multi.fatbin test_device_functions.cu
56-
nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ptx test_device_functions.cu
57-
nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.o test_device_functions.cu
58-
nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.a test_device_functions.cu
59-
60-
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/nrt_extern.cubin nrt_extern.cu -I$(NRT_INCLUDE_DIR)
61-
nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/nrt_extern.fatbin nrt_extern.cu -I$(NRT_INCLUDE_DIR)
62-
nvcc $(NVCC_FLAGS) $(MULTI_FATBIN_FLAGS) -o $(OUTPUT_DIR)/nrt_extern_multi.fatbin nrt_extern.cu -I$(NRT_INCLUDE_DIR)
63-
nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/nrt_extern.ptx nrt_extern.cu -I$(NRT_INCLUDE_DIR)
64-
nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/nrt_extern.o nrt_extern.cu -I$(NRT_INCLUDE_DIR)
65-
nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/nrt_extern.a nrt_extern.cu -I$(NRT_INCLUDE_DIR)
66-
67-
# Generate LTO-IR wrapped in a fatbin
68-
nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ltoir.o test_device_functions.cu
69-
nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/nrt_extern.ltoir.o nrt_extern.cu -I$(NRT_INCLUDE_DIR)
51+
$(OUTPUT_DIR)/undefined_extern.cubin: undefined_extern.cu
52+
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $<
53+
54+
$(OUTPUT_DIR)/test_device_functions.cubin: test_device_functions.cu
55+
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $<
56+
57+
$(OUTPUT_DIR)/test_device_functions.fatbin: test_device_functions.cu
58+
nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $@ $<
59+
60+
$(OUTPUT_DIR)/test_device_functions_multi.fatbin: test_device_functions.cu
61+
nvcc $(NVCC_FLAGS) $(MULTI_FATBIN_FLAGS) -o $@ $<
62+
63+
$(OUTPUT_DIR)/test_device_functions.ptx: test_device_functions.cu
64+
nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $@ $<
65+
66+
$(OUTPUT_DIR)/test_device_functions.o: test_device_functions.cu
67+
nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $@ $<
68+
69+
$(OUTPUT_DIR)/test_device_functions.a: test_device_functions.cu
70+
nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $@ $<
71+
72+
$(OUTPUT_DIR)/test_device_functions.ltoir.o: test_device_functions.cu
73+
nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $@ $<
74+
75+
$(OUTPUT_DIR)/test_device_functions.ltoir: test_device_functions.cu
7076
# Generate LTO-IR in a "raw" LTO-IR container
71-
python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $(OUTPUT_DIR)/test_device_functions.ltoir test_device_functions.cu
72-
python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $(OUTPUT_DIR)/nrt_extern.ltoir nrt_extern.cu --nrt
77+
# Write to the rule's own target so the output path cannot drift from the rule header.
python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $@ $<
78+
79+
test_device_functions: $(OUTPUT_DIR)/test_device_functions.cubin \
80+
$(OUTPUT_DIR)/test_device_functions.fatbin \
81+
$(OUTPUT_DIR)/test_device_functions_multi.fatbin \
82+
$(OUTPUT_DIR)/test_device_functions.ptx \
83+
$(OUTPUT_DIR)/test_device_functions.o \
84+
$(OUTPUT_DIR)/test_device_functions.a \
85+
$(OUTPUT_DIR)/test_device_functions.ltoir.o \
86+
$(OUTPUT_DIR)/test_device_functions.ltoir
87+
88+
$(OUTPUT_DIR)/nrt_extern.cubin: nrt_extern.cu
89+
nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR)
90+
91+
$(OUTPUT_DIR)/nrt_extern.fatbin: nrt_extern.cu
92+
nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR)
93+
94+
$(OUTPUT_DIR)/nrt_extern_multi.fatbin: nrt_extern.cu
95+
nvcc $(NVCC_FLAGS) $(MULTI_FATBIN_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR)
96+
97+
$(OUTPUT_DIR)/nrt_extern.ptx: nrt_extern.cu
98+
nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR)
99+
100+
$(OUTPUT_DIR)/nrt_extern.o: nrt_extern.cu
101+
nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR)
102+
103+
$(OUTPUT_DIR)/nrt_extern.a: nrt_extern.cu
104+
nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR)
105+
106+
$(OUTPUT_DIR)/nrt_extern.ltoir.o: nrt_extern.cu
107+
nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR)
108+
109+
$(OUTPUT_DIR)/nrt_extern.ltoir: nrt_extern.cu
110+
python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $@ $< --nrt
111+
112+
nrt_extern: $(OUTPUT_DIR)/nrt_extern.cubin \
113+
$(OUTPUT_DIR)/nrt_extern.fatbin \
114+
$(OUTPUT_DIR)/nrt_extern_multi.fatbin \
115+
$(OUTPUT_DIR)/nrt_extern.ptx \
116+
$(OUTPUT_DIR)/nrt_extern.o \
117+
$(OUTPUT_DIR)/nrt_extern.a \
118+
$(OUTPUT_DIR)/nrt_extern.ltoir.o \
119+
$(OUTPUT_DIR)/nrt_extern.ltoir
120+
121+
all: test_device_functions nrt_extern $(OUTPUT_DIR)/undefined_extern.cubin
122+
@>&2 echo "GPU CC: $(GPU_CC)"
123+
@>&2 echo "Alternative CC: $(ALT_CC)"
124+
125+
clean:
126+
# Delete generated artifacts from the directory the build rules write to.
# Falls back to the current directory if OUTPUT_DIR is empty so the glob can
# never expand to paths under the filesystem root.
rm -f $(addprefix $(or $(strip $(OUTPUT_DIR)),.)/,*.cubin *.fatbin *.ptx *.o *.a *.ltoir)

0 commit comments

Comments
 (0)