NVIDIA
diff --git a/‎.github/workflows/pr.yaml‎
Lines changed: 0 additions & 25 deletions b/‎.github/workflows/pr.yaml‎
Lines changed: 0 additions & 25 deletions
diff --git a/‎README.md‎
Lines changed: 8 additions & 1 deletion b/‎README.md‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎ci/coverage_report.sh‎
Lines changed: 1 addition & 1 deletion b/‎ci/coverage_report.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/test_conda.sh‎
Lines changed: 1 addition & 1 deletion b/‎ci/test_conda.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/test_conda_ctypes_binding.sh‎
Lines changed: 0 additions & 70 deletions b/‎ci/test_conda_ctypes_binding.sh‎
Lines changed: 0 additions & 70 deletions
diff --git a/‎ci/test_wheel.sh‎
Lines changed: 1 addition & 1 deletion b/‎ci/test_wheel.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/test_wheel_ctypes_binding.sh‎
Lines changed: 0 additions & 37 deletions b/‎ci/test_wheel_ctypes_binding.sh‎
Lines changed: 0 additions & 37 deletions
diff --git a/‎ci/test_wheel_deps_wheels.sh‎
Lines changed: 1 addition & 1 deletion b/‎ci/test_wheel_deps_wheels.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source/reference/envvars.rst‎
Lines changed: 4 additions & 5 deletions b/‎docs/source/reference/envvars.rst‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎docs/source/user/bindings.rst‎
Lines changed: 12 additions & 23 deletions b/‎docs/source/user/bindings.rst‎
Lines changed: 12 additions & 23 deletions
@@ -19,13 +19,11 @@ jobs:
       - compute-matrix
       - build-conda
       - test-conda
-      - test-conda-ctypes-binding
       - test-simulator
       - build-wheels
       - build-wheels-windows
       - test-wheels-windows
       - test-wheels
-      - test-wheels-ctypes-binding
       - test-wheels-deps-wheels
       - test-thirdparty
       - build-docs
@@ -80,18 +78,6 @@ jobs:
       script: "ci/test_conda.sh"
       run_codecov: false
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
-  test-conda-ctypes-binding:
-    needs:
-      - build-conda
-      - compute-matrix
-    uses: ./.github/workflows/conda-python-tests.yaml
-    with:
-      build_type: pull-request
-      script: "ci/test_conda_ctypes_binding.sh"
-      run_codecov: false
-      # This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version".
-      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
-      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
   test-simulator:
     needs:
       - build-conda
@@ -124,17 +110,6 @@ jobs:
       build_type: pull-request
       script: "ci/test_wheel.sh false"
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
-  test-wheels-ctypes-binding:
-    needs:
-      - build-wheels
-      - compute-matrix
-    uses: ./.github/workflows/wheels-test.yaml
-    with:
-      build_type: pull-request
-      script: "ci/test_wheel_ctypes_binding.sh"
-      # This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version".
-      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
-      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
   test-wheels-deps-wheels:
     needs:
       - build-wheels
 
@@ -37,12 +37,19 @@ they are compiled for the appropriate compute capability.
 ```
 cd testing
 # Optionally, build test binaries and point to their location for the test suite
-make
+make -j $(nproc)
 export NUMBA_CUDA_TEST_BIN_DIR=`pwd`
 # Execute tests
 pytest -n auto -v
 ```
 
+Alternatively, you can use [pixi](https://pixi.sh/latest/installation/) to wrap all of that up for you:
+
+```
+# run tests against CUDA 13
+pixi run -e cu13 test -n auto -v
+```
+
 
 Testing should discover the `numba.cuda` module from the `numba_cuda` package. You
 can check where `numba.cuda` files are being located by running
 
@@ -20,7 +20,7 @@ python -m pip install \
 rapids-logger "Build test binaries"
 export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
 pushd $NUMBA_CUDA_TEST_BIN_DIR
-make
+make -j $(nproc)
 
 rapids-logger "Check GPU usage"
 nvidia-smi
 
@@ -66,7 +66,7 @@ nvidia-smi
 rapids-logger "Build test binaries"
 export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
 pushd $NUMBA_CUDA_TEST_BIN_DIR
-make
+make -j $(nproc)
 
 rapids-logger "Show Numba system info"
 python -m numba --sysinfo
 
@@ -28,7 +28,7 @@ python -m pip install "${DEPENDENCIES[@]}"
 rapids-logger "Build tests"
 export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
 pushd $NUMBA_CUDA_TEST_BIN_DIR
-make
+make -j $(nproc)
 
 rapids-logger "Test importing numba.cuda"
 python -c "from numba import cuda"
 
@@ -17,7 +17,7 @@ rapids-logger "Build test binaries"
 
 export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing
 pushd $NUMBA_CUDA_TEST_BIN_DIR
-make
+make -j $(nproc)
 
 rapids-logger "Check GPU usage"
 nvidia-smi
 
@@ -103,12 +103,11 @@ target.
    Enable warnings if a kernel is launched with host memory which forces a copy to and
    from the device. This option is on by default (default value is 1).
 
-.. envvar:: NUMBA_CUDA_USE_NVIDIA_BINDING
+.. note::
 
-   When set to 1, Numba will attempt to use the `NVIDIA CUDA Python binding
-   <https://nvidia.github.io/cuda-python/>`_ to make calls to the driver API
-   instead of using its own ctypes binding. This defaults to 1 (on). Set to
-   0 to use the ctypes bindings.
+   Numba-CUDA always uses the NVIDIA CUDA Python bindings. The legacy ctypes
+   bindings and the ``NUMBA_CUDA_USE_NVIDIA_BINDING`` environment variable have
+   been removed.
 
 .. envvar:: NUMBA_CUDA_INCLUDE_PATH
 
 
@@ -5,25 +5,22 @@
 CUDA Bindings
 =============
 
-Numba supports two bindings to the CUDA Driver APIs: its own internal bindings
-based on ctypes, and the official `NVIDIA CUDA Python bindings
-<https://nvidia.github.io/cuda-python/>`_. Functionality is equivalent between
-the two bindings.
-
-The internal bindings are used by default. If the NVIDIA bindings are installed,
-then they can be used by setting the environment variable
-``NUMBA_CUDA_USE_NVIDIA_BINDING`` to ``1`` prior to the import of Numba. Once
-Numba has been imported, the selected binding cannot be changed.
+Numba-CUDA uses the official `NVIDIA CUDA Python bindings
+<https://nvidia.github.io/cuda-python/>`_ for all CUDA Driver interactions.
+Numba-CUDA previously provided its own internal ctypes-based bindings; the
+public APIs exposing those bindings are kept for compatibility. If you need
+to interact with the CUDA Driver or other CUDA libraries yourself, we
+recommend using the `cuda-python <https://nvidia.github.io/cuda-python/>`_
+package directly.
 
 
 Per-Thread Default Streams
 --------------------------
 
 Responsibility for handling Per-Thread Default Streams (PTDS) is delegated to
-the NVIDIA bindings when they are in use. To use PTDS with the NVIDIA bindings,
-set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to
-``1`` instead of Numba's environmnent variable
-:envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`.
+the NVIDIA bindings. To use PTDS, set the environment variable
+``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to ``1`` rather than setting
+Numba's environment variable :envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`.
 
 .. seealso::
 
@@ -35,13 +32,5 @@ set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to
 Roadmap
 -------
 
-In Numba 0.56, the NVIDIA Bindings will be used by default, if they are
-installed.
-
-In future versions of Numba:
-
-- The internal bindings will be deprecated.
-- The internal bindings will be removed.
-
-At present, no specific release is planned for the deprecation or removal of
-the internal bindings.
+The ctypes-based internal bindings have been removed in favor of the NVIDIA
+bindings. Future work focuses on expanding usage of ``cuda.core`` APIs.