
Commit aeceaef

Backporting Features to v0.20, bump to v0.20.1 (#562)
- Relax the cuda-core pin to let it float across minor releases (#559) (645e46c)
- Bump version to 0.20.1 (2e58567)
- [test] Use numpy's tolerance for float16 (#491) (8bb46bc)

Co-authored-by: Michael Wang <[email protected]>
Co-authored-by: Asher Mancinelli <[email protected]>
1 parent: ed5d280

File tree

- ci/test_wheel.sh
- conda/recipes/numba-cuda/meta.yaml
- numba_cuda/VERSION
- numba_cuda/numba/cuda/testing.py
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
- pyproject.toml

6 files changed: 31 additions, 26 deletions


ci/test_wheel.sh

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ echo "Package path: ${package}"
 DEPENDENCIES=(
     "${package}[test]"
     "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*"
-    "cuda-core==0.3.*"
+    "cuda-core>=0.3.0,<1.0.0"
 )

 # Constrain oldest supported dependencies for testing

conda/recipes/numba-cuda/meta.yaml

Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@ requirements:
     - python
     - numba >=0.59.1
     - cuda-bindings >=12.9.1
-    - cuda-core ==0.3.*
+    - cuda-core >=0.3.0,<1.0.0

 about:
   home: {{ project_urls["Homepage"] }}
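Both the CI script above and the conda recipe replace the exact cuda-core pin (==0.3.*) with a range that floats across minor releases while staying below 1.0. A minimal sketch of what the two specifiers admit, assuming the packaging library (the PEP 440 implementation used by pip); the version numbers are illustrative:

# Minimal sketch, assuming the `packaging` library, of what the relaxed
# cuda-core constraint admits compared to the old pin.
from packaging.specifiers import SpecifierSet

old_pin = SpecifierSet("==0.3.*")           # old: only 0.3.x releases
new_pin = SpecifierSet(">=0.3.0,<1.0.0")    # new: floats across minor releases

for version in ("0.3.0", "0.3.2", "0.4.0", "1.0.0"):
    print(version, version in old_pin, version in new_pin)
# 0.3.0 True True
# 0.3.2 True True
# 0.4.0 False True   <- minor releases are now allowed
# 1.0.0 False False  <- still capped below 1.0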

numba_cuda/VERSION

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-0.20.0
+0.20.1

numba_cuda/numba/cuda/testing.py

Lines changed: 3 additions & 0 deletions

@@ -17,6 +17,7 @@
 from typing import Iterable, Union
 from io import StringIO
 import unittest
+import numpy as np

 if PYVERSION >= (3, 10):
     from filecheck.matcher import Matcher
@@ -44,6 +45,8 @@ class CUDATestCase(TestCase):
     matches FileCheck checks, and is not specific to CUDADispatcher.
     """

+    FLOAT16_RTOL = np.finfo(np.float16).eps
+
     def setUp(self):
         self._low_occupancy_warnings = config.CUDA_LOW_OCCUPANCY_WARNINGS
         self._warn_on_implicit_copy = config.CUDA_WARN_ON_IMPLICIT_COPY
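The new FLOAT16_RTOL class attribute ties the comparison tolerance to half-precision machine epsilon. A short sketch of what it evaluates to and how a test can use it (host-only, hypothetical values):

# What FLOAT16_RTOL evaluates to: machine epsilon for IEEE 754 half
# precision, i.e. 2**-10.
import numpy as np

FLOAT16_RTOL = np.finfo(np.float16).eps
print(FLOAT16_RTOL)          # 0.000977

# Hypothetical usage mirroring the updated tests: compare a half-precision
# result against a reference with a tolerance matched to the dtype instead
# of assert_allclose's much tighter default rtol of 1e-07.
result = np.float16(3.0) + np.float16(4.0)
np.testing.assert_allclose(result, 7.0, rtol=FLOAT16_RTOL)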

numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py

Lines changed: 23 additions & 21 deletions

@@ -629,7 +629,7 @@ def test_hadd(self):
         arg1 = np.array([3.0], dtype=np.float16)
         arg2 = np.array([4.0], dtype=np.float16)
         compiled[1, 1](ary, arg1, arg2)
-        np.testing.assert_allclose(ary[0], arg1 + arg2)
+        np.testing.assert_allclose(ary[0], arg1 + arg2, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hadd_scalar(self):
@@ -639,7 +639,7 @@ def test_hadd_scalar(self):
         arg2 = np.float16(3.0)
         compiled[1, 1](ary, arg1, arg2)
         ref = arg1 + arg2
-        np.testing.assert_allclose(ary[0], ref)
+        np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

     @skip_on_cudasim("Compilation unsupported in the simulator")
     @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -657,7 +657,9 @@ def test_hfma(self):
         arg2 = np.array([3.0], dtype=np.float16)
         arg3 = np.array([4.0], dtype=np.float16)
         compiled[1, 1](ary, arg1, arg2, arg3)
-        np.testing.assert_allclose(ary[0], arg1 * arg2 + arg3)
+        np.testing.assert_allclose(
+            ary[0], arg1 * arg2 + arg3, rtol=self.FLOAT16_RTOL
+        )

     @skip_unless_cc_53
     def test_hfma_scalar(self):
@@ -668,7 +670,7 @@ def test_hfma_scalar(self):
         arg3 = np.float16(4.0)
         compiled[1, 1](ary, arg1, arg2, arg3)
         ref = arg1 * arg2 + arg3
-        np.testing.assert_allclose(ary[0], ref)
+        np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

     @skip_on_cudasim("Compilation unsupported in the simulator")
     @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -687,7 +689,7 @@ def test_hsub(self):
         arg1 = np.array([3.0], dtype=np.float16)
         arg2 = np.array([4.0], dtype=np.float16)
         compiled[1, 1](ary, arg1, arg2)
-        np.testing.assert_allclose(ary[0], arg1 - arg2)
+        np.testing.assert_allclose(ary[0], arg1 - arg2, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hsub_scalar(self):
@@ -697,7 +699,7 @@ def test_hsub_scalar(self):
         arg2 = np.float16(1.57)
         compiled[1, 1](ary, arg1, arg2)
         ref = arg1 - arg2
-        np.testing.assert_allclose(ary[0], ref)
+        np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

     @skip_on_cudasim("Compilation unsupported in the simulator")
     @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -714,7 +716,7 @@ def test_hmul(self):
         arg1 = np.array([3.0], dtype=np.float16)
         arg2 = np.array([4.0], dtype=np.float16)
         compiled[1, 1](ary, arg1, arg2)
-        np.testing.assert_allclose(ary[0], arg1 * arg2)
+        np.testing.assert_allclose(ary[0], arg1 * arg2, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hmul_scalar(self):
@@ -724,7 +726,7 @@ def test_hmul_scalar(self):
         arg2 = np.float16(1.57)
         compiled[1, 1](ary, arg1, arg2)
         ref = arg1 * arg2
-        np.testing.assert_allclose(ary[0], ref)
+        np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

     @skip_on_cudasim("Compilation unsupported in the simulator")
     @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -743,7 +745,7 @@ def test_hdiv_scalar(self):

         compiled[1, 1](ary, arg1, arg2)
         ref = arg1 / arg2
-        np.testing.assert_allclose(ary[0], ref)
+        np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hdiv(self):
@@ -754,15 +756,15 @@ def test_hdiv(self):

         compiled.forall(ary.size)(ary, arry1, arry2)
         ref = arry1 / arry2
-        np.testing.assert_allclose(ary, ref)
+        np.testing.assert_allclose(ary, ref, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hneg(self):
         compiled = cuda.jit("void(f2[:], f2[:])")(simple_hneg)
         ary = np.zeros(1, dtype=np.float16)
         arg1 = np.array([3.0], dtype=np.float16)
         compiled[1, 1](ary, arg1)
-        np.testing.assert_allclose(ary[0], -arg1)
+        np.testing.assert_allclose(ary[0], -arg1, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hneg_scalar(self):
@@ -771,7 +773,7 @@ def test_hneg_scalar(self):
         arg1 = np.float16(3.1415926)
         compiled[1, 1](ary, arg1)
         ref = -arg1
-        np.testing.assert_allclose(ary[0], ref)
+        np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

     @skip_on_cudasim("Compilation unsupported in the simulator")
     @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -787,7 +789,7 @@ def test_habs(self):
         ary = np.zeros(1, dtype=np.float16)
         arg1 = np.array([-3.0], dtype=np.float16)
         compiled[1, 1](ary, arg1)
-        np.testing.assert_allclose(ary[0], abs(arg1))
+        np.testing.assert_allclose(ary[0], abs(arg1), rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_habs_scalar(self):
@@ -796,7 +798,7 @@ def test_habs_scalar(self):
         arg1 = np.float16(-3.1415926)
         compiled[1, 1](ary, arg1)
         ref = abs(arg1)
-        np.testing.assert_allclose(ary[0], ref)
+        np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

     @skip_on_cudasim("Compilation unsupported in the simulator")
     @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -849,15 +851,15 @@ def test_fp16_intrinsics_common(self):
                 kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
                 kernel[1, N](r, x)
                 expected = fn(x, dtype=np.float16)
-                np.testing.assert_allclose(r, expected)
+                np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)

         x2 = np.random.randint(1, 10, size=N).astype(np.float16)
         for kernel, fn in zip(exp_kernels, expected_exp_functions):
             with self.subTest(fn=fn):
                 kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
                 kernel[1, N](r, x2)
                 expected = fn(x2, dtype=np.float16)
-                np.testing.assert_allclose(r, expected)
+                np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hexp10(self):
@@ -876,7 +878,7 @@ def hexp10_vectors(r, x):

         # Run the kernel
         hexp10_vectors[1, N](r, x)
-        np.testing.assert_allclose(r, 10**x)
+        np.testing.assert_allclose(r, 10**x, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_fp16_comparison(self):
@@ -948,10 +950,10 @@ def test_hmax(self):
         arg1 = np.float16(3.0)
         arg2 = np.float16(4.0)
         compiled[1, 1](ary, arg1, arg2)
-        np.testing.assert_allclose(ary[0], arg2)
+        np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
         arg1 = np.float16(5.0)
         compiled[1, 1](ary, arg1, arg2)
-        np.testing.assert_allclose(ary[0], arg1)
+        np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)

     @skip_unless_cc_53
     def test_hmin(self):
@@ -960,10 +962,10 @@ def test_hmin(self):
         arg1 = np.float16(3.0)
         arg2 = np.float16(4.0)
         compiled[1, 1](ary, arg1, arg2)
-        np.testing.assert_allclose(ary[0], arg1)
+        np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
         arg1 = np.float16(5.0)
         compiled[1, 1](ary, arg1, arg2)
-        np.testing.assert_allclose(ary[0], arg2)
+        np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)

     def test_cbrt_f32(self):
         compiled = cuda.jit("void(float32[:], float32)")(simple_cbrt)

pyproject.toml

Lines changed: 2 additions & 2 deletions

@@ -25,7 +25,7 @@ dependencies = ["numba>=0.60.0"]
 [project.optional-dependencies]
 cu12 = [
     "cuda-bindings>=12.9.1,<13.0.0",
-    "cuda-core==0.3.*",
+    "cuda-core>=0.3.0,<1.0.0",
     "cuda-python==12.9.*", # supports all CTK 12.x
     "nvidia-cuda-nvcc-cu12", # for libNVVM
     "nvidia-cuda-runtime-cu12",
@@ -36,7 +36,7 @@ cu12 = [
 # TODO: Use cuda-toolkit package dependencies - e.g. cuda-toolkit[curand,nvvm,nvrtc]=13.*
 cu13 = [
     "cuda-bindings==13.*",
-    "cuda-core==0.3.2,<0.4.0dev0",
+    "cuda-core>=0.3.2,<1.0.0",
     "cuda-python==13.*",
     "nvidia-nvvm==13.*",
     "nvidia-cuda-runtime==13.*",
