Skip to content

Commit a6fc0ee

Browse files
authored
Merge pull request #52 from awf/define-by-bias
Update to IEEE P3109 Release 3.
2 parents f7ffce9 + 96175c9 commit a6fc0ee

20 files changed

+13365
-3420
lines changed

docs/source/01-decode.ipynb

Lines changed: 455 additions & 451 deletions
Large diffs are not rendered by default.

docs/source/02-value-stats.ipynb

Lines changed: 844 additions & 491 deletions
Large diffs are not rendered by default.

docs/source/03-value-tables.ipynb

Lines changed: 11650 additions & 2294 deletions
Large diffs are not rendered by default.

docs/source/04-benchmark.ipynb

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 3,
5+
"execution_count": 1,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -34,17 +34,24 @@
3434
},
3535
{
3636
"cell_type": "code",
37-
"execution_count": 4,
37+
"execution_count": 2,
3838
"metadata": {},
3939
"outputs": [
40+
{
41+
"name": "stderr",
42+
"output_type": "stream",
43+
"text": [
44+
"WARNING:2025-08-20 15:40:01,949:jax._src.xla_bridge:872: An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n"
45+
]
46+
},
4047
{
4148
"name": "stdout",
4249
"output_type": "stream",
4350
"text": [
44-
"GFloat scalar : 7510.22 nsec (25 runs at size 10000)\n",
45-
"GFloat vectorized, numpy arrays: 43.82 nsec (25 runs at size 1000000)\n",
46-
"GFloat vectorized, JAX JIT : 2.69 nsec (500 runs at size 1000000)\n",
47-
"ML_dtypes : 2.57 nsec (500 runs at size 1000000)\n"
51+
"GFloat scalar : 2605.38 nsec (50 runs at size 10000)\n",
52+
"GFloat vectorized, numpy arrays: 50.20 nsec (25 runs at size 1000000)\n",
53+
"GFloat vectorized, JAX JIT : 3.79 nsec (500 runs at size 1000000)\n",
54+
"ML_dtypes : 2.60 nsec (500 runs at size 1000000)\n"
4855
]
4956
}
5057
],
@@ -101,7 +108,7 @@
101108
],
102109
"metadata": {
103110
"kernelspec": {
104-
"display_name": ".venv",
111+
"display_name": "gfloat",
105112
"language": "python",
106113
"name": "python3"
107114
},
@@ -115,7 +122,7 @@
115122
"name": "python",
116123
"nbconvert_exporter": "python",
117124
"pygments_lexer": "ipython3",
118-
"version": "3.10.0"
125+
"version": "3.12.3"
119126
}
120127
},
121128
"nbformat": 4,

docs/source/05-stochastic-rounding.ipynb

Lines changed: 17 additions & 20 deletions
Large diffs are not rendered by default.

docs/source/index.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,19 @@ formats in Python. Headline features:
2626
Provided Formats
2727
----------------
2828

29-
Formats are parameterized by the primary IEEE-754 parameters of:
29+
Formats are parameterized by the following primary parameters:
3030

3131
* Width in bits (k)
3232
* Precision (p)
33-
* Maximum exponent (emax)
33+
* Exponent bias (bias)
3434

3535
with additional fields defining the presence/encoding of:
3636

37-
* Infinities
37+
* Domain (Finite vs Extended)
38+
* Signed/unsigned
3839
* Not-a-number (NaN) values
3940
* Negative zero
4041
* Subnormal numbers
41-
* Signed/unsigned
4242
* Two's complement encoding (of the significand)
4343

4444
This allows an implementation of generic floating point encode/decode logic,

src/gfloat/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from .round_ndarray import round_ndarray
1515
from .encode_ndarray import encode_ndarray
1616
from .decode_ndarray import decode_ndarray
17-
from .types import FloatClass, FloatValue, FormatInfo, RoundMode
17+
from .types import FloatClass, FloatValue, FormatInfo, Domain, RoundMode
1818

1919
# Don't automatically import from .formats.
2020
# If the user wants them in their namespace, they can explicitly import

src/gfloat/decode.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from .types import FloatClass, FloatValue, FormatInfo
5+
from .types import FloatClass, FloatValue, FormatInfo, Domain
66

77

88
def decode_float(fi: FormatInfo, i: int) -> FloatValue:
@@ -46,29 +46,32 @@ def decode_float(fi: FormatInfo, i: int) -> FloatValue:
4646
if fi.is_twos_complement and signbit:
4747
significand = (1 << t) - significand
4848

49-
expBias = fi.expBias
49+
bias = fi.bias
5050

5151
iszero = exp == 0 and significand == 0 and fi.has_zero
5252
issubnormal = fi.has_subnormals and (exp == 0) and (significand != 0)
5353
isnormal = not iszero and not issubnormal
5454
if iszero or issubnormal:
55-
expval = 1 - expBias
55+
expval = 1 - bias
5656
fsignificand = significand * 2**-t
5757
else:
58-
expval = exp - expBias
58+
expval = exp - bias
5959
fsignificand = 1.0 + significand * 2**-t
6060

6161
# Handle specials: Infs, NaN, -0, NaN_0
62-
signed_infinity = -np.inf if signbit else np.inf
6362

63+
# High NaNs
6464
fval = None
65-
# All-bits-special exponent (ABSE)
66-
if w > 0 and exp == 2**w - 1:
67-
min_i_with_nan = 2 ** (p - 1) - fi.num_high_nans
68-
if significand >= min_i_with_nan:
69-
fval = np.nan
70-
if fi.has_infs and significand == min_i_with_nan - 1:
71-
fval = signed_infinity
65+
max_positive_code = (1 << (k - fi.signBits)) - 1
66+
code_without_sign = i & max_positive_code
67+
if code_without_sign > max_positive_code - fi.num_high_nans:
68+
# Return nan, ignore sign
69+
fval = np.nan
70+
71+
# Infinities
72+
if fi.domain == Domain.Extended:
73+
if code_without_sign == max_positive_code - fi.num_high_nans:
74+
fval = -np.inf if signbit else np.inf
7275

7376
# Negative zero or NaN
7477
if iszero and i == signmask and not fi.is_twos_complement:

src/gfloat/decode_ndarray.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from types import ModuleType
44
import numpy as np
55
import numpy.typing as npt
6-
from .types import FormatInfo
6+
from .types import FormatInfo, Domain
77

88

99
def decode_ndarray(
@@ -47,16 +47,17 @@ def decode_ndarray(
4747
if fi.is_twos_complement:
4848
significand = np.where(sign < 0, (1 << t) - significand, significand)
4949

50-
expBias = fi.expBias
50+
bias = fi.bias
5151

5252
fval = np.zeros_like(codes, dtype=np.float64)
5353
isspecial = np.zeros_like(codes, dtype=bool)
5454

55-
if fi.has_infs:
55+
if fi.domain == Domain.Extended:
5656
fval = np.where(codes == fi.code_of_posinf, np.inf, fval)
5757
isspecial |= codes == fi.code_of_posinf
58-
fval = np.where(codes == fi.code_of_neginf, -np.inf, fval)
59-
isspecial |= codes == fi.code_of_neginf
58+
if fi.is_signed:
59+
fval = np.where(codes == fi.code_of_neginf, -np.inf, fval)
60+
isspecial |= codes == fi.code_of_neginf
6061

6162
if fi.num_nans > 0:
6263
code_is_nan = codes == fi.code_of_nan
@@ -76,7 +77,7 @@ def decode_ndarray(
7677
fval = np.where(iszero & (sign < 0), -0.0, fval)
7778

7879
issubnormal = (exp == 0) & (significand != 0) & fi.has_subnormals
79-
expval = np.where(issubnormal, 1 - expBias, exp - expBias)
80+
expval = np.where(issubnormal, 1 - bias, exp - bias)
8081
fsignificand = np.where(issubnormal, 0.0, 1.0) + np.ldexp(significand, -t)
8182

8283
# Normal/Subnormal/Zero case, other values will be overwritten

src/gfloat/encode.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import numpy as np
66

7-
from .types import FormatInfo
7+
from .types import FormatInfo, Domain
88

99

1010
def encode_float(fi: FormatInfo, v: float) -> int:
@@ -36,14 +36,14 @@ def encode_float(fi: FormatInfo, v: float) -> int:
3636

3737
# Overflow/underflow
3838
if v > fi.max:
39-
if fi.has_infs:
39+
if fi.domain == Domain.Extended:
4040
return fi.code_of_posinf
4141
if fi.num_nans > 0:
4242
return fi.code_of_nan
4343
return fi.code_of_max
4444

4545
if v < fi.min:
46-
if fi.has_infs:
46+
if fi.domain == Domain.Extended:
4747
return fi.code_of_neginf
4848
if fi.num_nans > 0:
4949
return fi.code_of_nan
@@ -65,12 +65,12 @@ def encode_float(fi: FormatInfo, v: float) -> int:
6565
exp -= 1
6666
# now sig in range [1, 2)
6767

68-
biased_exp = exp + fi.expBias
68+
biased_exp = exp + fi.bias
6969
if biased_exp < 1 and fi.has_subnormals:
7070
# subnormal
7171
sig *= 2.0 ** (biased_exp - 1)
7272
biased_exp = 0
73-
assert vpos == sig * 2 ** (1 - fi.expBias)
73+
assert vpos == sig * 2 ** (1 - fi.bias)
7474
else:
7575
if sig > 0:
7676
sig -= 1.0

0 commit comments

Comments
 (0)