Examples where underflow (or a subnormal result) produces `nan`:
|
if (from_bits == 0 && kFromHasZero) { |
|
if constexpr (kToHasZero) { |
|
// Keep the sign, if `To` supports it. |
|
return from_sign_bit && kToIsSigned ? -To{} : To{}; |
|
} else { |
|
return kSaturate ? std::numeric_limits<To>::denorm_min() |
|
: Eigen::NumTraits<To>::quiet_NaN(); |
|
} |
|
} |
|
if constexpr (!kToIsSigned && kFromIsSigned) { |
|
if (from_sign_bit) { |
|
return kSaturate ? std::numeric_limits<To>::lowest() |
|
: Eigen::NumTraits<To>::quiet_NaN(); |
|
} |
|
} |
Example where overflow produces `inf`:
|
// `From` supports larger values than `To`, we may overflow. |
|
if constexpr (std::make_pair(std::numeric_limits<To>::max_exponent, |
|
std::numeric_limits<To>::digits) < |
|
std::make_pair(std::numeric_limits<From>::max_exponent, |
|
std::numeric_limits<From>::digits)) { |
|
if (rounded_from_bits > aligned_highest) { |
|
// Overflowed values map to highest or infinity depending on kSaturate. |
|
to = kSaturate ? Eigen::NumTraits<To>::highest() |
|
: Eigen::NumTraits<To>::infinity(); |
|
} |
|
} |
Expected results
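Underflow and overflow should be handled consistently: both `nan`, or both `inf`.
Note that `nan` > `inf`: `isnan` checks whether the bit representation of `abs(a)` is greater than that of `infinity()`: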
|
constexpr inline bool(isnan)(const float8_e3m4& a) { |
|
return abs(a).rep() > std::numeric_limits<float8_e3m4>::infinity().rep(); |
|
} |
|
constexpr inline bool(isnan)(const float8_e4m3& a) { |
|
return abs(a).rep() > std::numeric_limits<float8_e4m3>::infinity().rep(); |
|
} |
So it is confusing to use `nan`, which compares above `inf`, for values that are too low.
Examples of underflow (or subnormal) = `nan`
ml_dtypes/ml_dtypes/include/float8.h
Lines 1423 to 1431 in 1527272
ml_dtypes/ml_dtypes/include/float8.h
Lines 1433 to 1438 in 1527272
Examples of overflow = `inf`
ml_dtypes/ml_dtypes/include/float8.h
Lines 1562 to 1572 in 1527272
Expected results
Both `nan`, or both `inf`.
`nan` > `inf`
ml_dtypes/ml_dtypes/include/float8.h
Lines 1103 to 1105 in 1527272
ml_dtypes/ml_dtypes/include/float8.h
Lines 1111 to 1113 in 1527272
So it is confusing to use `nan` for lower values.