diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-08 17:34:50 +0000 |
commit | 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch) | |
tree | 62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/Support/APFloat.cpp | |
parent | cf037972ea8863e2bab7461d77345367d2c1e054 (diff) | |
parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/APFloat.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Support/APFloat.cpp | 1142 |
1 files changed, 591 insertions, 551 deletions
diff --git a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp index eae4fdb6c3d0..4a73739b5282 100644 --- a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp @@ -14,8 +14,10 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" @@ -51,209 +53,303 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi namespace llvm { - // How the nonfinite values Inf and NaN are represented. - enum class fltNonfiniteBehavior { - // Represents standard IEEE 754 behavior. A value is nonfinite if the - // exponent field is all 1s. In such cases, a value is Inf if the - // significand bits are all zero, and NaN otherwise - IEEE754, - - // Only the Float8E5M2 has this behavior. There is no Inf representation. A - // value is NaN if the exponent field and the mantissa field are all 1s. - // This behavior matches the FP8 E4M3 type described in - // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs - // as non-signalling, although the paper does not state whether the NaN - // values are signalling or not. - NanOnly, - }; +// How the nonfinite values Inf and NaN are represented. +enum class fltNonfiniteBehavior { + // Represents standard IEEE 754 behavior. A value is nonfinite if the + // exponent field is all 1s. In such cases, a value is Inf if the + // significand bits are all zero, and NaN otherwise + IEEE754, + + // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN, + // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). 
There is no + // representation for Inf, and operations that would ordinarily produce Inf + // produce NaN instead. + // The details of the NaN representation(s) in this form are determined by the + // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available + // encodings do not distinguish between signalling and quiet NaN. + NanOnly, +}; - /* Represents floating point arithmetic semantics. */ - struct fltSemantics { - /* The largest E such that 2^E is representable; this matches the - definition of IEEE 754. */ - APFloatBase::ExponentType maxExponent; +// How NaN values are represented. This is currently only used in combination +// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE +// while having IEEE non-finite behavior is liable to lead to unexpected +// results. +enum class fltNanEncoding { + // Represents the standard IEEE behavior where a value is NaN if its + // exponent is all 1s and the significand is non-zero. + IEEE, + + // Represents the behavior in the Float8E4M3 floating point type where NaN is + // represented by having the exponent and mantissa set to all 1s. + // This behavior matches the FP8 E4M3 type described in + // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs + // as non-signalling, although the paper does not state whether the NaN + // values are signalling or not. + AllOnes, + + // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types + // where NaN is represented by a sign bit of 1 and all 0s in the exponent + // and mantissa (i.e. the negative zero encoding in an IEEE float). Since + // there is only one NaN value, it is treated as quiet NaN. This matches the + // behavior described in https://arxiv.org/abs/2206.02915 . + NegativeZero, +}; - /* The smallest E such that 2^E is a normalized number; this - matches the definition of IEEE 754. */ - APFloatBase::ExponentType minExponent; +/* Represents floating point arithmetic semantics. 
*/ +struct fltSemantics { + /* The largest E such that 2^E is representable; this matches the + definition of IEEE 754. */ + APFloatBase::ExponentType maxExponent; - /* Number of bits in the significand. This includes the integer - bit. */ - unsigned int precision; + /* The smallest E such that 2^E is a normalized number; this + matches the definition of IEEE 754. */ + APFloatBase::ExponentType minExponent; - /* Number of bits actually used in the semantics. */ - unsigned int sizeInBits; + /* Number of bits in the significand. This includes the integer + bit. */ + unsigned int precision; - fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; + /* Number of bits actually used in the semantics. */ + unsigned int sizeInBits; - // Returns true if any number described by this semantics can be precisely - // represented by the specified semantics. Does not take into account - // the value of fltNonfiniteBehavior. - bool isRepresentableBy(const fltSemantics &S) const { - return maxExponent <= S.maxExponent && minExponent >= S.minExponent && - precision <= S.precision; - } - }; + fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; - static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; - static const fltSemantics semBFloat = {127, -126, 8, 16}; - static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; - static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; - static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; - static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; - static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8, - fltNonfiniteBehavior::NanOnly}; - static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; - static const fltSemantics semBogus = {0, 0, 0, 0}; - - /* The IBM double-double semantics. Such a number consists of a pair of IEEE - 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, - (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 
- Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent - to each other, and two 11-bit exponents. - - Note: we need to make the value different from semBogus as otherwise - an unsafe optimization may collapse both values to a single address, - and we heavily rely on them having distinct addresses. */ - static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; - - /* These are legacy semantics for the fallback, inaccrurate implementation of - IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the - operation. It's equivalent to having an IEEE number with consecutive 106 - bits of mantissa and 11 bits of exponent. - - It's not equivalent to IBM double-double. For example, a legit IBM - double-double, 1 + epsilon: - - 1 + epsilon = 1 + (1 >> 1076) - - is not representable by a consecutive 106 bits of mantissa. - - Currently, these semantics are used in the following way: - - semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> - (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> - semPPCDoubleDoubleLegacy -> IEEE operations - - We use bitcastToAPInt() to get the bit representation (in APInt) of the - underlying IEEEdouble, then use the APInt constructor to construct the - legacy IEEE float. - - TODO: Implement all operations in semPPCDoubleDouble, and delete these - semantics. 
*/ - static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, - 53 + 53, 128}; - - const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { - switch (S) { - case S_IEEEhalf: - return IEEEhalf(); - case S_BFloat: - return BFloat(); - case S_IEEEsingle: - return IEEEsingle(); - case S_IEEEdouble: - return IEEEdouble(); - case S_IEEEquad: - return IEEEquad(); - case S_PPCDoubleDouble: - return PPCDoubleDouble(); - case S_Float8E5M2: - return Float8E5M2(); - case S_Float8E4M3FN: - return Float8E4M3FN(); - case S_x87DoubleExtended: - return x87DoubleExtended(); - } - llvm_unreachable("Unrecognised floating semantics"); - } - - APFloatBase::Semantics - APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { - if (&Sem == &llvm::APFloat::IEEEhalf()) - return S_IEEEhalf; - else if (&Sem == &llvm::APFloat::BFloat()) - return S_BFloat; - else if (&Sem == &llvm::APFloat::IEEEsingle()) - return S_IEEEsingle; - else if (&Sem == &llvm::APFloat::IEEEdouble()) - return S_IEEEdouble; - else if (&Sem == &llvm::APFloat::IEEEquad()) - return S_IEEEquad; - else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) - return S_PPCDoubleDouble; - else if (&Sem == &llvm::APFloat::Float8E5M2()) - return S_Float8E5M2; - else if (&Sem == &llvm::APFloat::Float8E4M3FN()) - return S_Float8E4M3FN; - else if (&Sem == &llvm::APFloat::x87DoubleExtended()) - return S_x87DoubleExtended; - else - llvm_unreachable("Unknown floating semantics"); + fltNanEncoding nanEncoding = fltNanEncoding::IEEE; + // Returns true if any number described by this semantics can be precisely + // represented by the specified semantics. Does not take into account + // the value of fltNonfiniteBehavior. 
+ bool isRepresentableBy(const fltSemantics &S) const { + return maxExponent <= S.maxExponent && minExponent >= S.minExponent && + precision <= S.precision; } +}; - const fltSemantics &APFloatBase::IEEEhalf() { - return semIEEEhalf; - } - const fltSemantics &APFloatBase::BFloat() { - return semBFloat; - } - const fltSemantics &APFloatBase::IEEEsingle() { - return semIEEEsingle; - } - const fltSemantics &APFloatBase::IEEEdouble() { - return semIEEEdouble; - } - const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } - const fltSemantics &APFloatBase::PPCDoubleDouble() { - return semPPCDoubleDouble; - } - const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } - const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } - const fltSemantics &APFloatBase::x87DoubleExtended() { - return semX87DoubleExtended; - } - const fltSemantics &APFloatBase::Bogus() { return semBogus; } +static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; +static constexpr fltSemantics semBFloat = {127, -126, 8, 16}; +static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32}; +static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; +static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128}; +static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; +static constexpr fltSemantics semFloat8E5M2FNUZ = { + 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloat8E4M3FN = { + 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes}; +static constexpr fltSemantics semFloat8E4M3FNUZ = { + 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloat8E4M3B11FNUZ = { + 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; +static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 
+static constexpr fltSemantics semBogus = {0, 0, 0, 0}; + +/* The IBM double-double semantics. Such a number consists of a pair of IEEE + 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, + (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. + Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent + to each other, and two 11-bit exponents. + + Note: we need to make the value different from semBogus as otherwise + an unsafe optimization may collapse both values to a single address, + and we heavily rely on them having distinct addresses. */ +static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; + +/* These are legacy semantics for the fallback, inaccrurate implementation of + IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the + operation. It's equivalent to having an IEEE number with consecutive 106 + bits of mantissa and 11 bits of exponent. + + It's not equivalent to IBM double-double. For example, a legit IBM + double-double, 1 + epsilon: + + 1 + epsilon = 1 + (1 >> 1076) + + is not representable by a consecutive 106 bits of mantissa. + + Currently, these semantics are used in the following way: + + semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> + (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> + semPPCDoubleDoubleLegacy -> IEEE operations + + We use bitcastToAPInt() to get the bit representation (in APInt) of the + underlying IEEEdouble, then use the APInt constructor to construct the + legacy IEEE float. + + TODO: Implement all operations in semPPCDoubleDouble, and delete these + semantics. 
*/ +static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, + 53 + 53, 128}; + +const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { + switch (S) { + case S_IEEEhalf: + return IEEEhalf(); + case S_BFloat: + return BFloat(); + case S_IEEEsingle: + return IEEEsingle(); + case S_IEEEdouble: + return IEEEdouble(); + case S_IEEEquad: + return IEEEquad(); + case S_PPCDoubleDouble: + return PPCDoubleDouble(); + case S_Float8E5M2: + return Float8E5M2(); + case S_Float8E5M2FNUZ: + return Float8E5M2FNUZ(); + case S_Float8E4M3FN: + return Float8E4M3FN(); + case S_Float8E4M3FNUZ: + return Float8E4M3FNUZ(); + case S_Float8E4M3B11FNUZ: + return Float8E4M3B11FNUZ(); + case S_FloatTF32: + return FloatTF32(); + case S_x87DoubleExtended: + return x87DoubleExtended(); + } + llvm_unreachable("Unrecognised floating semantics"); +} + +APFloatBase::Semantics +APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { + if (&Sem == &llvm::APFloat::IEEEhalf()) + return S_IEEEhalf; + else if (&Sem == &llvm::APFloat::BFloat()) + return S_BFloat; + else if (&Sem == &llvm::APFloat::IEEEsingle()) + return S_IEEEsingle; + else if (&Sem == &llvm::APFloat::IEEEdouble()) + return S_IEEEdouble; + else if (&Sem == &llvm::APFloat::IEEEquad()) + return S_IEEEquad; + else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) + return S_PPCDoubleDouble; + else if (&Sem == &llvm::APFloat::Float8E5M2()) + return S_Float8E5M2; + else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) + return S_Float8E5M2FNUZ; + else if (&Sem == &llvm::APFloat::Float8E4M3FN()) + return S_Float8E4M3FN; + else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) + return S_Float8E4M3FNUZ; + else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) + return S_Float8E4M3B11FNUZ; + else if (&Sem == &llvm::APFloat::FloatTF32()) + return S_FloatTF32; + else if (&Sem == &llvm::APFloat::x87DoubleExtended()) + return S_x87DoubleExtended; + else + llvm_unreachable("Unknown floating semantics"); +} + +const 
fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } +const fltSemantics &APFloatBase::BFloat() { return semBFloat; } +const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } +const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } +const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } +const fltSemantics &APFloatBase::PPCDoubleDouble() { + return semPPCDoubleDouble; +} +const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } +const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } +const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } +const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } +const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { + return semFloat8E4M3B11FNUZ; +} +const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } +const fltSemantics &APFloatBase::x87DoubleExtended() { + return semX87DoubleExtended; +} +const fltSemantics &APFloatBase::Bogus() { return semBogus; } + +constexpr RoundingMode APFloatBase::rmNearestTiesToEven; +constexpr RoundingMode APFloatBase::rmTowardPositive; +constexpr RoundingMode APFloatBase::rmTowardNegative; +constexpr RoundingMode APFloatBase::rmTowardZero; +constexpr RoundingMode APFloatBase::rmNearestTiesToAway; + +/* A tight upper bound on number of parts required to hold the value + pow(5, power) is + + power * 815 / (351 * integerPartWidth) + 1 + + However, whilst the result may require only this many parts, + because we are multiplying two values to get it, the + multiplication may require an extra part with the excess part + being zero (consider the trivial case of 1 * 1, tcFullMultiply + requires two parts to hold the single-part result). So we add an + extra one to guarantee enough space whilst multiplying. 
*/ +const unsigned int maxExponent = 16383; +const unsigned int maxPrecision = 113; +const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; +const unsigned int maxPowerOfFiveParts = + 2 + + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); + +unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { + return semantics.precision; +} +APFloatBase::ExponentType +APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { + return semantics.maxExponent; +} +APFloatBase::ExponentType +APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { + return semantics.minExponent; +} +unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { + return semantics.sizeInBits; +} +unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, + bool isSigned) { + // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need + // at least one more bit than the MaxExponent to hold the max FP value. + unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; + // Extra sign bit needed. + if (isSigned) + ++MinBitWidth; + return MinBitWidth; +} + +bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, + const fltSemantics &Dst) { + // Exponent range must be larger. + if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) + return false; - constexpr RoundingMode APFloatBase::rmNearestTiesToEven; - constexpr RoundingMode APFloatBase::rmTowardPositive; - constexpr RoundingMode APFloatBase::rmTowardNegative; - constexpr RoundingMode APFloatBase::rmTowardZero; - constexpr RoundingMode APFloatBase::rmNearestTiesToAway; + // If the mantissa is long enough, the result value could still be denormal + // with a larger exponent range. + // + // FIXME: This condition is probably not accurate but also shouldn't be a + // practical concern with existing types. 
+ return Dst.precision >= Src.precision; +} - /* A tight upper bound on number of parts required to hold the value - pow(5, power) is +unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { + return Sem.sizeInBits; +} - power * 815 / (351 * integerPartWidth) + 1 +static constexpr APFloatBase::ExponentType +exponentZero(const fltSemantics &semantics) { + return semantics.minExponent - 1; +} - However, whilst the result may require only this many parts, - because we are multiplying two values to get it, the - multiplication may require an extra part with the excess part - being zero (consider the trivial case of 1 * 1, tcFullMultiply - requires two parts to hold the single-part result). So we add an - extra one to guarantee enough space whilst multiplying. */ - const unsigned int maxExponent = 16383; - const unsigned int maxPrecision = 113; - const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; - const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); +static constexpr APFloatBase::ExponentType +exponentInf(const fltSemantics &semantics) { + return semantics.maxExponent + 1; +} - unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { - return semantics.precision; - } - APFloatBase::ExponentType - APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { +static constexpr APFloatBase::ExponentType +exponentNaN(const fltSemantics &semantics) { + if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (semantics.nanEncoding == fltNanEncoding::NegativeZero) + return exponentZero(semantics); return semantics.maxExponent; } - APFloatBase::ExponentType - APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { - return semantics.minExponent; - } - unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { - return semantics.sizeInBits; - } - - unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) 
{ - return Sem.sizeInBits; + return semantics.maxExponent + 1; } /* A bunch of private, handy routines. */ @@ -262,9 +358,7 @@ static inline Error createError(const Twine &Err) { return make_error<StringError>(Err, inconvertibleErrorCode()); } -static inline unsigned int -partCountForBits(unsigned int bits) -{ +static constexpr inline unsigned int partCountForBits(unsigned int bits) { return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; } @@ -509,7 +603,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* If we ran off the end it is exactly zero or one-half, otherwise a little more. */ - if (hexDigit == -1U) + if (hexDigit == UINT_MAX) return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; else return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; @@ -526,7 +620,7 @@ lostFractionThroughTruncation(const APFloatBase::integerPart *parts, lsb = APInt::tcLSB(parts, partCount); - /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ + /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ if (bits <= lsb) return lfExactlyZero; if (bits == lsb + 1) @@ -798,10 +892,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { APInt fill_storage; if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { - // The only NaN representation is where the mantissa is all 1s, which is - // non-signalling. + // Finite-only types do not distinguish signalling and quiet NaN, so + // make them all quiet. SNaN = false; - fill_storage = APInt::getAllOnes(semantics->precision - 1); + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + sign = true; + fill_storage = APInt::getZero(semantics->precision - 1); + } else { + fill_storage = APInt::getAllOnes(semantics->precision - 1); + } fill = &fill_storage; } @@ -832,6 +931,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { // conventionally, this is the next bit down from the QNaN bit. 
if (APInt::tcIsZero(significand, numParts)) APInt::tcSetBit(significand, QNaNBit - 1); + } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // The only NaN is a quiet NaN, and it has no bits set in the significand. + // Do nothing. } else { // We always have to set the QNaN bit to make it a QNaN. APInt::tcSetBit(significand, QNaNBit); @@ -976,7 +1078,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const { } bool IEEEFloat::isLargest() const { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) { // The largest number by magnitude in our format will be the floating point // number with maximum exponent and with significand that is all ones except // the LSB. @@ -1418,7 +1521,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { exponent = semantics->maxExponent; tcSetLeastSignificantBits(significandParts(), partCount(), semantics->precision); - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) APInt::tcClearBit(significandParts(), 0); return opInexact; @@ -1519,7 +1623,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, } } + // The all-ones value is an overflow if NaN is all ones. If NaN is + // represented by negative zero, then it is a valid finite value. if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes && exponent == semantics->maxExponent && isSignificandAllOnes()) return handleOverflow(rounding_mode); @@ -1530,8 +1637,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, underflow for exact results. */ if (lost_fraction == lfExactlyZero) { /* Canonicalize zeroes. 
*/ - if (omsb == 0) + if (omsb == 0) { category = fcZero; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } return opOK; } @@ -1549,18 +1659,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, /* Renormalize by incrementing the exponent and shifting our significand right one. However if we already have the maximum exponent we overflow to infinity. */ - if (exponent == semantics->maxExponent) { - category = fcInfinity; - - return (opStatus) (opOverflow | opInexact); - } + if (exponent == semantics->maxExponent) + // Invoke overflow handling with a rounding mode that will guarantee + // that the result gets turned into the correct infinity representation. + // This is needed instead of just setting the category to infinity to + // account for 8-bit floating point types that have no inf, only NaN. + return handleOverflow(sign ? rmTowardNegative : rmTowardPositive); shiftSignificandRight(1); return opInexact; } + // The all-ones value is an overflow if NaN is all ones. If NaN is + // represented by negative zero, then it is a valid finite value. if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes && exponent == semantics->maxExponent && isSignificandAllOnes()) return handleOverflow(rounding_mode); } @@ -1574,8 +1688,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, assert(omsb < semantics->precision); /* Canonicalize zeroes. */ - if (omsb == 0) + if (omsb == 0) { category = fcZero; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } /* The fcZero case is a denormal that underflowed to zero. */ return (opStatus) (opUnderflow | opInexact); @@ -1877,6 +1994,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { /* Change sign. */ void IEEEFloat::changeSign() { + // With NaN-as-negative-zero, neither NaN nor zero can change + // their signs. 
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero && + (isZero() || isNaN())) + return; /* Look mummy, this one's easy. */ sign = !sign; } @@ -1906,6 +2028,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, if (category == fcZero) { if (rhs.category != fcZero || (sign == rhs.sign) == subtract) sign = (rounding_mode == rmTowardNegative); + // NaN-in-negative-zero means zeros need to be normalized to +0. + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; } return fs; @@ -1931,6 +2056,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, sign ^= rhs.sign; fs = multiplySpecials(rhs); + if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; if (isFiniteNonZero()) { lostFraction lost_fraction = multiplySignificand(rhs); fs = normalize(rounding_mode, lost_fraction); @@ -1949,6 +2076,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, sign ^= rhs.sign; fs = divideSpecials(rhs); + if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; if (isFiniteNonZero()) { lostFraction lost_fraction = divideSignificand(rhs); fs = normalize(rounding_mode, lost_fraction); @@ -2057,8 +2186,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { } } - if (isZero()) + if (isZero()) { sign = origSign; // IEEE754 requires this + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + // But some 8-bit floats only have positive 0. 
+ sign = false; + } + else sign ^= origSign; return fs; @@ -2083,8 +2217,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { fs = subtract(V, rmNearestTiesToEven); assert(fs==opOK); } - if (isZero()) + if (isZero()) { sign = origSign; // fmod requires this + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } return fs; } @@ -2112,8 +2249,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a positive zero unless rounding to minus infinity, except that adding two like-signed zeroes gives that zero. */ - if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) + if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { sign = (rounding_mode == rmTowardNegative); + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } } else { fs = multiplySpecials(multiplicand); @@ -2389,6 +2529,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, return is_signaling ? opInvalidOp : opOK; } + // If NaN is negative zero, we need to create a new NaN to avoid converting + // NaN to -Inf. + if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && + semantics->nanEncoding != fltNanEncoding::NegativeZero) + makeNaN(false, false); + *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; // For x87 extended precision, we want to make a NaN, not a special NaN if @@ -2410,6 +2556,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, makeNaN(false, sign); *losesInfo = true; fs = opInexact; + } else if (category == fcZero && + semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // Negative zero loses info, but positive zero doesn't. + *losesInfo = + fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; + fs = *losesInfo ? 
opInexact : opOK; + // NaN is negative zero means -0 -> +0, which can lose information + sign = false; } else { *losesInfo = false; fs = opOK; @@ -2696,7 +2850,7 @@ IEEEFloat::convertFromHexadecimalString(StringRef s, } hex_value = hexDigitValue(*p); - if (hex_value == -1U) + if (hex_value == UINT_MAX) break; p++; @@ -2877,9 +3031,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { category = fcZero; fs = opOK; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; - /* Check whether the normalized exponent is high enough to overflow - max during the log-rebasing in the max-exponent check below. */ + /* Check whether the normalized exponent is high enough to overflow + max during the log-rebasing in the max-exponent check below. */ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { fs = handleOverflow(rounding_mode); @@ -3337,201 +3493,121 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { return APInt(128, words); } -APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); - assert(partCount()==2); +template <const fltSemantics &S> +APInt IEEEFloat::convertIEEEFloatToAPInt() const { + assert(semantics == &S); + + constexpr int bias = -(S.minExponent - 1); + constexpr unsigned int trailing_significand_bits = S.precision - 1; + constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; + constexpr integerPart integer_bit = + integerPart{1} << (trailing_significand_bits % integerPartWidth); + constexpr uint64_t significand_mask = integer_bit - 1; + constexpr unsigned int exponent_bits = + S.sizeInBits - 1 - trailing_significand_bits; + static_assert(exponent_bits < 64); + constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; - uint64_t myexponent, mysignificand, mysignificand2; + uint64_t myexponent; + 
std::array<integerPart, partCountForBits(trailing_significand_bits)> + mysignificand; if (isFiniteNonZero()) { - myexponent = exponent+16383; //bias - mysignificand = significandParts()[0]; - mysignificand2 = significandParts()[1]; - if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = mysignificand2 = 0; - } else if (category==fcInfinity) { - myexponent = 0x7fff; - mysignificand = mysignificand2 = 0; + myexponent = exponent + bias; + std::copy_n(significandParts(), mysignificand.size(), + mysignificand.begin()); + if (myexponent == 1 && + !(significandParts()[integer_bit_part] & integer_bit)) + myexponent = 0; // denormal + } else if (category == fcZero) { + myexponent = ::exponentZero(S) + bias; + mysignificand.fill(0); + } else if (category == fcInfinity) { + if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + llvm_unreachable("semantics don't support inf!"); + } + myexponent = ::exponentInf(S) + bias; + mysignificand.fill(0); } else { assert(category == fcNaN && "Unknown category!"); - myexponent = 0x7fff; - mysignificand = significandParts()[0]; - mysignificand2 = significandParts()[1]; - } - - uint64_t words[2]; - words[0] = mysignificand; - words[1] = ((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7fff) << 48) | - (mysignificand2 & 0xffffffffffffLL); + myexponent = ::exponentNaN(S) + bias; + std::copy_n(significandParts(), mysignificand.size(), + mysignificand.begin()); + } + std::array<uint64_t, (S.sizeInBits + 63) / 64> words; + auto words_iter = + std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); + if constexpr (significand_mask != 0) { + // Clear the integer bit. 
+ words[mysignificand.size() - 1] &= significand_mask; + } + std::fill(words_iter, words.end(), uint64_t{0}); + constexpr size_t last_word = words.size() - 1; + uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) + << ((S.sizeInBits - 1) % 64); + words[last_word] |= shifted_sign; + uint64_t shifted_exponent = (myexponent & exponent_mask) + << (trailing_significand_bits % 64); + words[last_word] |= shifted_exponent; + if constexpr (last_word == 0) { + return APInt(S.sizeInBits, words[0]); + } + return APInt(S.sizeInBits, words); +} - return APInt(128, words); +APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { + assert(partCount() == 2); + return convertIEEEFloatToAPInt<semIEEEquad>(); } APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); assert(partCount()==1); - - uint64_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+1023; //bias - mysignificand = *significandParts(); - if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category==fcInfinity) { - myexponent = 0x7ff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x7ff; - mysignificand = *significandParts(); - } - - return APInt(64, ((((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7ff) << 52) | - (mysignificand & 0xfffffffffffffLL)))); + return convertIEEEFloatToAPInt<semIEEEdouble>(); } APInt IEEEFloat::convertFloatAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); assert(partCount()==1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+127; //bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x800000)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand 
= 0; - } else if (category==fcInfinity) { - myexponent = 0xff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xff; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | - (mysignificand & 0x7fffff))); + return convertIEEEFloatToAPInt<semIEEEsingle>(); } APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semBFloat); assert(partCount() == 1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent + 127; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x80)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0xff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xff; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | - (mysignificand & 0x7f))); + return convertIEEEFloatToAPInt<semBFloat>(); } APInt IEEEFloat::convertHalfAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); assert(partCount()==1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+15; //bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x400)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category==fcInfinity) { - myexponent = 0x1f; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x1f; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | - (mysignificand & 0x3ff))); + return 
convertIEEEFloatToAPInt<semIEEEhalf>(); } APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2); assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E5M2>(); +} - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent + 15; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x4)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0x1f; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x1f; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) | - (mysignificand & 0x3))); +APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); } APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN); assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E4M3FN>(); +} - uint32_t myexponent, mysignificand; +APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); +} - if (isFiniteNonZero()) { - myexponent = exponent + 7; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x8)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0xf; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xf; - mysignificand = (uint32_t)*significandParts(); - } +APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + 
return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); +} - return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) | - (mysignificand & 0x7))); +APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloatTF32>(); } // This function creates an APInt that is just a bit map of the floating @@ -3560,9 +3636,21 @@ APInt IEEEFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) return convertFloat8E5M2APFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) + return convertFloat8E5M2FNUZAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) return convertFloat8E4M3FNAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) + return convertFloat8E4M3FNUZAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) + return convertFloat8E4M3B11FNUZAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloatTF32) + return convertFloatTF32APFloatToAPInt(); + assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && "unknown format!"); return convertF80LongDoubleAPFloatToAPInt(); @@ -3643,205 +3731,131 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { } } -void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { - uint64_t i1 = api.getRawData()[0]; - uint64_t i2 = api.getRawData()[1]; - uint64_t myexponent = (i2 >> 48) & 0x7fff; - uint64_t mysignificand = i1; - uint64_t mysignificand2 = i2 & 0xffffffffffffLL; +template <const fltSemantics &S> +void IEEEFloat::initFromIEEEAPInt(const APInt &api) { + assert(api.getBitWidth() == S.sizeInBits); + constexpr integerPart integer_bit = integerPart{1} + << ((S.precision - 1) % integerPartWidth); + constexpr uint64_t significand_mask = integer_bit - 1; + constexpr unsigned int trailing_significand_bits = S.precision - 1; + constexpr unsigned int 
stored_significand_parts = + partCountForBits(trailing_significand_bits); + constexpr unsigned int exponent_bits = + S.sizeInBits - 1 - trailing_significand_bits; + static_assert(exponent_bits < 64); + constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; + constexpr int bias = -(S.minExponent - 1); - initialize(&semIEEEquad); - assert(partCount()==2); - - sign = static_cast<unsigned int>(i2>>63); - if (myexponent==0 && - (mysignificand==0 && mysignificand2==0)) { - makeZero(sign); - } else if (myexponent==0x7fff && - (mysignificand==0 && mysignificand2==0)) { - makeInf(sign); - } else if (myexponent==0x7fff && - (mysignificand!=0 || mysignificand2 !=0)) { - category = fcNaN; - exponent = exponentNaN(); - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - } else { - category = fcNormal; - exponent = myexponent - 16383; - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - if (myexponent==0) // denormal - exponent = -16382; - else - significandParts()[1] |= 0x1000000000000LL; // integer bit + // Copy the bits of the significand. We need to clear out the exponent and + // sign bit in the last word. + std::array<integerPart, stored_significand_parts> mysignificand; + std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); + if constexpr (significand_mask != 0) { + mysignificand[mysignificand.size() - 1] &= significand_mask; } -} -void IEEEFloat::initFromDoubleAPInt(const APInt &api) { - uint64_t i = *api.getRawData(); - uint64_t myexponent = (i >> 52) & 0x7ff; - uint64_t mysignificand = i & 0xfffffffffffffLL; + // We assume the last word holds the sign bit, the exponent, and potentially + // some of the trailing significand field. 
+ uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; + uint64_t myexponent = + (last_word >> (trailing_significand_bits % 64)) & exponent_mask; - initialize(&semIEEEdouble); - assert(partCount()==1); + initialize(&S); + assert(partCount() == mysignificand.size()); - sign = static_cast<unsigned int>(i>>63); - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0x7ff && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0x7ff && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 1023; - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -1022; - else - *significandParts() |= 0x10000000000000LL; // integer bit - } -} + sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); -void IEEEFloat::initFromFloatAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 23) & 0xff; - uint32_t mysignificand = i & 0x7fffff; + bool all_zero_significand = + llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); - initialize(&semIEEEsingle); - assert(partCount()==1); + bool is_zero = myexponent == 0 && all_zero_significand; - sign = i >> 31; - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0xff && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0xff && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 127; //bias - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -126; - else - *significandParts() |= 0x800000; // integer bit + if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { + if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { + makeInf(sign); + return; + } } -} -void 
IEEEFloat::initFromBFloatAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 7) & 0xff; - uint32_t mysignificand = i & 0x7f; + bool is_nan = false; - initialize(&semBFloat); - assert(partCount() == 1); + if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { + is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; + } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { + bool all_ones_significand = + std::all_of(mysignificand.begin(), mysignificand.end() - 1, + [](integerPart bits) { return bits == ~integerPart{0}; }) && + (!significand_mask || + mysignificand[mysignificand.size() - 1] == significand_mask); + is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; + } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { + is_nan = is_zero && sign; + } - sign = i >> 15; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0xff && mysignificand == 0) { - makeInf(sign); - } else if (myexponent == 0xff && mysignificand != 0) { + if (is_nan) { category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 127; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -126; - else - *significandParts() |= 0x80; // integer bit + exponent = ::exponentNaN(S); + std::copy_n(mysignificand.begin(), mysignificand.size(), + significandParts()); + return; + } + + if (is_zero) { + makeZero(sign); + return; } + + category = fcNormal; + exponent = myexponent - bias; + std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); + if (myexponent == 0) // denormal + exponent = S.minExponent; + else + significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit } -void IEEEFloat::initFromHalfAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 10) & 
0x1f; - uint32_t mysignificand = i & 0x3ff; +void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEquad>(api); +} - initialize(&semIEEEhalf); - assert(partCount()==1); +void IEEEFloat::initFromDoubleAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEdouble>(api); +} - sign = i >> 15; - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0x1f && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0x1f && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 15; //bias - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -14; - else - *significandParts() |= 0x400; // integer bit - } +void IEEEFloat::initFromFloatAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEsingle>(api); } -void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 2) & 0x1f; - uint32_t mysignificand = i & 0x3; +void IEEEFloat::initFromBFloatAPInt(const APInt &api) { + initFromIEEEAPInt<semBFloat>(api); +} - initialize(&semFloat8E5M2); - assert(partCount() == 1); +void IEEEFloat::initFromHalfAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEhalf>(api); +} - sign = i >> 7; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0x1f && mysignificand == 0) { - makeInf(sign); - } else if (myexponent == 0x1f && mysignificand != 0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 15; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -14; - else - *significandParts() |= 0x4; // integer bit - } +void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E5M2>(api); +} + +void 
IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); } void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 3) & 0xf; - uint32_t mysignificand = i & 0x7; + initFromIEEEAPInt<semFloat8E4M3FN>(api); +} - initialize(&semFloat8E4M3FN); - assert(partCount() == 1); +void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); +} - sign = i >> 7; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0xf && mysignificand == 7) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 7; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -6; - else - *significandParts() |= 0x8; // integer bit - } +void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); +} + +void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { + initFromIEEEAPInt<semFloatTF32>(api); } /// Treat api as containing the bits of a floating point number. @@ -3863,8 +3877,16 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { return initFromPPCDoubleDoubleAPInt(api); if (Sem == &semFloat8E5M2) return initFromFloat8E5M2APInt(api); + if (Sem == &semFloat8E5M2FNUZ) + return initFromFloat8E5M2FNUZAPInt(api); if (Sem == &semFloat8E4M3FN) return initFromFloat8E4M3FNAPInt(api); + if (Sem == &semFloat8E4M3FNUZ) + return initFromFloat8E4M3FNUZAPInt(api); + if (Sem == &semFloat8E4M3B11FNUZ) + return initFromFloat8E4M3B11FNUZAPInt(api); + if (Sem == &semFloatTF32) + return initFromFloatTF32APInt(api); llvm_unreachable(nullptr); } @@ -3893,7 +3915,8 @@ void IEEEFloat::makeLargest(bool Negative) { ? 
(~integerPart(0) >> NumUnusedHighBits) : 0; - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) significand[0] &= ~integerPart(1); } @@ -4074,7 +4097,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, } // Ignore trailing binary zeros. - int trailingZeros = significand.countTrailingZeros(); + int trailingZeros = significand.countr_zero(); exp += trailingZeros; significand.lshrInPlace(trailingZeros); @@ -4321,6 +4344,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { APInt::tcSet(significandParts(), 0, partCount()); category = fcZero; exponent = 0; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; break; } @@ -4407,17 +4432,15 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { } APFloatBase::ExponentType IEEEFloat::exponentNaN() const { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) - return semantics->maxExponent; - return semantics->maxExponent + 1; + return ::exponentNaN(*semantics); } APFloatBase::ExponentType IEEEFloat::exponentInf() const { - return semantics->maxExponent + 1; + return ::exponentInf(*semantics); } APFloatBase::ExponentType IEEEFloat::exponentZero() const { - return semantics->minExponent - 1; + return ::exponentZero(*semantics); } void IEEEFloat::makeInf(bool Negative) { @@ -4435,6 +4458,10 @@ void IEEEFloat::makeInf(bool Negative) { void IEEEFloat::makeZero(bool Negative) { category = fcZero; sign = Negative; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // Merge negative zero to positive because 0b10000...000 is used for NaN + sign = false; + } exponent = exponentZero(); APInt::tcSet(significandParts(), 0, partCount()); } @@ -4477,7 +4504,7 @@ IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; // Clamp 
to one past the range ends to let normalize handle overlflow. - X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); + X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); X.normalize(RoundingMode, lfExactlyZero); if (X.isNaN()) X.makeQuiet(); @@ -5114,6 +5141,19 @@ APFloat::APFloat(const fltSemantics &Semantics, StringRef S) consumeError(StatusOrErr.takeError()); } +FPClassTest APFloat::classify() const { + if (isZero()) + return isNegative() ? fcNegZero : fcPosZero; + if (isNormal()) + return isNegative() ? fcNegNormal : fcPosNormal; + if (isDenormal()) + return isNegative() ? fcNegSubnormal : fcPosSubnormal; + if (isInfinity()) + return isNegative() ? fcNegInf : fcPosInf; + assert(isNaN() && "Other class of FP constant"); + return isSignaling() ? fcSNan : fcQNan; +} + APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo) { if (&getSemantics() == &ToSemantics) { |