src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2023-09-02 21:17:18 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2023-12-08 17:34:50 +0000
commit	06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree	62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/Support/APFloat.cpp
parent	cf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent	7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/APFloat.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/Support/APFloat.cpp

1142

1 files changed, 591 insertions, 551 deletions

diff --git a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
index eae4fdb6c3d0..4a73739b5282 100644
--- a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp

@@ -14,8 +14,10 @@

#include "llvm/ADT/APFloat.h"

#include "llvm/ADT/APSInt.h"

#include "llvm/ADT/ArrayRef.h"

+#include "llvm/ADT/FloatingPointMode.h"

#include "llvm/ADT/FoldingSet.h"

#include "llvm/ADT/Hashing.h"

+#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/StringExtras.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/Config/llvm-config.h"

@@ -51,209 +53,303 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi

namespace llvm {

- // How the nonfinite values Inf and NaN are represented.

- enum class fltNonfiniteBehavior {

- // Represents standard IEEE 754 behavior. A value is nonfinite if the

- // exponent field is all 1s. In such cases, a value is Inf if the

- // significand bits are all zero, and NaN otherwise

- IEEE754,

- // Only the Float8E5M2 has this behavior. There is no Inf representation. A

- // value is NaN if the exponent field and the mantissa field are all 1s.

- // This behavior matches the FP8 E4M3 type described in

- // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs

- // as non-signalling, although the paper does not state whether the NaN

- // values are signalling or not.

- NanOnly,

- };

+// How the nonfinite values Inf and NaN are represented.

+enum class fltNonfiniteBehavior {

+ // Represents standard IEEE 754 behavior. A value is nonfinite if the

+ // exponent field is all 1s. In such cases, a value is Inf if the

+ // significand bits are all zero, and NaN otherwise

+ IEEE754,

+ // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,

+ // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no

+ // representation for Inf, and operations that would ordinarily produce Inf

+ // produce NaN instead.

+ // The details of the NaN representation(s) in this form are determined by the

+ // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available

+ // encodings do not distinguish between signalling and quiet NaN.

+ NanOnly,

+};

- /* Represents floating point arithmetic semantics. */

- struct fltSemantics {

- /* The largest E such that 2^E is representable; this matches the

- definition of IEEE 754. */

- APFloatBase::ExponentType maxExponent;

+// How NaN values are represented. This is currently only used in combination

+// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE

+// while having IEEE non-finite behavior is liable to lead to unexpected

+// results.

+enum class fltNanEncoding {

+ // Represents the standard IEEE behavior where a value is NaN if its

+ // exponent is all 1s and the significand is non-zero.

+ IEEE,

+ // Represents the behavior in the Float8E4M3 floating point type where NaN is

+ // represented by having the exponent and mantissa set to all 1s.

+ // This behavior matches the FP8 E4M3 type described in

+ // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs

+ // as non-signalling, although the paper does not state whether the NaN

+ // values are signalling or not.

+ AllOnes,

+ // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types

+ // where NaN is represented by a sign bit of 1 and all 0s in the exponent

+ // and mantissa (i.e. the negative zero encoding in an IEEE float). Since

+ // there is only one NaN value, it is treated as quiet NaN. This matches the

+ // behavior described in https://arxiv.org/abs/2206.02915 .

+ NegativeZero,

+};

- /* The smallest E such that 2^E is a normalized number; this

- matches the definition of IEEE 754. */

- APFloatBase::ExponentType minExponent;

+/* Represents floating point arithmetic semantics. */

+struct fltSemantics {

+ /* The largest E such that 2^E is representable; this matches the

+ definition of IEEE 754. */

+ APFloatBase::ExponentType maxExponent;

- /* Number of bits in the significand. This includes the integer

- bit. */

- unsigned int precision;

+ /* The smallest E such that 2^E is a normalized number; this

+ matches the definition of IEEE 754. */

+ APFloatBase::ExponentType minExponent;

- /* Number of bits actually used in the semantics. */

- unsigned int sizeInBits;

+ /* Number of bits in the significand. This includes the integer

+ bit. */

+ unsigned int precision;

- fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

+ /* Number of bits actually used in the semantics. */

+ unsigned int sizeInBits;

- // Returns true if any number described by this semantics can be precisely

- // represented by the specified semantics. Does not take into account

- // the value of fltNonfiniteBehavior.

- bool isRepresentableBy(const fltSemantics &S) const {

- return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&

- precision <= S.precision;

- }

- };

+ fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

- static const fltSemantics semIEEEhalf = {15, -14, 11, 16};

- static const fltSemantics semBFloat = {127, -126, 8, 16};

- static const fltSemantics semIEEEsingle = {127, -126, 24, 32};

- static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};

- static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};

- static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};

- static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,

- fltNonfiniteBehavior::NanOnly};

- static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};

- static const fltSemantics semBogus = {0, 0, 0, 0};

- /* The IBM double-double semantics. Such a number consists of a pair of IEEE

- 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,

- (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.

- Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent

- to each other, and two 11-bit exponents.

- Note: we need to make the value different from semBogus as otherwise

- an unsafe optimization may collapse both values to a single address,

- and we heavily rely on them having distinct addresses. */

- static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

- /* These are legacy semantics for the fallback, inaccrurate implementation of

- IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the

- operation. It's equivalent to having an IEEE number with consecutive 106

- bits of mantissa and 11 bits of exponent.

- It's not equivalent to IBM double-double. For example, a legit IBM

- double-double, 1 + epsilon:

- 1 + epsilon = 1 + (1 >> 1076)

- is not representable by a consecutive 106 bits of mantissa.

- Currently, these semantics are used in the following way:

- semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->

- (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->

- semPPCDoubleDoubleLegacy -> IEEE operations

- We use bitcastToAPInt() to get the bit representation (in APInt) of the

- underlying IEEEdouble, then use the APInt constructor to construct the

- legacy IEEE float.

- TODO: Implement all operations in semPPCDoubleDouble, and delete these

- semantics. */

- static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,

- 53 + 53, 128};

- const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {

- switch (S) {

- case S_IEEEhalf:

- return IEEEhalf();

- case S_BFloat:

- return BFloat();

- case S_IEEEsingle:

- return IEEEsingle();

- case S_IEEEdouble:

- return IEEEdouble();

- case S_IEEEquad:

- return IEEEquad();

- case S_PPCDoubleDouble:

- return PPCDoubleDouble();

- case S_Float8E5M2:

- return Float8E5M2();

- case S_Float8E4M3FN:

- return Float8E4M3FN();

- case S_x87DoubleExtended:

- return x87DoubleExtended();

- }

- llvm_unreachable("Unrecognised floating semantics");

- }

- APFloatBase::Semantics

- APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {

- if (&Sem == &llvm::APFloat::IEEEhalf())

- return S_IEEEhalf;

- else if (&Sem == &llvm::APFloat::BFloat())

- return S_BFloat;

- else if (&Sem == &llvm::APFloat::IEEEsingle())

- return S_IEEEsingle;

- else if (&Sem == &llvm::APFloat::IEEEdouble())

- return S_IEEEdouble;

- else if (&Sem == &llvm::APFloat::IEEEquad())

- return S_IEEEquad;

- else if (&Sem == &llvm::APFloat::PPCDoubleDouble())

- return S_PPCDoubleDouble;

- else if (&Sem == &llvm::APFloat::Float8E5M2())

- return S_Float8E5M2;

- else if (&Sem == &llvm::APFloat::Float8E4M3FN())

- return S_Float8E4M3FN;

- else if (&Sem == &llvm::APFloat::x87DoubleExtended())

- return S_x87DoubleExtended;

- else

- llvm_unreachable("Unknown floating semantics");

+ fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

+ // Returns true if any number described by this semantics can be precisely

+ // represented by the specified semantics. Does not take into account

+ // the value of fltNonfiniteBehavior.

+ bool isRepresentableBy(const fltSemantics &S) const {

+ return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&

+ precision <= S.precision;

}

+};

- const fltSemantics &APFloatBase::IEEEhalf() {

- return semIEEEhalf;

- }

- const fltSemantics &APFloatBase::BFloat() {

- return semBFloat;

- }

- const fltSemantics &APFloatBase::IEEEsingle() {

- return semIEEEsingle;

- }

- const fltSemantics &APFloatBase::IEEEdouble() {

- return semIEEEdouble;

- }

- const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }

- const fltSemantics &APFloatBase::PPCDoubleDouble() {

- return semPPCDoubleDouble;

- }

- const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }

- const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }

- const fltSemantics &APFloatBase::x87DoubleExtended() {

- return semX87DoubleExtended;

- }

- const fltSemantics &APFloatBase::Bogus() { return semBogus; }

+static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};

+static constexpr fltSemantics semBFloat = {127, -126, 8, 16};

+static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};

+static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};

+static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};

+static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};

+static constexpr fltSemantics semFloat8E5M2FNUZ = {

+ 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};

+static constexpr fltSemantics semFloat8E4M3FN = {

+ 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};

+static constexpr fltSemantics semFloat8E4M3FNUZ = {

+ 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};

+static constexpr fltSemantics semFloat8E4M3B11FNUZ = {

+ 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};

+static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};

+static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};

+static constexpr fltSemantics semBogus = {0, 0, 0, 0};

+/* The IBM double-double semantics. Such a number consists of a pair of IEEE

+ 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,

+ (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.

+ Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent

+ to each other, and two 11-bit exponents.

+ Note: we need to make the value different from semBogus as otherwise

+ an unsafe optimization may collapse both values to a single address,

+ and we heavily rely on them having distinct addresses. */

+static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

+/* These are legacy semantics for the fallback, inaccurate implementation of

+ IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the

+ operation. It's equivalent to having an IEEE number with consecutive 106

+ bits of mantissa and 11 bits of exponent.

+ It's not equivalent to IBM double-double. For example, a legit IBM

+ double-double, 1 + epsilon:

+ 1 + epsilon = 1 + (1 >> 1076)

+ is not representable by a consecutive 106 bits of mantissa.

+ Currently, these semantics are used in the following way:

+ semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->

+ (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->

+ semPPCDoubleDoubleLegacy -> IEEE operations

+ We use bitcastToAPInt() to get the bit representation (in APInt) of the

+ underlying IEEEdouble, then use the APInt constructor to construct the

+ legacy IEEE float.

+ TODO: Implement all operations in semPPCDoubleDouble, and delete these

+ semantics. */

+static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,

+ 53 + 53, 128};

+const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {

+ switch (S) {

+ case S_IEEEhalf:

+ return IEEEhalf();

+ case S_BFloat:

+ return BFloat();

+ case S_IEEEsingle:

+ return IEEEsingle();

+ case S_IEEEdouble:

+ return IEEEdouble();

+ case S_IEEEquad:

+ return IEEEquad();

+ case S_PPCDoubleDouble:

+ return PPCDoubleDouble();

+ case S_Float8E5M2:

+ return Float8E5M2();

+ case S_Float8E5M2FNUZ:

+ return Float8E5M2FNUZ();

+ case S_Float8E4M3FN:

+ return Float8E4M3FN();

+ case S_Float8E4M3FNUZ:

+ return Float8E4M3FNUZ();

+ case S_Float8E4M3B11FNUZ:

+ return Float8E4M3B11FNUZ();

+ case S_FloatTF32:

+ return FloatTF32();

+ case S_x87DoubleExtended:

+ return x87DoubleExtended();

+ }

+ llvm_unreachable("Unrecognised floating semantics");

+APFloatBase::Semantics

+APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {

+ if (&Sem == &llvm::APFloat::IEEEhalf())

+ return S_IEEEhalf;

+ else if (&Sem == &llvm::APFloat::BFloat())

+ return S_BFloat;

+ else if (&Sem == &llvm::APFloat::IEEEsingle())

+ return S_IEEEsingle;

+ else if (&Sem == &llvm::APFloat::IEEEdouble())

+ return S_IEEEdouble;

+ else if (&Sem == &llvm::APFloat::IEEEquad())

+ return S_IEEEquad;

+ else if (&Sem == &llvm::APFloat::PPCDoubleDouble())

+ return S_PPCDoubleDouble;

+ else if (&Sem == &llvm::APFloat::Float8E5M2())

+ return S_Float8E5M2;

+ else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())

+ return S_Float8E5M2FNUZ;

+ else if (&Sem == &llvm::APFloat::Float8E4M3FN())

+ return S_Float8E4M3FN;

+ else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())

+ return S_Float8E4M3FNUZ;

+ else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())

+ return S_Float8E4M3B11FNUZ;

+ else if (&Sem == &llvm::APFloat::FloatTF32())

+ return S_FloatTF32;

+ else if (&Sem == &llvm::APFloat::x87DoubleExtended())

+ return S_x87DoubleExtended;

+ else

+ llvm_unreachable("Unknown floating semantics");

+const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }

+const fltSemantics &APFloatBase::BFloat() { return semBFloat; }

+const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }

+const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }

+const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }

+const fltSemantics &APFloatBase::PPCDoubleDouble() {

+ return semPPCDoubleDouble;

+const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }

+const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }

+const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }

+const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }

+const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {

+ return semFloat8E4M3B11FNUZ;

+const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }

+const fltSemantics &APFloatBase::x87DoubleExtended() {

+ return semX87DoubleExtended;

+const fltSemantics &APFloatBase::Bogus() { return semBogus; }

+constexpr RoundingMode APFloatBase::rmNearestTiesToEven;

+constexpr RoundingMode APFloatBase::rmTowardPositive;

+constexpr RoundingMode APFloatBase::rmTowardNegative;

+constexpr RoundingMode APFloatBase::rmTowardZero;

+constexpr RoundingMode APFloatBase::rmNearestTiesToAway;

+/* A tight upper bound on number of parts required to hold the value

+ pow(5, power) is

+ power * 815 / (351 * integerPartWidth) + 1

+ However, whilst the result may require only this many parts,

+ because we are multiplying two values to get it, the

+ multiplication may require an extra part with the excess part

+ being zero (consider the trivial case of 1 * 1, tcFullMultiply

+ requires two parts to hold the single-part result). So we add an

+ extra one to guarantee enough space whilst multiplying. */

+const unsigned int maxExponent = 16383;

+const unsigned int maxPrecision = 113;

+const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;

+const unsigned int maxPowerOfFiveParts =

+ 2 +

+ ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));

+unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {

+ return semantics.precision;

+APFloatBase::ExponentType

+APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {

+ return semantics.maxExponent;

+APFloatBase::ExponentType

+APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {

+ return semantics.minExponent;

+unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {

+ return semantics.sizeInBits;

+unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,

+ bool isSigned) {

+ // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need

+ // at least one more bit than the MaxExponent to hold the max FP value.

+ unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;

+ // Extra sign bit needed.

+ if (isSigned)

+ ++MinBitWidth;

+ return MinBitWidth;

+bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,

+ const fltSemantics &Dst) {

+ // Exponent range must be larger.

+ if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)

+ return false;

- constexpr RoundingMode APFloatBase::rmNearestTiesToEven;

- constexpr RoundingMode APFloatBase::rmTowardPositive;

- constexpr RoundingMode APFloatBase::rmTowardNegative;

- constexpr RoundingMode APFloatBase::rmTowardZero;

- constexpr RoundingMode APFloatBase::rmNearestTiesToAway;

+ // If the mantissa is long enough, the result value could still be denormal

+ // with a larger exponent range.

+ //

+ // FIXME: This condition is probably not accurate but also shouldn't be a

+ // practical concern with existing types.

+ return Dst.precision >= Src.precision;

- /* A tight upper bound on number of parts required to hold the value

- pow(5, power) is

+unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {

+ return Sem.sizeInBits;

- power * 815 / (351 * integerPartWidth) + 1

+static constexpr APFloatBase::ExponentType

+exponentZero(const fltSemantics &semantics) {

+ return semantics.minExponent - 1;

- However, whilst the result may require only this many parts,

- because we are multiplying two values to get it, the

- multiplication may require an extra part with the excess part

- being zero (consider the trivial case of 1 * 1, tcFullMultiply

- requires two parts to hold the single-part result). So we add an

- extra one to guarantee enough space whilst multiplying. */

- const unsigned int maxExponent = 16383;

- const unsigned int maxPrecision = 113;

- const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;

- const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));

+static constexpr APFloatBase::ExponentType

+exponentInf(const fltSemantics &semantics) {

+ return semantics.maxExponent + 1;

- unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {

- return semantics.precision;

- }

- APFloatBase::ExponentType

- APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {

+static constexpr APFloatBase::ExponentType

+exponentNaN(const fltSemantics &semantics) {

+ if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {

+ if (semantics.nanEncoding == fltNanEncoding::NegativeZero)

+ return exponentZero(semantics);

return semantics.maxExponent;

}

- APFloatBase::ExponentType

- APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {

- return semantics.minExponent;

- }

- unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {

- return semantics.sizeInBits;

- }

- unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {

- return Sem.sizeInBits;

+ return semantics.maxExponent + 1;

}

/* A bunch of private, handy routines. */

@@ -262,9 +358,7 @@ static inline Error createError(const Twine &Err) {

return make_error<StringError>(Err, inconvertibleErrorCode());

}

-static inline unsigned int

-partCountForBits(unsigned int bits)

+static constexpr inline unsigned int partCountForBits(unsigned int bits) {

return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;

}

@@ -509,7 +603,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,

/* If we ran off the end it is exactly zero or one-half, otherwise

a little more. */

- if (hexDigit == -1U)

+ if (hexDigit == UINT_MAX)

return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;

else

return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;

@@ -526,7 +620,7 @@ lostFractionThroughTruncation(const APFloatBase::integerPart *parts,

lsb = APInt::tcLSB(parts, partCount);

- /* Note this is guaranteed true if bits == 0, or LSB == -1U. */

+ /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */

if (bits <= lsb)

return lfExactlyZero;

if (bits == lsb + 1)

@@ -798,10 +892,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {

APInt fill_storage;

if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {

- // The only NaN representation is where the mantissa is all 1s, which is

- // non-signalling.

+ // Finite-only types do not distinguish signalling and quiet NaN, so

+ // make them all quiet.

SNaN = false;

- fill_storage = APInt::getAllOnes(semantics->precision - 1);

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {

+ sign = true;

+ fill_storage = APInt::getZero(semantics->precision - 1);

+ } else {

+ fill_storage = APInt::getAllOnes(semantics->precision - 1);

+ }

fill = &fill_storage;

}

@@ -832,6 +931,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {

// conventionally, this is the next bit down from the QNaN bit.

if (APInt::tcIsZero(significand, numParts))

APInt::tcSetBit(significand, QNaNBit - 1);

+ } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {

+ // The only NaN is a quiet NaN, and it has no bits set in the significand.

+ // Do nothing.

} else {

// We always have to set the QNaN bit to make it a QNaN.

APInt::tcSetBit(significand, QNaNBit);

@@ -976,7 +1078,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const {

}

bool IEEEFloat::isLargest() const {

- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {

+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&

+ semantics->nanEncoding == fltNanEncoding::AllOnes) {

// The largest number by magnitude in our format will be the floating point

// number with maximum exponent and with significand that is all ones except

// the LSB.

@@ -1418,7 +1521,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {

exponent = semantics->maxExponent;

tcSetLeastSignificantBits(significandParts(), partCount(),

semantics->precision);

- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)

+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&

+ semantics->nanEncoding == fltNanEncoding::AllOnes)

APInt::tcClearBit(significandParts(), 0);

return opInexact;

@@ -1519,7 +1623,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,

}

+ // The all-ones value is an overflow if NaN is all ones. If NaN is

+ // represented by negative zero, then it is a valid finite value.

if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&

+ semantics->nanEncoding == fltNanEncoding::AllOnes &&

exponent == semantics->maxExponent && isSignificandAllOnes())

return handleOverflow(rounding_mode);

@@ -1530,8 +1637,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,

underflow for exact results. */

if (lost_fraction == lfExactlyZero) {

/* Canonicalize zeroes. */

- if (omsb == 0)

+ if (omsb == 0) {

category = fcZero;

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

+ }

return opOK;

}

@@ -1549,18 +1659,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,

/* Renormalize by incrementing the exponent and shifting our

significand right one. However if we already have the

maximum exponent we overflow to infinity. */

- if (exponent == semantics->maxExponent) {

- category = fcInfinity;

- return (opStatus) (opOverflow | opInexact);

- }

+ if (exponent == semantics->maxExponent)

+ // Invoke overflow handling with a rounding mode that will guarantee

+ // that the result gets turned into the correct infinity representation.

+ // This is needed instead of just setting the category to infinity to

+ // account for 8-bit floating point types that have no inf, only NaN.

+ return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

shiftSignificandRight(1);

return opInexact;

}

+ // The all-ones value is an overflow if NaN is all ones. If NaN is

+ // represented by negative zero, then it is a valid finite value.

if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&

+ semantics->nanEncoding == fltNanEncoding::AllOnes &&

exponent == semantics->maxExponent && isSignificandAllOnes())

return handleOverflow(rounding_mode);

}

@@ -1574,8 +1688,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,

assert(omsb < semantics->precision);

/* Canonicalize zeroes. */

- if (omsb == 0)

+ if (omsb == 0) {

category = fcZero;

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

+ }

/* The fcZero case is a denormal that underflowed to zero. */

return (opStatus) (opUnderflow | opInexact);

@@ -1877,6 +1994,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {

/* Change sign. */

void IEEEFloat::changeSign() {

+ // With NaN-as-negative-zero, neither NaN nor zero can change

+ // their signs.

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&

+ (isZero() || isNaN()))

+ return;

/* Look mummy, this one's easy. */

sign = !sign;

}

@@ -1906,6 +2028,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,

if (category == fcZero) {

if (rhs.category != fcZero || (sign == rhs.sign) == subtract)

sign = (rounding_mode == rmTowardNegative);

+ // NaN-in-negative-zero means zeros need to be normalized to +0.

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

}

return fs;

@@ -1931,6 +2056,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,

sign ^= rhs.sign;

fs = multiplySpecials(rhs);

+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

if (isFiniteNonZero()) {

lostFraction lost_fraction = multiplySignificand(rhs);

fs = normalize(rounding_mode, lost_fraction);

@@ -1949,6 +2076,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,

sign ^= rhs.sign;

fs = divideSpecials(rhs);

+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

if (isFiniteNonZero()) {

lostFraction lost_fraction = divideSignificand(rhs);

fs = normalize(rounding_mode, lost_fraction);

@@ -2057,8 +2186,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {

}

- if (isZero())

+ if (isZero()) {

sign = origSign; // IEEE754 requires this

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ // But some 8-bit floats only have positive 0.

+ sign = false;

+ }

else

sign ^= origSign;

return fs;

@@ -2083,8 +2217,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {

fs = subtract(V, rmNearestTiesToEven);

assert(fs==opOK);

}

- if (isZero())

+ if (isZero()) {

sign = origSign; // fmod requires this

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

+ }

return fs;

}

@@ -2112,8 +2249,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,

/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a

positive zero unless rounding to minus infinity, except that

adding two like-signed zeroes gives that zero. */

- if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)

+ if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {

sign = (rounding_mode == rmTowardNegative);

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

+ }

} else {

fs = multiplySpecials(multiplicand);

@@ -2389,6 +2529,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,

return is_signaling ? opInvalidOp : opOK;

}

+ // If NaN is negative zero, we need to create a new NaN to avoid converting

+ // NaN to -Inf.

+ if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&

+ semantics->nanEncoding != fltNanEncoding::NegativeZero)

+ makeNaN(false, false);

*losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

// For x87 extended precision, we want to make a NaN, not a special NaN if

@@ -2410,6 +2556,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,

makeNaN(false, sign);

*losesInfo = true;

fs = opInexact;

+ } else if (category == fcZero &&

+ semantics->nanEncoding == fltNanEncoding::NegativeZero) {

+ // Negative zero loses info, but positive zero doesn't.

+ *losesInfo =

+ fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;

+ fs = *losesInfo ? opInexact : opOK;

+ // NaN is negative zero means -0 -> +0, which can lose information

+ sign = false;

} else {

*losesInfo = false;

fs = opOK;

@@ -2696,7 +2850,7 @@ IEEEFloat::convertFromHexadecimalString(StringRef s,

}

hex_value = hexDigitValue(*p);

- if (hex_value == -1U)

+ if (hex_value == UINT_MAX)

break;

p++;

@@ -2877,9 +3031,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {

if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {

category = fcZero;

fs = opOK;

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

- /* Check whether the normalized exponent is high enough to overflow

- max during the log-rebasing in the max-exponent check below. */

+ /* Check whether the normalized exponent is high enough to overflow

+ max during the log-rebasing in the max-exponent check below. */

} else if (D.normalizedExponent - 1 > INT_MAX / 42039) {

fs = handleOverflow(rounding_mode);

@@ -3337,201 +3493,121 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {

return APInt(128, words);

}

-APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {

- assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);

- assert(partCount()==2);

+template <const fltSemantics &S>

+APInt IEEEFloat::convertIEEEFloatToAPInt() const {

+ assert(semantics == &S);

+ constexpr int bias = -(S.minExponent - 1);

+ constexpr unsigned int trailing_significand_bits = S.precision - 1;

+ constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;

+ constexpr integerPart integer_bit =

+ integerPart{1} << (trailing_significand_bits % integerPartWidth);

+ constexpr uint64_t significand_mask = integer_bit - 1;

+ constexpr unsigned int exponent_bits =

+ S.sizeInBits - 1 - trailing_significand_bits;

+ static_assert(exponent_bits < 64);

+ constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

- uint64_t myexponent, mysignificand, mysignificand2;

+ uint64_t myexponent;

+ std::array<integerPart, partCountForBits(trailing_significand_bits)>

+ mysignificand;

if (isFiniteNonZero()) {

- myexponent = exponent+16383; //bias

- mysignificand = significandParts()[0];

- mysignificand2 = significandParts()[1];

- if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))

- myexponent = 0; // denormal

- } else if (category==fcZero) {

- myexponent = 0;

- mysignificand = mysignificand2 = 0;

- } else if (category==fcInfinity) {

- myexponent = 0x7fff;

- mysignificand = mysignificand2 = 0;

+ myexponent = exponent + bias;

+ std::copy_n(significandParts(), mysignificand.size(),

+ mysignificand.begin());

+ if (myexponent == 1 &&

+ !(significandParts()[integer_bit_part] & integer_bit))

+ myexponent = 0; // denormal

+ } else if (category == fcZero) {

+ myexponent = ::exponentZero(S) + bias;

+ mysignificand.fill(0);

+ } else if (category == fcInfinity) {

+ if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {

+ llvm_unreachable("semantics don't support inf!");

+ }

+ myexponent = ::exponentInf(S) + bias;

+ mysignificand.fill(0);

} else {

assert(category == fcNaN && "Unknown category!");

- myexponent = 0x7fff;

- mysignificand = significandParts()[0];

- mysignificand2 = significandParts()[1];

- }

- uint64_t words[2];

- words[0] = mysignificand;

- words[1] = ((uint64_t)(sign & 1) << 63) |

- ((myexponent & 0x7fff) << 48) |

- (mysignificand2 & 0xffffffffffffLL);

+ myexponent = ::exponentNaN(S) + bias;

+ std::copy_n(significandParts(), mysignificand.size(),

+ mysignificand.begin());

+ }

+ std::array<uint64_t, (S.sizeInBits + 63) / 64> words;

+ auto words_iter =

+ std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());

+ if constexpr (significand_mask != 0) {

+ // Clear the integer bit.

+ words[mysignificand.size() - 1] &= significand_mask;

+ }

+ std::fill(words_iter, words.end(), uint64_t{0});

+ constexpr size_t last_word = words.size() - 1;

+ uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)

+ << ((S.sizeInBits - 1) % 64);

+ words[last_word] |= shifted_sign;

+ uint64_t shifted_exponent = (myexponent & exponent_mask)

+ << (trailing_significand_bits % 64);

+ words[last_word] |= shifted_exponent;

+ if constexpr (last_word == 0) {

+ return APInt(S.sizeInBits, words[0]);

+ }

+ return APInt(S.sizeInBits, words);

- return APInt(128, words);

+APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {

+ assert(partCount() == 2);

+ return convertIEEEFloatToAPInt<semIEEEquad>();

}

APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {

- assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);

assert(partCount()==1);

- uint64_t myexponent, mysignificand;

- if (isFiniteNonZero()) {

- myexponent = exponent+1023; //bias

- mysignificand = *significandParts();

- if (myexponent==1 && !(mysignificand & 0x10000000000000LL))

- myexponent = 0; // denormal

- } else if (category==fcZero) {

- myexponent = 0;

- mysignificand = 0;

- } else if (category==fcInfinity) {

- myexponent = 0x7ff;

- mysignificand = 0;

- } else {

- assert(category == fcNaN && "Unknown category!");

- myexponent = 0x7ff;

- mysignificand = *significandParts();

- }

- return APInt(64, ((((uint64_t)(sign & 1) << 63) |

- ((myexponent & 0x7ff) << 52) |

- (mysignificand & 0xfffffffffffffLL))));

+ return convertIEEEFloatToAPInt<semIEEEdouble>();

}

APInt IEEEFloat::convertFloatAPFloatToAPInt() const {

- assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);

assert(partCount()==1);

- uint32_t myexponent, mysignificand;

- if (isFiniteNonZero()) {

- myexponent = exponent+127; //bias

- mysignificand = (uint32_t)*significandParts();

- if (myexponent == 1 && !(mysignificand & 0x800000))

- myexponent = 0; // denormal

- } else if (category==fcZero) {

- myexponent = 0;

- mysignificand = 0;

- } else if (category==fcInfinity) {

- myexponent = 0xff;

- mysignificand = 0;

- } else {

- assert(category == fcNaN && "Unknown category!");

- myexponent = 0xff;

- mysignificand = (uint32_t)*significandParts();

- }

- return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |

- (mysignificand & 0x7fffff)));

+ return convertIEEEFloatToAPInt<semIEEEsingle>();

}

APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {

- assert(semantics == (const llvm::fltSemantics *)&semBFloat);

assert(partCount() == 1);

- uint32_t myexponent, mysignificand;

- if (isFiniteNonZero()) {

- myexponent = exponent + 127; // bias

- mysignificand = (uint32_t)*significandParts();

- if (myexponent == 1 && !(mysignificand & 0x80))

- myexponent = 0; // denormal

- } else if (category == fcZero) {

- myexponent = 0;

- mysignificand = 0;

- } else if (category == fcInfinity) {

- myexponent = 0xff;

- mysignificand = 0;

- } else {

- assert(category == fcNaN && "Unknown category!");

- myexponent = 0xff;

- mysignificand = (uint32_t)*significandParts();

- }

- return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |

- (mysignificand & 0x7f)));

+ return convertIEEEFloatToAPInt<semBFloat>();

}

APInt IEEEFloat::convertHalfAPFloatToAPInt() const {

- assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);

assert(partCount()==1);

- uint32_t myexponent, mysignificand;

- if (isFiniteNonZero()) {

- myexponent = exponent+15; //bias

- mysignificand = (uint32_t)*significandParts();

- if (myexponent == 1 && !(mysignificand & 0x400))

- myexponent = 0; // denormal

- } else if (category==fcZero) {

- myexponent = 0;

- mysignificand = 0;

- } else if (category==fcInfinity) {

- myexponent = 0x1f;

- mysignificand = 0;

- } else {

- assert(category == fcNaN && "Unknown category!");

- myexponent = 0x1f;

- mysignificand = (uint32_t)*significandParts();

- }

- return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |

- (mysignificand & 0x3ff)));

+ return convertIEEEFloatToAPInt<semIEEEhalf>();

}

APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {

- assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2);

assert(partCount() == 1);

+ return convertIEEEFloatToAPInt<semFloat8E5M2>();

- uint32_t myexponent, mysignificand;

- if (isFiniteNonZero()) {

- myexponent = exponent + 15; // bias

- mysignificand = (uint32_t)*significandParts();

- if (myexponent == 1 && !(mysignificand & 0x4))

- myexponent = 0; // denormal

- } else if (category == fcZero) {

- myexponent = 0;

- mysignificand = 0;

- } else if (category == fcInfinity) {

- myexponent = 0x1f;

- mysignificand = 0;

- } else {

- assert(category == fcNaN && "Unknown category!");

- myexponent = 0x1f;

- mysignificand = (uint32_t)*significandParts();

- }

- return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |

- (mysignificand & 0x3)));

+APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {

+ assert(partCount() == 1);

+ return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();

}

APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {

- assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);

assert(partCount() == 1);

+ return convertIEEEFloatToAPInt<semFloat8E4M3FN>();

- uint32_t myexponent, mysignificand;

+APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {

+ assert(partCount() == 1);

+ return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();

- if (isFiniteNonZero()) {

- myexponent = exponent + 7; // bias

- mysignificand = (uint32_t)*significandParts();

- if (myexponent == 1 && !(mysignificand & 0x8))

- myexponent = 0; // denormal

- } else if (category == fcZero) {

- myexponent = 0;

- mysignificand = 0;

- } else if (category == fcInfinity) {

- myexponent = 0xf;

- mysignificand = 0;

- } else {

- assert(category == fcNaN && "Unknown category!");

- myexponent = 0xf;

- mysignificand = (uint32_t)*significandParts();

- }

+APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {

+ assert(partCount() == 1);

+ return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();

- return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |

- (mysignificand & 0x7)));

+APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {

+ assert(partCount() == 1);

+ return convertIEEEFloatToAPInt<semFloatTF32>();

}

// This function creates an APInt that is just a bit map of the floating

@@ -3560,9 +3636,21 @@ APInt IEEEFloat::bitcastToAPInt() const {

if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)

return convertFloat8E5M2APFloatToAPInt();

+ if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)

+ return convertFloat8E5M2FNUZAPFloatToAPInt();

if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)

return convertFloat8E4M3FNAPFloatToAPInt();

+ if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)

+ return convertFloat8E4M3FNUZAPFloatToAPInt();

+ if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)

+ return convertFloat8E4M3B11FNUZAPFloatToAPInt();

+ if (semantics == (const llvm::fltSemantics *)&semFloatTF32)

+ return convertFloatTF32APFloatToAPInt();

assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&

"unknown format!");

return convertF80LongDoubleAPFloatToAPInt();

@@ -3643,205 +3731,131 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {

}

-void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {

- uint64_t i1 = api.getRawData()[0];

- uint64_t i2 = api.getRawData()[1];

- uint64_t myexponent = (i2 >> 48) & 0x7fff;

- uint64_t mysignificand = i1;

- uint64_t mysignificand2 = i2 & 0xffffffffffffLL;

+template <const fltSemantics &S>

+void IEEEFloat::initFromIEEEAPInt(const APInt &api) {

+ assert(api.getBitWidth() == S.sizeInBits);

+ constexpr integerPart integer_bit = integerPart{1}

+ << ((S.precision - 1) % integerPartWidth);

+ constexpr uint64_t significand_mask = integer_bit - 1;

+ constexpr unsigned int trailing_significand_bits = S.precision - 1;

+ constexpr unsigned int stored_significand_parts =

+ partCountForBits(trailing_significand_bits);

+ constexpr unsigned int exponent_bits =

+ S.sizeInBits - 1 - trailing_significand_bits;

+ static_assert(exponent_bits < 64);

+ constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

+ constexpr int bias = -(S.minExponent - 1);

- initialize(&semIEEEquad);

- assert(partCount()==2);

- sign = static_cast<unsigned int>(i2>>63);

- if (myexponent==0 &&

- (mysignificand==0 && mysignificand2==0)) {

- makeZero(sign);

- } else if (myexponent==0x7fff &&

- (mysignificand==0 && mysignificand2==0)) {

- makeInf(sign);

- } else if (myexponent==0x7fff &&

- (mysignificand!=0 || mysignificand2 !=0)) {

- category = fcNaN;

- exponent = exponentNaN();

- significandParts()[0] = mysignificand;

- significandParts()[1] = mysignificand2;

- } else {

- category = fcNormal;

- exponent = myexponent - 16383;

- significandParts()[0] = mysignificand;

- significandParts()[1] = mysignificand2;

- if (myexponent==0) // denormal

- exponent = -16382;

- else

- significandParts()[1] |= 0x1000000000000LL; // integer bit

+ // Copy the bits of the significand. We need to clear out the exponent and

+ // sign bit in the last word.

+ std::array<integerPart, stored_significand_parts> mysignificand;

+ std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());

+ if constexpr (significand_mask != 0) {

+ mysignificand[mysignificand.size() - 1] &= significand_mask;

}

-void IEEEFloat::initFromDoubleAPInt(const APInt &api) {

- uint64_t i = *api.getRawData();

- uint64_t myexponent = (i >> 52) & 0x7ff;

- uint64_t mysignificand = i & 0xfffffffffffffLL;

+ // We assume the last word holds the sign bit, the exponent, and potentially

+ // some of the trailing significand field.

+ uint64_t last_word = api.getRawData()[api.getNumWords() - 1];

+ uint64_t myexponent =

+ (last_word >> (trailing_significand_bits % 64)) & exponent_mask;

- initialize(&semIEEEdouble);

- assert(partCount()==1);

+ initialize(&S);

+ assert(partCount() == mysignificand.size());

- sign = static_cast<unsigned int>(i>>63);

- if (myexponent==0 && mysignificand==0) {

- makeZero(sign);

- } else if (myexponent==0x7ff && mysignificand==0) {

- makeInf(sign);

- } else if (myexponent==0x7ff && mysignificand!=0) {

- category = fcNaN;

- exponent = exponentNaN();

- *significandParts() = mysignificand;

- } else {

- category = fcNormal;

- exponent = myexponent - 1023;

- *significandParts() = mysignificand;

- if (myexponent==0) // denormal

- exponent = -1022;

- else

- *significandParts() |= 0x10000000000000LL; // integer bit

- }

+ sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));

-void IEEEFloat::initFromFloatAPInt(const APInt &api) {

- uint32_t i = (uint32_t)*api.getRawData();

- uint32_t myexponent = (i >> 23) & 0xff;

- uint32_t mysignificand = i & 0x7fffff;

+ bool all_zero_significand =

+ llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });

- initialize(&semIEEEsingle);

- assert(partCount()==1);

+ bool is_zero = myexponent == 0 && all_zero_significand;

- sign = i >> 31;

- if (myexponent==0 && mysignificand==0) {

- makeZero(sign);

- } else if (myexponent==0xff && mysignificand==0) {

- makeInf(sign);

- } else if (myexponent==0xff && mysignificand!=0) {

- category = fcNaN;

- exponent = exponentNaN();

- *significandParts() = mysignificand;

- } else {

- category = fcNormal;

- exponent = myexponent - 127; //bias

- *significandParts() = mysignificand;

- if (myexponent==0) // denormal

- exponent = -126;

- else

- *significandParts() |= 0x800000; // integer bit

+ if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {

+ if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {

+ makeInf(sign);

+ return;

+ }

}

-void IEEEFloat::initFromBFloatAPInt(const APInt &api) {

- uint32_t i = (uint32_t)*api.getRawData();

- uint32_t myexponent = (i >> 7) & 0xff;

- uint32_t mysignificand = i & 0x7f;

+ bool is_nan = false;

- initialize(&semBFloat);

- assert(partCount() == 1);

+ if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {

+ is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;

+ } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {

+ bool all_ones_significand =

+ std::all_of(mysignificand.begin(), mysignificand.end() - 1,

+ [](integerPart bits) { return bits == ~integerPart{0}; }) &&

+ (!significand_mask ||

+ mysignificand[mysignificand.size() - 1] == significand_mask);

+ is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;

+ } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {

+ is_nan = is_zero && sign;

+ }

- sign = i >> 15;

- if (myexponent == 0 && mysignificand == 0) {

- makeZero(sign);

- } else if (myexponent == 0xff && mysignificand == 0) {

- makeInf(sign);

- } else if (myexponent == 0xff && mysignificand != 0) {

+ if (is_nan) {

category = fcNaN;

- exponent = exponentNaN();

- *significandParts() = mysignificand;

- } else {

- category = fcNormal;

- exponent = myexponent - 127; // bias

- *significandParts() = mysignificand;

- if (myexponent == 0) // denormal

- exponent = -126;

- else

- *significandParts() |= 0x80; // integer bit

+ exponent = ::exponentNaN(S);

+ std::copy_n(mysignificand.begin(), mysignificand.size(),

+ significandParts());

+ return;

+ }

+ if (is_zero) {

+ makeZero(sign);

+ return;

}

+ category = fcNormal;

+ exponent = myexponent - bias;

+ std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());

+ if (myexponent == 0) // denormal

+ exponent = S.minExponent;

+ else

+ significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit

}

-void IEEEFloat::initFromHalfAPInt(const APInt &api) {

- uint32_t i = (uint32_t)*api.getRawData();

- uint32_t myexponent = (i >> 10) & 0x1f;

- uint32_t mysignificand = i & 0x3ff;

+void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {

+ initFromIEEEAPInt<semIEEEquad>(api);

- initialize(&semIEEEhalf);

- assert(partCount()==1);

+void IEEEFloat::initFromDoubleAPInt(const APInt &api) {

+ initFromIEEEAPInt<semIEEEdouble>(api);

- sign = i >> 15;

- if (myexponent==0 && mysignificand==0) {

- makeZero(sign);

- } else if (myexponent==0x1f && mysignificand==0) {

- makeInf(sign);

- } else if (myexponent==0x1f && mysignificand!=0) {

- category = fcNaN;

- exponent = exponentNaN();

- *significandParts() = mysignificand;

- } else {

- category = fcNormal;

- exponent = myexponent - 15; //bias

- *significandParts() = mysignificand;

- if (myexponent==0) // denormal

- exponent = -14;

- else

- *significandParts() |= 0x400; // integer bit

- }

+void IEEEFloat::initFromFloatAPInt(const APInt &api) {

+ initFromIEEEAPInt<semIEEEsingle>(api);

}

-void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {

- uint32_t i = (uint32_t)*api.getRawData();

- uint32_t myexponent = (i >> 2) & 0x1f;

- uint32_t mysignificand = i & 0x3;

+void IEEEFloat::initFromBFloatAPInt(const APInt &api) {

+ initFromIEEEAPInt<semBFloat>(api);

- initialize(&semFloat8E5M2);

- assert(partCount() == 1);

+void IEEEFloat::initFromHalfAPInt(const APInt &api) {

+ initFromIEEEAPInt<semIEEEhalf>(api);

- sign = i >> 7;

- if (myexponent == 0 && mysignificand == 0) {

- makeZero(sign);

- } else if (myexponent == 0x1f && mysignificand == 0) {

- makeInf(sign);

- } else if (myexponent == 0x1f && mysignificand != 0) {

- category = fcNaN;

- exponent = exponentNaN();

- *significandParts() = mysignificand;

- } else {

- category = fcNormal;

- exponent = myexponent - 15; // bias

- *significandParts() = mysignificand;

- if (myexponent == 0) // denormal

- exponent = -14;

- else

- *significandParts() |= 0x4; // integer bit

- }

+void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {

+ initFromIEEEAPInt<semFloat8E5M2>(api);

+void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {

+ initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);

}

void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {

- uint32_t i = (uint32_t)*api.getRawData();

- uint32_t myexponent = (i >> 3) & 0xf;

- uint32_t mysignificand = i & 0x7;

+ initFromIEEEAPInt<semFloat8E4M3FN>(api);

- initialize(&semFloat8E4M3FN);

- assert(partCount() == 1);

+void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {

+ initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);

- sign = i >> 7;

- if (myexponent == 0 && mysignificand == 0) {

- makeZero(sign);

- } else if (myexponent == 0xf && mysignificand == 7) {

- category = fcNaN;

- exponent = exponentNaN();

- *significandParts() = mysignificand;

- } else {

- category = fcNormal;

- exponent = myexponent - 7; // bias

- *significandParts() = mysignificand;

- if (myexponent == 0) // denormal

- exponent = -6;

- else

- *significandParts() |= 0x8; // integer bit

- }

+void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {

+ initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);

+void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {

+ initFromIEEEAPInt<semFloatTF32>(api);

}

/// Treat api as containing the bits of a floating point number.

@@ -3863,8 +3877,16 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {

return initFromPPCDoubleDoubleAPInt(api);

if (Sem == &semFloat8E5M2)

return initFromFloat8E5M2APInt(api);

+ if (Sem == &semFloat8E5M2FNUZ)

+ return initFromFloat8E5M2FNUZAPInt(api);

if (Sem == &semFloat8E4M3FN)

return initFromFloat8E4M3FNAPInt(api);

+ if (Sem == &semFloat8E4M3FNUZ)

+ return initFromFloat8E4M3FNUZAPInt(api);

+ if (Sem == &semFloat8E4M3B11FNUZ)

+ return initFromFloat8E4M3B11FNUZAPInt(api);

+ if (Sem == &semFloatTF32)

+ return initFromFloatTF32APInt(api);

llvm_unreachable(nullptr);

}

@@ -3893,7 +3915,8 @@ void IEEEFloat::makeLargest(bool Negative) {

? (~integerPart(0) >> NumUnusedHighBits)

: 0;

- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)

+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&

+ semantics->nanEncoding == fltNanEncoding::AllOnes)

significand[0] &= ~integerPart(1);

}

@@ -4074,7 +4097,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,

}

// Ignore trailing binary zeros.

- int trailingZeros = significand.countTrailingZeros();

+ int trailingZeros = significand.countr_zero();

exp += trailingZeros;

significand.lshrInPlace(trailingZeros);

@@ -4321,6 +4344,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {

APInt::tcSet(significandParts(), 0, partCount());

category = fcZero;

exponent = 0;

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)

+ sign = false;

break;

}

@@ -4407,17 +4432,15 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {

}

APFloatBase::ExponentType IEEEFloat::exponentNaN() const {

- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)

- return semantics->maxExponent;

- return semantics->maxExponent + 1;

+ return ::exponentNaN(*semantics);

}

APFloatBase::ExponentType IEEEFloat::exponentInf() const {

- return semantics->maxExponent + 1;

+ return ::exponentInf(*semantics);

}

APFloatBase::ExponentType IEEEFloat::exponentZero() const {

- return semantics->minExponent - 1;

+ return ::exponentZero(*semantics);

}

void IEEEFloat::makeInf(bool Negative) {

@@ -4435,6 +4458,10 @@ void IEEEFloat::makeInf(bool Negative) {

void IEEEFloat::makeZero(bool Negative) {

category = fcZero;

sign = Negative;

+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {

+ // Merge negative zero to positive because 0b10000...000 is used for NaN

+ sign = false;

+ }

exponent = exponentZero();

APInt::tcSet(significandParts(), 0, partCount());

}

@@ -4477,7 +4504,7 @@ IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {

int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

// Clamp to one past the range ends to let normalize handle overflow.

- X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);

+ X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);

X.normalize(RoundingMode, lfExactlyZero);

if (X.isNaN())

X.makeQuiet();

@@ -5114,6 +5141,19 @@ APFloat::APFloat(const fltSemantics &Semantics, StringRef S)

consumeError(StatusOrErr.takeError());

}

+FPClassTest APFloat::classify() const {

+ if (isZero())

+ return isNegative() ? fcNegZero : fcPosZero;

+ if (isNormal())

+ return isNegative() ? fcNegNormal : fcPosNormal;

+ if (isDenormal())

+ return isNegative() ? fcNegSubnormal : fcPosSubnormal;

+ if (isInfinity())

+ return isNegative() ? fcNegInf : fcPosInf;

+ assert(isNaN() && "Other class of FP constant");

+ return isSignaling() ? fcSNan : fcQNan;

APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,

roundingMode RM, bool *losesInfo) {

if (&getSemantics() == &ToSemantics) {