aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-09-02 21:17:18 +0000
committerDimitry Andric <dim@FreeBSD.org>2023-12-08 17:34:50 +0000
commit06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/Support/APFloat.cpp
parentcf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/APFloat.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Support/APFloat.cpp1142
1 files changed, 591 insertions, 551 deletions
diff --git a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
index eae4fdb6c3d0..4a73739b5282 100644
--- a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
@@ -14,8 +14,10 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
@@ -51,209 +53,303 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi
namespace llvm {
- // How the nonfinite values Inf and NaN are represented.
- enum class fltNonfiniteBehavior {
- // Represents standard IEEE 754 behavior. A value is nonfinite if the
- // exponent field is all 1s. In such cases, a value is Inf if the
- // significand bits are all zero, and NaN otherwise
- IEEE754,
-
- // Only the Float8E5M2 has this behavior. There is no Inf representation. A
- // value is NaN if the exponent field and the mantissa field are all 1s.
- // This behavior matches the FP8 E4M3 type described in
- // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
- // as non-signalling, although the paper does not state whether the NaN
- // values are signalling or not.
- NanOnly,
- };
+// How the nonfinite values Inf and NaN are represented.
+enum class fltNonfiniteBehavior {
+ // Represents standard IEEE 754 behavior. A value is nonfinite if the
+ // exponent field is all 1s. In such cases, a value is Inf if the
+ // significand bits are all zero, and NaN otherwise
+ IEEE754,
+
+ // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
+ // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
+ // representation for Inf, and operations that would ordinarily produce Inf
+ // produce NaN instead.
+ // The details of the NaN representation(s) in this form are determined by the
+ // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
+ // encodings do not distinguish between signalling and quiet NaN.
+ NanOnly,
+};
- /* Represents floating point arithmetic semantics. */
- struct fltSemantics {
- /* The largest E such that 2^E is representable; this matches the
- definition of IEEE 754. */
- APFloatBase::ExponentType maxExponent;
+// How NaN values are represented. This is curently only used in combination
+// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
+// while having IEEE non-finite behavior is liable to lead to unexpected
+// results.
+enum class fltNanEncoding {
+ // Represents the standard IEEE behavior where a value is NaN if its
+ // exponent is all 1s and the significand is non-zero.
+ IEEE,
+
+ // Represents the behavior in the Float8E4M3 floating point type where NaN is
+ // represented by having the exponent and mantissa set to all 1s.
+ // This behavior matches the FP8 E4M3 type described in
+ // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
+ // as non-signalling, although the paper does not state whether the NaN
+ // values are signalling or not.
+ AllOnes,
+
+ // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types
+ // where NaN is represented by a sign bit of 1 and all 0s in the exponent
+ // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
+ // there is only one NaN value, it is treated as quiet NaN. This matches the
+ // behavior described in https://arxiv.org/abs/2206.02915 .
+ NegativeZero,
+};
- /* The smallest E such that 2^E is a normalized number; this
- matches the definition of IEEE 754. */
- APFloatBase::ExponentType minExponent;
+/* Represents floating point arithmetic semantics. */
+struct fltSemantics {
+ /* The largest E such that 2^E is representable; this matches the
+ definition of IEEE 754. */
+ APFloatBase::ExponentType maxExponent;
- /* Number of bits in the significand. This includes the integer
- bit. */
- unsigned int precision;
+ /* The smallest E such that 2^E is a normalized number; this
+ matches the definition of IEEE 754. */
+ APFloatBase::ExponentType minExponent;
- /* Number of bits actually used in the semantics. */
- unsigned int sizeInBits;
+ /* Number of bits in the significand. This includes the integer
+ bit. */
+ unsigned int precision;
- fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
+ /* Number of bits actually used in the semantics. */
+ unsigned int sizeInBits;
- // Returns true if any number described by this semantics can be precisely
- // represented by the specified semantics. Does not take into account
- // the value of fltNonfiniteBehavior.
- bool isRepresentableBy(const fltSemantics &S) const {
- return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
- precision <= S.precision;
- }
- };
+ fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
- static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
- static const fltSemantics semBFloat = {127, -126, 8, 16};
- static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
- static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
- static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
- static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
- static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,
- fltNonfiniteBehavior::NanOnly};
- static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
- static const fltSemantics semBogus = {0, 0, 0, 0};
-
- /* The IBM double-double semantics. Such a number consists of a pair of IEEE
- 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
- (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
- Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
- to each other, and two 11-bit exponents.
-
- Note: we need to make the value different from semBogus as otherwise
- an unsafe optimization may collapse both values to a single address,
- and we heavily rely on them having distinct addresses. */
- static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
-
- /* These are legacy semantics for the fallback, inaccrurate implementation of
- IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
- operation. It's equivalent to having an IEEE number with consecutive 106
- bits of mantissa and 11 bits of exponent.
-
- It's not equivalent to IBM double-double. For example, a legit IBM
- double-double, 1 + epsilon:
-
- 1 + epsilon = 1 + (1 >> 1076)
-
- is not representable by a consecutive 106 bits of mantissa.
-
- Currently, these semantics are used in the following way:
-
- semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
- (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
- semPPCDoubleDoubleLegacy -> IEEE operations
-
- We use bitcastToAPInt() to get the bit representation (in APInt) of the
- underlying IEEEdouble, then use the APInt constructor to construct the
- legacy IEEE float.
-
- TODO: Implement all operations in semPPCDoubleDouble, and delete these
- semantics. */
- static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
- 53 + 53, 128};
-
- const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
- switch (S) {
- case S_IEEEhalf:
- return IEEEhalf();
- case S_BFloat:
- return BFloat();
- case S_IEEEsingle:
- return IEEEsingle();
- case S_IEEEdouble:
- return IEEEdouble();
- case S_IEEEquad:
- return IEEEquad();
- case S_PPCDoubleDouble:
- return PPCDoubleDouble();
- case S_Float8E5M2:
- return Float8E5M2();
- case S_Float8E4M3FN:
- return Float8E4M3FN();
- case S_x87DoubleExtended:
- return x87DoubleExtended();
- }
- llvm_unreachable("Unrecognised floating semantics");
- }
-
- APFloatBase::Semantics
- APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
- if (&Sem == &llvm::APFloat::IEEEhalf())
- return S_IEEEhalf;
- else if (&Sem == &llvm::APFloat::BFloat())
- return S_BFloat;
- else if (&Sem == &llvm::APFloat::IEEEsingle())
- return S_IEEEsingle;
- else if (&Sem == &llvm::APFloat::IEEEdouble())
- return S_IEEEdouble;
- else if (&Sem == &llvm::APFloat::IEEEquad())
- return S_IEEEquad;
- else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
- return S_PPCDoubleDouble;
- else if (&Sem == &llvm::APFloat::Float8E5M2())
- return S_Float8E5M2;
- else if (&Sem == &llvm::APFloat::Float8E4M3FN())
- return S_Float8E4M3FN;
- else if (&Sem == &llvm::APFloat::x87DoubleExtended())
- return S_x87DoubleExtended;
- else
- llvm_unreachable("Unknown floating semantics");
+ fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
+ // Returns true if any number described by this semantics can be precisely
+ // represented by the specified semantics. Does not take into account
+ // the value of fltNonfiniteBehavior.
+ bool isRepresentableBy(const fltSemantics &S) const {
+ return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
+ precision <= S.precision;
}
+};
- const fltSemantics &APFloatBase::IEEEhalf() {
- return semIEEEhalf;
- }
- const fltSemantics &APFloatBase::BFloat() {
- return semBFloat;
- }
- const fltSemantics &APFloatBase::IEEEsingle() {
- return semIEEEsingle;
- }
- const fltSemantics &APFloatBase::IEEEdouble() {
- return semIEEEdouble;
- }
- const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
- const fltSemantics &APFloatBase::PPCDoubleDouble() {
- return semPPCDoubleDouble;
- }
- const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
- const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
- const fltSemantics &APFloatBase::x87DoubleExtended() {
- return semX87DoubleExtended;
- }
- const fltSemantics &APFloatBase::Bogus() { return semBogus; }
+static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
+static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
+static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
+static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
+static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
+static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
+static constexpr fltSemantics semFloat8E5M2FNUZ = {
+ 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+static constexpr fltSemantics semFloat8E4M3FN = {
+ 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
+static constexpr fltSemantics semFloat8E4M3FNUZ = {
+ 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
+ 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
+static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
+static constexpr fltSemantics semBogus = {0, 0, 0, 0};
+
+/* The IBM double-double semantics. Such a number consists of a pair of IEEE
+ 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
+ (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
+ Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
+ to each other, and two 11-bit exponents.
+
+ Note: we need to make the value different from semBogus as otherwise
+ an unsafe optimization may collapse both values to a single address,
+ and we heavily rely on them having distinct addresses. */
+static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
+
+/* These are legacy semantics for the fallback, inaccrurate implementation of
+ IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
+ operation. It's equivalent to having an IEEE number with consecutive 106
+ bits of mantissa and 11 bits of exponent.
+
+ It's not equivalent to IBM double-double. For example, a legit IBM
+ double-double, 1 + epsilon:
+
+ 1 + epsilon = 1 + (1 >> 1076)
+
+ is not representable by a consecutive 106 bits of mantissa.
+
+ Currently, these semantics are used in the following way:
+
+ semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
+ (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
+ semPPCDoubleDoubleLegacy -> IEEE operations
+
+ We use bitcastToAPInt() to get the bit representation (in APInt) of the
+ underlying IEEEdouble, then use the APInt constructor to construct the
+ legacy IEEE float.
+
+ TODO: Implement all operations in semPPCDoubleDouble, and delete these
+ semantics. */
+static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
+ 53 + 53, 128};
+
+const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
+ switch (S) {
+ case S_IEEEhalf:
+ return IEEEhalf();
+ case S_BFloat:
+ return BFloat();
+ case S_IEEEsingle:
+ return IEEEsingle();
+ case S_IEEEdouble:
+ return IEEEdouble();
+ case S_IEEEquad:
+ return IEEEquad();
+ case S_PPCDoubleDouble:
+ return PPCDoubleDouble();
+ case S_Float8E5M2:
+ return Float8E5M2();
+ case S_Float8E5M2FNUZ:
+ return Float8E5M2FNUZ();
+ case S_Float8E4M3FN:
+ return Float8E4M3FN();
+ case S_Float8E4M3FNUZ:
+ return Float8E4M3FNUZ();
+ case S_Float8E4M3B11FNUZ:
+ return Float8E4M3B11FNUZ();
+ case S_FloatTF32:
+ return FloatTF32();
+ case S_x87DoubleExtended:
+ return x87DoubleExtended();
+ }
+ llvm_unreachable("Unrecognised floating semantics");
+}
+
+APFloatBase::Semantics
+APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
+ if (&Sem == &llvm::APFloat::IEEEhalf())
+ return S_IEEEhalf;
+ else if (&Sem == &llvm::APFloat::BFloat())
+ return S_BFloat;
+ else if (&Sem == &llvm::APFloat::IEEEsingle())
+ return S_IEEEsingle;
+ else if (&Sem == &llvm::APFloat::IEEEdouble())
+ return S_IEEEdouble;
+ else if (&Sem == &llvm::APFloat::IEEEquad())
+ return S_IEEEquad;
+ else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
+ return S_PPCDoubleDouble;
+ else if (&Sem == &llvm::APFloat::Float8E5M2())
+ return S_Float8E5M2;
+ else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
+ return S_Float8E5M2FNUZ;
+ else if (&Sem == &llvm::APFloat::Float8E4M3FN())
+ return S_Float8E4M3FN;
+ else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
+ return S_Float8E4M3FNUZ;
+ else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
+ return S_Float8E4M3B11FNUZ;
+ else if (&Sem == &llvm::APFloat::FloatTF32())
+ return S_FloatTF32;
+ else if (&Sem == &llvm::APFloat::x87DoubleExtended())
+ return S_x87DoubleExtended;
+ else
+ llvm_unreachable("Unknown floating semantics");
+}
+
+const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
+const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
+const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
+const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
+const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
+const fltSemantics &APFloatBase::PPCDoubleDouble() {
+ return semPPCDoubleDouble;
+}
+const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
+const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
+const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
+const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
+const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
+ return semFloat8E4M3B11FNUZ;
+}
+const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
+const fltSemantics &APFloatBase::x87DoubleExtended() {
+ return semX87DoubleExtended;
+}
+const fltSemantics &APFloatBase::Bogus() { return semBogus; }
+
+constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
+constexpr RoundingMode APFloatBase::rmTowardPositive;
+constexpr RoundingMode APFloatBase::rmTowardNegative;
+constexpr RoundingMode APFloatBase::rmTowardZero;
+constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
+
+/* A tight upper bound on number of parts required to hold the value
+ pow(5, power) is
+
+ power * 815 / (351 * integerPartWidth) + 1
+
+ However, whilst the result may require only this many parts,
+ because we are multiplying two values to get it, the
+ multiplication may require an extra part with the excess part
+ being zero (consider the trivial case of 1 * 1, tcFullMultiply
+ requires two parts to hold the single-part result). So we add an
+ extra one to guarantee enough space whilst multiplying. */
+const unsigned int maxExponent = 16383;
+const unsigned int maxPrecision = 113;
+const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+const unsigned int maxPowerOfFiveParts =
+ 2 +
+ ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
+
+unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
+ return semantics.precision;
+}
+APFloatBase::ExponentType
+APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
+ return semantics.maxExponent;
+}
+APFloatBase::ExponentType
+APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
+ return semantics.minExponent;
+}
+unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
+ return semantics.sizeInBits;
+}
+unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
+ bool isSigned) {
+ // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
+ // at least one more bit than the MaxExponent to hold the max FP value.
+ unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
+ // Extra sign bit needed.
+ if (isSigned)
+ ++MinBitWidth;
+ return MinBitWidth;
+}
+
+bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
+ const fltSemantics &Dst) {
+ // Exponent range must be larger.
+ if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
+ return false;
- constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
- constexpr RoundingMode APFloatBase::rmTowardPositive;
- constexpr RoundingMode APFloatBase::rmTowardNegative;
- constexpr RoundingMode APFloatBase::rmTowardZero;
- constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
+ // If the mantissa is long enough, the result value could still be denormal
+ // with a larger exponent range.
+ //
+ // FIXME: This condition is probably not accurate but also shouldn't be a
+ // practical concern with existing types.
+ return Dst.precision >= Src.precision;
+}
- /* A tight upper bound on number of parts required to hold the value
- pow(5, power) is
+unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
+ return Sem.sizeInBits;
+}
- power * 815 / (351 * integerPartWidth) + 1
+static constexpr APFloatBase::ExponentType
+exponentZero(const fltSemantics &semantics) {
+ return semantics.minExponent - 1;
+}
- However, whilst the result may require only this many parts,
- because we are multiplying two values to get it, the
- multiplication may require an extra part with the excess part
- being zero (consider the trivial case of 1 * 1, tcFullMultiply
- requires two parts to hold the single-part result). So we add an
- extra one to guarantee enough space whilst multiplying. */
- const unsigned int maxExponent = 16383;
- const unsigned int maxPrecision = 113;
- const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
- const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
+static constexpr APFloatBase::ExponentType
+exponentInf(const fltSemantics &semantics) {
+ return semantics.maxExponent + 1;
+}
- unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
- return semantics.precision;
- }
- APFloatBase::ExponentType
- APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
+static constexpr APFloatBase::ExponentType
+exponentNaN(const fltSemantics &semantics) {
+ if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
+ return exponentZero(semantics);
return semantics.maxExponent;
}
- APFloatBase::ExponentType
- APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
- return semantics.minExponent;
- }
- unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
- return semantics.sizeInBits;
- }
-
- unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
- return Sem.sizeInBits;
+ return semantics.maxExponent + 1;
}
/* A bunch of private, handy routines. */
@@ -262,9 +358,7 @@ static inline Error createError(const Twine &Err) {
return make_error<StringError>(Err, inconvertibleErrorCode());
}
-static inline unsigned int
-partCountForBits(unsigned int bits)
-{
+static constexpr inline unsigned int partCountForBits(unsigned int bits) {
return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
}
@@ -509,7 +603,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
/* If we ran off the end it is exactly zero or one-half, otherwise
a little more. */
- if (hexDigit == -1U)
+ if (hexDigit == UINT_MAX)
return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
else
return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
@@ -526,7 +620,7 @@ lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
lsb = APInt::tcLSB(parts, partCount);
- /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
+ /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
if (bits <= lsb)
return lfExactlyZero;
if (bits == lsb + 1)
@@ -798,10 +892,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
APInt fill_storage;
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
- // The only NaN representation is where the mantissa is all 1s, which is
- // non-signalling.
+ // Finite-only types do not distinguish signalling and quiet NaN, so
+ // make them all signalling.
SNaN = false;
- fill_storage = APInt::getAllOnes(semantics->precision - 1);
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ sign = true;
+ fill_storage = APInt::getZero(semantics->precision - 1);
+ } else {
+ fill_storage = APInt::getAllOnes(semantics->precision - 1);
+ }
fill = &fill_storage;
}
@@ -832,6 +931,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
// conventionally, this is the next bit down from the QNaN bit.
if (APInt::tcIsZero(significand, numParts))
APInt::tcSetBit(significand, QNaNBit - 1);
+ } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // The only NaN is a quiet NaN, and it has no bits sets in the significand.
+ // Do nothing.
} else {
// We always have to set the QNaN bit to make it a QNaN.
APInt::tcSetBit(significand, QNaNBit);
@@ -976,7 +1078,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
}
bool IEEEFloat::isLargest() const {
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes) {
// The largest number by magnitude in our format will be the floating point
// number with maximum exponent and with significand that is all ones except
// the LSB.
@@ -1418,7 +1521,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
exponent = semantics->maxExponent;
tcSetLeastSignificantBits(significandParts(), partCount(),
semantics->precision);
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes)
APInt::tcClearBit(significandParts(), 0);
return opInexact;
@@ -1519,7 +1623,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
}
}
+ // The all-ones values is an overflow if NaN is all ones. If NaN is
+ // represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);
@@ -1530,8 +1637,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
underflow for exact results. */
if (lost_fraction == lfExactlyZero) {
/* Canonicalize zeroes. */
- if (omsb == 0)
+ if (omsb == 0) {
category = fcZero;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
return opOK;
}
@@ -1549,18 +1659,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
/* Renormalize by incrementing the exponent and shifting our
significand right one. However if we already have the
maximum exponent we overflow to infinity. */
- if (exponent == semantics->maxExponent) {
- category = fcInfinity;
-
- return (opStatus) (opOverflow | opInexact);
- }
+ if (exponent == semantics->maxExponent)
+ // Invoke overflow handling with a rounding mode that will guarantee
+ // that the result gets turned into the correct infinity representation.
+ // This is needed instead of just setting the category to infinity to
+ // account for 8-bit floating point types that have no inf, only NaN.
+ return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
shiftSignificandRight(1);
return opInexact;
}
+ // The all-ones values is an overflow if NaN is all ones. If NaN is
+ // represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);
}
@@ -1574,8 +1688,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
assert(omsb < semantics->precision);
/* Canonicalize zeroes. */
- if (omsb == 0)
+ if (omsb == 0) {
category = fcZero;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
/* The fcZero case is a denormal that underflowed to zero. */
return (opStatus) (opUnderflow | opInexact);
@@ -1877,6 +1994,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
/* Change sign. */
void IEEEFloat::changeSign() {
+ // With NaN-as-negative-zero, neither NaN or negative zero can change
+ // their signs.
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
+ (isZero() || isNaN()))
+ return;
/* Look mummy, this one's easy. */
sign = !sign;
}
@@ -1906,6 +2028,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
if (category == fcZero) {
if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
sign = (rounding_mode == rmTowardNegative);
+ // NaN-in-negative-zero means zeros need to be normalized to +0.
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
}
return fs;
@@ -1931,6 +2056,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
sign ^= rhs.sign;
fs = multiplySpecials(rhs);
+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
if (isFiniteNonZero()) {
lostFraction lost_fraction = multiplySignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
@@ -1949,6 +2076,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
sign ^= rhs.sign;
fs = divideSpecials(rhs);
+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
if (isFiniteNonZero()) {
lostFraction lost_fraction = divideSignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
@@ -2057,8 +2186,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
}
}
- if (isZero())
+ if (isZero()) {
sign = origSign; // IEEE754 requires this
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ // But some 8-bit floats only have positive 0.
+ sign = false;
+ }
+
else
sign ^= origSign;
return fs;
@@ -2083,8 +2217,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
fs = subtract(V, rmNearestTiesToEven);
assert(fs==opOK);
}
- if (isZero())
+ if (isZero()) {
sign = origSign; // fmod requires this
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
return fs;
}
@@ -2112,8 +2249,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
positive zero unless rounding to minus infinity, except that
adding two like-signed zeroes gives that zero. */
- if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
+ if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
sign = (rounding_mode == rmTowardNegative);
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
} else {
fs = multiplySpecials(multiplicand);
@@ -2389,6 +2529,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
return is_signaling ? opInvalidOp : opOK;
}
+ // If NaN is negative zero, we need to create a new NaN to avoid converting
+ // NaN to -Inf.
+ if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
+ semantics->nanEncoding != fltNanEncoding::NegativeZero)
+ makeNaN(false, false);
+
*losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
// For x87 extended precision, we want to make a NaN, not a special NaN if
@@ -2410,6 +2556,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
makeNaN(false, sign);
*losesInfo = true;
fs = opInexact;
+ } else if (category == fcZero &&
+ semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // Negative zero loses info, but positive zero doesn't.
+ *losesInfo =
+ fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
+ fs = *losesInfo ? opInexact : opOK;
+ // NaN is negative zero means -0 -> +0, which can lose information
+ sign = false;
} else {
*losesInfo = false;
fs = opOK;
@@ -2696,7 +2850,7 @@ IEEEFloat::convertFromHexadecimalString(StringRef s,
}
hex_value = hexDigitValue(*p);
- if (hex_value == -1U)
+ if (hex_value == UINT_MAX)
break;
p++;
@@ -2877,9 +3031,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
category = fcZero;
fs = opOK;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
- /* Check whether the normalized exponent is high enough to overflow
- max during the log-rebasing in the max-exponent check below. */
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
} else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
fs = handleOverflow(rounding_mode);
@@ -3337,201 +3493,121 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
return APInt(128, words);
}
-APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
- assert(partCount()==2);
+template <const fltSemantics &S>
+APInt IEEEFloat::convertIEEEFloatToAPInt() const {
+ assert(semantics == &S);
+
+ constexpr int bias = -(S.minExponent - 1);
+ constexpr unsigned int trailing_significand_bits = S.precision - 1;
+ constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
+ constexpr integerPart integer_bit =
+ integerPart{1} << (trailing_significand_bits % integerPartWidth);
+ constexpr uint64_t significand_mask = integer_bit - 1;
+ constexpr unsigned int exponent_bits =
+ S.sizeInBits - 1 - trailing_significand_bits;
+ static_assert(exponent_bits < 64);
+ constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
- uint64_t myexponent, mysignificand, mysignificand2;
+ uint64_t myexponent;
+ std::array<integerPart, partCountForBits(trailing_significand_bits)>
+ mysignificand;
if (isFiniteNonZero()) {
- myexponent = exponent+16383; //bias
- mysignificand = significandParts()[0];
- mysignificand2 = significandParts()[1];
- if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = mysignificand2 = 0;
- } else if (category==fcInfinity) {
- myexponent = 0x7fff;
- mysignificand = mysignificand2 = 0;
+ myexponent = exponent + bias;
+ std::copy_n(significandParts(), mysignificand.size(),
+ mysignificand.begin());
+ if (myexponent == 1 &&
+ !(significandParts()[integer_bit_part] & integer_bit))
+ myexponent = 0; // denormal
+ } else if (category == fcZero) {
+ myexponent = ::exponentZero(S) + bias;
+ mysignificand.fill(0);
+ } else if (category == fcInfinity) {
+ if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ llvm_unreachable("semantics don't support inf!");
+ }
+ myexponent = ::exponentInf(S) + bias;
+ mysignificand.fill(0);
} else {
assert(category == fcNaN && "Unknown category!");
- myexponent = 0x7fff;
- mysignificand = significandParts()[0];
- mysignificand2 = significandParts()[1];
- }
-
- uint64_t words[2];
- words[0] = mysignificand;
- words[1] = ((uint64_t)(sign & 1) << 63) |
- ((myexponent & 0x7fff) << 48) |
- (mysignificand2 & 0xffffffffffffLL);
+ myexponent = ::exponentNaN(S) + bias;
+ std::copy_n(significandParts(), mysignificand.size(),
+ mysignificand.begin());
+ }
+ std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
+ auto words_iter =
+ std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
+ if constexpr (significand_mask != 0) {
+ // Clear the integer bit.
+ words[mysignificand.size() - 1] &= significand_mask;
+ }
+ std::fill(words_iter, words.end(), uint64_t{0});
+ constexpr size_t last_word = words.size() - 1;
+ uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
+ << ((S.sizeInBits - 1) % 64);
+ words[last_word] |= shifted_sign;
+ uint64_t shifted_exponent = (myexponent & exponent_mask)
+ << (trailing_significand_bits % 64);
+ words[last_word] |= shifted_exponent;
+ if constexpr (last_word == 0) {
+ return APInt(S.sizeInBits, words[0]);
+ }
+ return APInt(S.sizeInBits, words);
+}
- return APInt(128, words);
+APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
+ assert(partCount() == 2);
+ return convertIEEEFloatToAPInt<semIEEEquad>();
}
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
assert(partCount()==1);
-
- uint64_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent+1023; //bias
- mysignificand = *significandParts();
- if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category==fcInfinity) {
- myexponent = 0x7ff;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0x7ff;
- mysignificand = *significandParts();
- }
-
- return APInt(64, ((((uint64_t)(sign & 1) << 63) |
- ((myexponent & 0x7ff) << 52) |
- (mysignificand & 0xfffffffffffffLL))));
+ return convertIEEEFloatToAPInt<semIEEEdouble>();
}
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
assert(partCount()==1);
-
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent+127; //bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x800000))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category==fcInfinity) {
- myexponent = 0xff;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0xff;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
- (mysignificand & 0x7fffff)));
+ return convertIEEEFloatToAPInt<semIEEEsingle>();
}
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semBFloat);
assert(partCount() == 1);
-
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent + 127; // bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x80))
- myexponent = 0; // denormal
- } else if (category == fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category == fcInfinity) {
- myexponent = 0xff;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0xff;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
- (mysignificand & 0x7f)));
+ return convertIEEEFloatToAPInt<semBFloat>();
}
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
assert(partCount()==1);
-
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent+15; //bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x400))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category==fcInfinity) {
- myexponent = 0x1f;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0x1f;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
- (mysignificand & 0x3ff)));
+ return convertIEEEFloatToAPInt<semIEEEhalf>();
}
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2);
assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E5M2>();
+}
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent + 15; // bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x4))
- myexponent = 0; // denormal
- } else if (category == fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category == fcInfinity) {
- myexponent = 0x1f;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0x1f;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |
- (mysignificand & 0x3)));
+APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
+}
- uint32_t myexponent, mysignificand;
+APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
+}
- if (isFiniteNonZero()) {
- myexponent = exponent + 7; // bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x8))
- myexponent = 0; // denormal
- } else if (category == fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category == fcInfinity) {
- myexponent = 0xf;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0xf;
- mysignificand = (uint32_t)*significandParts();
- }
+APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
+}
- return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |
- (mysignificand & 0x7)));
+APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloatTF32>();
}
// This function creates an APInt that is just a bit map of the floating
@@ -3560,9 +3636,21 @@ APInt IEEEFloat::bitcastToAPInt() const {
if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
return convertFloat8E5M2APFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
+ return convertFloat8E5M2FNUZAPFloatToAPInt();
+
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
return convertFloat8E4M3FNAPFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
+ return convertFloat8E4M3FNUZAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
+ return convertFloat8E4M3B11FNUZAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
+ return convertFloatTF32APFloatToAPInt();
+
assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();
@@ -3643,205 +3731,131 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
}
}
-void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
- uint64_t i1 = api.getRawData()[0];
- uint64_t i2 = api.getRawData()[1];
- uint64_t myexponent = (i2 >> 48) & 0x7fff;
- uint64_t mysignificand = i1;
- uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
+template <const fltSemantics &S>
+void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
+ assert(api.getBitWidth() == S.sizeInBits);
+ constexpr integerPart integer_bit = integerPart{1}
+ << ((S.precision - 1) % integerPartWidth);
+ constexpr uint64_t significand_mask = integer_bit - 1;
+ constexpr unsigned int trailing_significand_bits = S.precision - 1;
+ constexpr unsigned int stored_significand_parts =
+ partCountForBits(trailing_significand_bits);
+ constexpr unsigned int exponent_bits =
+ S.sizeInBits - 1 - trailing_significand_bits;
+ static_assert(exponent_bits < 64);
+ constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
+ constexpr int bias = -(S.minExponent - 1);
- initialize(&semIEEEquad);
- assert(partCount()==2);
-
- sign = static_cast<unsigned int>(i2>>63);
- if (myexponent==0 &&
- (mysignificand==0 && mysignificand2==0)) {
- makeZero(sign);
- } else if (myexponent==0x7fff &&
- (mysignificand==0 && mysignificand2==0)) {
- makeInf(sign);
- } else if (myexponent==0x7fff &&
- (mysignificand!=0 || mysignificand2 !=0)) {
- category = fcNaN;
- exponent = exponentNaN();
- significandParts()[0] = mysignificand;
- significandParts()[1] = mysignificand2;
- } else {
- category = fcNormal;
- exponent = myexponent - 16383;
- significandParts()[0] = mysignificand;
- significandParts()[1] = mysignificand2;
- if (myexponent==0) // denormal
- exponent = -16382;
- else
- significandParts()[1] |= 0x1000000000000LL; // integer bit
+ // Copy the bits of the significand. We need to clear out the exponent and
+ // sign bit in the last word.
+ std::array<integerPart, stored_significand_parts> mysignificand;
+ std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
+ if constexpr (significand_mask != 0) {
+ mysignificand[mysignificand.size() - 1] &= significand_mask;
}
-}
-void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
- uint64_t i = *api.getRawData();
- uint64_t myexponent = (i >> 52) & 0x7ff;
- uint64_t mysignificand = i & 0xfffffffffffffLL;
+ // We assume the last word holds the sign bit, the exponent, and potentially
+ // some of the trailing significand field.
+ uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
+ uint64_t myexponent =
+ (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
- initialize(&semIEEEdouble);
- assert(partCount()==1);
+ initialize(&S);
+ assert(partCount() == mysignificand.size());
- sign = static_cast<unsigned int>(i>>63);
- if (myexponent==0 && mysignificand==0) {
- makeZero(sign);
- } else if (myexponent==0x7ff && mysignificand==0) {
- makeInf(sign);
- } else if (myexponent==0x7ff && mysignificand!=0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 1023;
- *significandParts() = mysignificand;
- if (myexponent==0) // denormal
- exponent = -1022;
- else
- *significandParts() |= 0x10000000000000LL; // integer bit
- }
-}
+ sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
-void IEEEFloat::initFromFloatAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 23) & 0xff;
- uint32_t mysignificand = i & 0x7fffff;
+ bool all_zero_significand =
+ llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
- initialize(&semIEEEsingle);
- assert(partCount()==1);
+ bool is_zero = myexponent == 0 && all_zero_significand;
- sign = i >> 31;
- if (myexponent==0 && mysignificand==0) {
- makeZero(sign);
- } else if (myexponent==0xff && mysignificand==0) {
- makeInf(sign);
- } else if (myexponent==0xff && mysignificand!=0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 127; //bias
- *significandParts() = mysignificand;
- if (myexponent==0) // denormal
- exponent = -126;
- else
- *significandParts() |= 0x800000; // integer bit
+ if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
+ if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
+ makeInf(sign);
+ return;
+ }
}
-}
-void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 7) & 0xff;
- uint32_t mysignificand = i & 0x7f;
+ bool is_nan = false;
- initialize(&semBFloat);
- assert(partCount() == 1);
+ if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
+ is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
+ } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
+ bool all_ones_significand =
+ std::all_of(mysignificand.begin(), mysignificand.end() - 1,
+ [](integerPart bits) { return bits == ~integerPart{0}; }) &&
+ (!significand_mask ||
+ mysignificand[mysignificand.size() - 1] == significand_mask);
+ is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
+ } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
+ is_nan = is_zero && sign;
+ }
- sign = i >> 15;
- if (myexponent == 0 && mysignificand == 0) {
- makeZero(sign);
- } else if (myexponent == 0xff && mysignificand == 0) {
- makeInf(sign);
- } else if (myexponent == 0xff && mysignificand != 0) {
+ if (is_nan) {
category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 127; // bias
- *significandParts() = mysignificand;
- if (myexponent == 0) // denormal
- exponent = -126;
- else
- *significandParts() |= 0x80; // integer bit
+ exponent = ::exponentNaN(S);
+ std::copy_n(mysignificand.begin(), mysignificand.size(),
+ significandParts());
+ return;
+ }
+
+ if (is_zero) {
+ makeZero(sign);
+ return;
}
+
+ category = fcNormal;
+ exponent = myexponent - bias;
+ std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
+ if (myexponent == 0) // denormal
+ exponent = S.minExponent;
+ else
+ significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}
-void IEEEFloat::initFromHalfAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 10) & 0x1f;
- uint32_t mysignificand = i & 0x3ff;
+void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEquad>(api);
+}
- initialize(&semIEEEhalf);
- assert(partCount()==1);
+void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEdouble>(api);
+}
- sign = i >> 15;
- if (myexponent==0 && mysignificand==0) {
- makeZero(sign);
- } else if (myexponent==0x1f && mysignificand==0) {
- makeInf(sign);
- } else if (myexponent==0x1f && mysignificand!=0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 15; //bias
- *significandParts() = mysignificand;
- if (myexponent==0) // denormal
- exponent = -14;
- else
- *significandParts() |= 0x400; // integer bit
- }
+void IEEEFloat::initFromFloatAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEsingle>(api);
}
-void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 2) & 0x1f;
- uint32_t mysignificand = i & 0x3;
+void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
+ initFromIEEEAPInt<semBFloat>(api);
+}
- initialize(&semFloat8E5M2);
- assert(partCount() == 1);
+void IEEEFloat::initFromHalfAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEhalf>(api);
+}
- sign = i >> 7;
- if (myexponent == 0 && mysignificand == 0) {
- makeZero(sign);
- } else if (myexponent == 0x1f && mysignificand == 0) {
- makeInf(sign);
- } else if (myexponent == 0x1f && mysignificand != 0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 15; // bias
- *significandParts() = mysignificand;
- if (myexponent == 0) // denormal
- exponent = -14;
- else
- *significandParts() |= 0x4; // integer bit
- }
+void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E5M2>(api);
+}
+
+void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 3) & 0xf;
- uint32_t mysignificand = i & 0x7;
+ initFromIEEEAPInt<semFloat8E4M3FN>(api);
+}
- initialize(&semFloat8E4M3FN);
- assert(partCount() == 1);
+void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
+}
- sign = i >> 7;
- if (myexponent == 0 && mysignificand == 0) {
- makeZero(sign);
- } else if (myexponent == 0xf && mysignificand == 7) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 7; // bias
- *significandParts() = mysignificand;
- if (myexponent == 0) // denormal
- exponent = -6;
- else
- *significandParts() |= 0x8; // integer bit
- }
+void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
+}
+
+void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
+ initFromIEEEAPInt<semFloatTF32>(api);
}
/// Treat api as containing the bits of a floating point number.
@@ -3863,8 +3877,16 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
return initFromPPCDoubleDoubleAPInt(api);
if (Sem == &semFloat8E5M2)
return initFromFloat8E5M2APInt(api);
+ if (Sem == &semFloat8E5M2FNUZ)
+ return initFromFloat8E5M2FNUZAPInt(api);
if (Sem == &semFloat8E4M3FN)
return initFromFloat8E4M3FNAPInt(api);
+ if (Sem == &semFloat8E4M3FNUZ)
+ return initFromFloat8E4M3FNUZAPInt(api);
+ if (Sem == &semFloat8E4M3B11FNUZ)
+ return initFromFloat8E4M3B11FNUZAPInt(api);
+ if (Sem == &semFloatTF32)
+ return initFromFloatTF32APInt(api);
llvm_unreachable(nullptr);
}
@@ -3893,7 +3915,8 @@ void IEEEFloat::makeLargest(bool Negative) {
? (~integerPart(0) >> NumUnusedHighBits)
: 0;
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes)
significand[0] &= ~integerPart(1);
}
@@ -4074,7 +4097,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
}
// Ignore trailing binary zeros.
- int trailingZeros = significand.countTrailingZeros();
+ int trailingZeros = significand.countr_zero();
exp += trailingZeros;
significand.lshrInPlace(trailingZeros);
@@ -4321,6 +4344,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
APInt::tcSet(significandParts(), 0, partCount());
category = fcZero;
exponent = 0;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
break;
}
@@ -4407,17 +4432,15 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
}
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
- return semantics->maxExponent;
- return semantics->maxExponent + 1;
+ return ::exponentNaN(*semantics);
}
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
- return semantics->maxExponent + 1;
+ return ::exponentInf(*semantics);
}
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
- return semantics->minExponent - 1;
+ return ::exponentZero(*semantics);
}
void IEEEFloat::makeInf(bool Negative) {
@@ -4435,6 +4458,10 @@ void IEEEFloat::makeInf(bool Negative) {
void IEEEFloat::makeZero(bool Negative) {
category = fcZero;
sign = Negative;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // Merge negative zero to positive because 0b10000...000 is used for NaN
+ sign = false;
+ }
exponent = exponentZero();
APInt::tcSet(significandParts(), 0, partCount());
}
@@ -4477,7 +4504,7 @@ IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
// Clamp to one past the range ends to let normalize handle overlflow.
- X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
+ X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
X.normalize(RoundingMode, lfExactlyZero);
if (X.isNaN())
X.makeQuiet();
@@ -5114,6 +5141,19 @@ APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
consumeError(StatusOrErr.takeError());
}
+FPClassTest APFloat::classify() const {
+ if (isZero())
+ return isNegative() ? fcNegZero : fcPosZero;
+ if (isNormal())
+ return isNegative() ? fcNegNormal : fcPosNormal;
+ if (isDenormal())
+ return isNegative() ? fcNegSubnormal : fcPosSubnormal;
+ if (isInfinity())
+ return isNegative() ? fcNegInf : fcPosInf;
+ assert(isNaN() && "Other class of FP constant");
+ return isSignaling() ? fcSNan : fcQNan;
+}
+
APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
roundingMode RM, bool *losesInfo) {
if (&getSemantics() == &ToSemantics) {