diff options
Diffstat (limited to 'llvm/lib/Support/APFloat.cpp')
-rw-r--r-- | llvm/lib/Support/APFloat.cpp | 446 |
1 files changed, 358 insertions, 88 deletions
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 050c37baefb87..569cac790af99 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -69,6 +69,7 @@ namespace llvm { }; static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; + static const fltSemantics semBFloat = {127, -126, 8, 16}; static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; @@ -117,6 +118,8 @@ namespace llvm { switch (S) { case S_IEEEhalf: return IEEEhalf(); + case S_BFloat: + return BFloat(); case S_IEEEsingle: return IEEEsingle(); case S_IEEEdouble: @@ -135,6 +138,8 @@ namespace llvm { APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { if (&Sem == &llvm::APFloat::IEEEhalf()) return S_IEEEhalf; + else if (&Sem == &llvm::APFloat::BFloat()) + return S_BFloat; else if (&Sem == &llvm::APFloat::IEEEsingle()) return S_IEEEsingle; else if (&Sem == &llvm::APFloat::IEEEdouble()) @@ -152,6 +157,9 @@ namespace llvm { const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } + const fltSemantics &APFloatBase::BFloat() { + return semBFloat; + } const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } @@ -171,6 +179,12 @@ namespace llvm { return semPPCDoubleDouble; } + constexpr RoundingMode APFloatBase::rmNearestTiesToEven; + constexpr RoundingMode APFloatBase::rmTowardPositive; + constexpr RoundingMode APFloatBase::rmTowardNegative; + constexpr RoundingMode APFloatBase::rmTowardZero; + constexpr RoundingMode APFloatBase::rmNearestTiesToAway; + /* A tight upper bound on number of parts required to hold the value pow(5, power) is @@ -1323,6 +1337,9 @@ bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, case rmTowardNegative: return sign; + + default: + break; } llvm_unreachable("Invalid rounding mode found"); } @@ -1439,25 +1456,26 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, default: llvm_unreachable(nullptr); + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): + assign(rhs); + LLVM_FALLTHROUGH; case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): case PackCategoriesIntoKey(fcNaN, fcInfinity): case PackCategoriesIntoKey(fcNaN, fcNaN): + if (isSignaling()) { + makeQuiet(); + return opInvalidOp; + } + return rhs.isSignaling() ? opInvalidOp : opOK; + case PackCategoriesIntoKey(fcNormal, fcZero): case PackCategoriesIntoKey(fcInfinity, fcNormal): case PackCategoriesIntoKey(fcInfinity, fcZero): return opOK; - case PackCategoriesIntoKey(fcZero, fcNaN): - case PackCategoriesIntoKey(fcNormal, fcNaN): - case PackCategoriesIntoKey(fcInfinity, fcNaN): - // We need to be sure to flip the sign here for subtraction because we - // don't have a separate negate operation so -NaN becomes 0 - NaN here. - sign = rhs.sign ^ subtract; - category = fcNaN; - copySignificand(rhs); - return opOK; - case PackCategoriesIntoKey(fcNormal, fcInfinity): case PackCategoriesIntoKey(fcZero, fcInfinity): category = fcInfinity; @@ -1562,20 +1580,22 @@ IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { default: llvm_unreachable(nullptr); - case PackCategoriesIntoKey(fcNaN, fcZero): - case PackCategoriesIntoKey(fcNaN, fcNormal): - case PackCategoriesIntoKey(fcNaN, fcInfinity): - case PackCategoriesIntoKey(fcNaN, fcNaN): - sign = false; - return opOK; - case PackCategoriesIntoKey(fcZero, fcNaN): case PackCategoriesIntoKey(fcNormal, fcNaN): case PackCategoriesIntoKey(fcInfinity, fcNaN): + assign(rhs); sign = false; - category = fcNaN; - copySignificand(rhs); - return opOK; + LLVM_FALLTHROUGH; + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + sign ^= rhs.sign; // restore the original sign + if (isSignaling()) { + makeQuiet(); + return opInvalidOp; + } + return rhs.isSignaling() ? opInvalidOp : opOK; case PackCategoriesIntoKey(fcNormal, fcInfinity): case PackCategoriesIntoKey(fcInfinity, fcNormal): @@ -1607,15 +1627,20 @@ IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { case PackCategoriesIntoKey(fcZero, fcNaN): case PackCategoriesIntoKey(fcNormal, fcNaN): case PackCategoriesIntoKey(fcInfinity, fcNaN): - category = fcNaN; - copySignificand(rhs); + assign(rhs); + sign = false; LLVM_FALLTHROUGH; case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): case PackCategoriesIntoKey(fcNaN, fcInfinity): case PackCategoriesIntoKey(fcNaN, fcNaN): - sign = false; - LLVM_FALLTHROUGH; + sign ^= rhs.sign; // restore the original sign + if (isSignaling()) { + makeQuiet(); + return opInvalidOp; + } + return rhs.isSignaling() ? opInvalidOp : opOK; + case PackCategoriesIntoKey(fcInfinity, fcZero): case PackCategoriesIntoKey(fcInfinity, fcNormal): case PackCategoriesIntoKey(fcZero, fcInfinity): @@ -1645,21 +1670,62 @@ IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { default: llvm_unreachable(nullptr); + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): + assign(rhs); + LLVM_FALLTHROUGH; case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): case PackCategoriesIntoKey(fcNaN, fcInfinity): case PackCategoriesIntoKey(fcNaN, fcNaN): + if (isSignaling()) { + makeQuiet(); + return opInvalidOp; + } + return rhs.isSignaling() ? opInvalidOp : opOK; + case PackCategoriesIntoKey(fcZero, fcInfinity): case PackCategoriesIntoKey(fcZero, fcNormal): case PackCategoriesIntoKey(fcNormal, fcInfinity): return opOK; + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcZero): + makeNaN(); + return opInvalidOp; + + case PackCategoriesIntoKey(fcNormal, fcNormal): + return opOK; + } +} + +IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { + switch (PackCategoriesIntoKey(category, rhs.category)) { + default: + llvm_unreachable(nullptr); + case PackCategoriesIntoKey(fcZero, fcNaN): case PackCategoriesIntoKey(fcNormal, fcNaN): case PackCategoriesIntoKey(fcInfinity, fcNaN): - sign = false; - category = fcNaN; - copySignificand(rhs); + assign(rhs); + LLVM_FALLTHROUGH; + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + if (isSignaling()) { + makeQuiet(); + return opInvalidOp; + } + return rhs.isSignaling() ? opInvalidOp : opOK; + + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcInfinity): return opOK; case PackCategoriesIntoKey(fcNormal, fcZero): @@ -1671,7 +1737,7 @@ IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { return opInvalidOp; case PackCategoriesIntoKey(fcNormal, fcNormal): - return opOK; + return opDivByZero; // fake status, indicating this is not a special case } } @@ -1759,40 +1825,108 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, return fs; } -/* Normalized remainder. This is not currently correct in all cases. */ +/* Normalized remainder. */ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { opStatus fs; - IEEEFloat V = *this; unsigned int origSign = sign; - fs = V.divide(rhs, rmNearestTiesToEven); - if (fs == opDivByZero) + // First handle the special cases. + fs = remainderSpecials(rhs); + if (fs != opDivByZero) return fs; - int parts = partCount(); - integerPart *x = new integerPart[parts]; - bool ignored; - fs = V.convertToInteger(makeMutableArrayRef(x, parts), - parts * integerPartWidth, true, rmNearestTiesToEven, - &ignored); - if (fs == opInvalidOp) { - delete[] x; - return fs; + fs = opOK; + + // Make sure the current value is less than twice the denom. If the addition + // did not succeed (an overflow has happened), which means that the finite + // value we currently posses must be less than twice the denom (as we are + // using the same semantics). + IEEEFloat P2 = rhs; + if (P2.add(rhs, rmNearestTiesToEven) == opOK) { + fs = mod(P2); + assert(fs == opOK); } - fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, - rmNearestTiesToEven); - assert(fs==opOK); // should always work + // Lets work with absolute numbers. + IEEEFloat P = rhs; + P.sign = false; + sign = false; - fs = V.multiply(rhs, rmNearestTiesToEven); - assert(fs==opOK || fs==opInexact); // should not overflow or underflow + // + // To calculate the remainder we use the following scheme. + // + // The remainder is defained as follows: + // + // remainder = numer - rquot * denom = x - r * p + // + // Where r is the result of: x/p, rounded toward the nearest integral value + // (with halfway cases rounded toward the even number). + // + // Currently, (after x mod 2p): + // r is the number of 2p's present inside x, which is inherently, an even + // number of p's. + // + // We may split the remaining calculation into 4 options: + // - if x < 0.5p then we round to the nearest number with is 0, and are done. + // - if x == 0.5p then we round to the nearest even number which is 0, and we + // are done as well. + // - if 0.5p < x < p then we round to nearest number which is 1, and we have + // to subtract 1p at least once. + // - if x >= p then we must subtract p at least once, as x must be a + // remainder. + // + // By now, we were done, or we added 1 to r, which in turn, now an odd number. + // + // We can now split the remaining calculation to the following 3 options: + // - if x < 0.5p then we round to the nearest number with is 0, and are done. + // - if x == 0.5p then we round to the nearest even number. As r is odd, we + // must round up to the next even number. so we must subtract p once more. + // - if x > 0.5p (and inherently x < p) then we must round r up to the next + // integral, and subtract p once more. + // - fs = subtract(V, rmNearestTiesToEven); - assert(fs==opOK || fs==opInexact); // likewise + // Extend the semantics to prevent an overflow/underflow or inexact result. + bool losesInfo; + fltSemantics extendedSemantics = *semantics; + extendedSemantics.maxExponent++; + extendedSemantics.minExponent--; + extendedSemantics.precision += 2; + + IEEEFloat VEx = *this; + fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + IEEEFloat PEx = P; + fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + + // It is simpler to work with 2x instead of 0.5p, and we do not need to lose + // any fraction. + fs = VEx.add(VEx, rmNearestTiesToEven); + assert(fs == opOK); + + if (VEx.compare(PEx) == cmpGreaterThan) { + fs = subtract(P, rmNearestTiesToEven); + assert(fs == opOK); + + // Make VEx = this.add(this), but because we have different semantics, we do + // not want to `convert` again, so we just subtract PEx twice (which equals + // to the desired value). + fs = VEx.subtract(PEx, rmNearestTiesToEven); + assert(fs == opOK); + fs = VEx.subtract(PEx, rmNearestTiesToEven); + assert(fs == opOK); + + cmpResult result = VEx.compare(PEx); + if (result == cmpGreaterThan || result == cmpEqual) { + fs = subtract(P, rmNearestTiesToEven); + assert(fs == opOK); + } + } if (isZero()) sign = origSign; // IEEE754 requires this - delete[] x; + else + sign ^= origSign; return fs; } @@ -1860,14 +1994,59 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, return fs; } -/* Rounding-mode corrrect round to integral value. */ +/* Rounding-mode correct round to integral value. */ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { opStatus fs; + if (isInfinity()) + // [IEEE Std 754-2008 6.1]: + // The behavior of infinity in floating-point arithmetic is derived from the + // limiting cases of real arithmetic with operands of arbitrarily + // large magnitude, when such a limit exists. + // ... + // Operations on infinite operands are usually exact and therefore signal no + // exceptions ... + return opOK; + + if (isNaN()) { + if (isSignaling()) { + // [IEEE Std 754-2008 6.2]: + // Under default exception handling, any operation signaling an invalid + // operation exception and for which a floating-point result is to be + // delivered shall deliver a quiet NaN. + makeQuiet(); + // [IEEE Std 754-2008 6.2]: + // Signaling NaNs shall be reserved operands that, under default exception + // handling, signal the invalid operation exception(see 7.2) for every + // general-computational and signaling-computational operation except for + // the conversions described in 5.12. + return opInvalidOp; + } else { + // [IEEE Std 754-2008 6.2]: + // For an operation with quiet NaN inputs, other than maximum and minimum + // operations, if a floating-point result is to be delivered the result + // shall be a quiet NaN which should be one of the input NaNs. + // ... + // Every general-computational and quiet-computational operation involving + // one or more input NaNs, none of them signaling, shall signal no + // exception, except fusedMultiplyAdd might signal the invalid operation + // exception(see 7.2). + return opOK; + } + } + + if (isZero()) { + // [IEEE Std 754-2008 6.3]: + // ... the sign of the result of conversions, the quantize operation, the + // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is + // the sign of the first or only operand. + return opOK; + } + // If the exponent is large enough, we know that this value is already // integral, and the arithmetic below would potentially cause it to saturate // to +/-Inf. Bail out early instead. - if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) + if (exponent+1 >= (int)semanticsPrecision(*semantics)) return opOK; // The algorithm here is quite simple: we add 2^(p-1), where p is the @@ -1881,19 +2060,18 @@ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { IEEEFloat MagicConstant(*semantics); fs = MagicConstant.convertFromAPInt(IntegerConstant, false, rmNearestTiesToEven); + assert(fs == opOK); MagicConstant.sign = sign; - if (fs != opOK) - return fs; - - // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. + // Preserve the input sign so that we can handle the case of zero result + // correctly. bool inputSign = isNegative(); fs = add(MagicConstant, rounding_mode); - if (fs != opOK && fs != opInexact) - return fs; - fs = subtract(MagicConstant, rounding_mode); + // Current value and 'MagicConstant' are both integers, so the result of the + // subtraction is always exact according to Sterbenz' lemma. + subtract(MagicConstant, rounding_mode); // Restore the input sign. if (inputSign != isNegative()) @@ -2621,24 +2799,70 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { } bool IEEEFloat::convertFromStringSpecials(StringRef str) { + const size_t MIN_NAME_SIZE = 3; + + if (str.size() < MIN_NAME_SIZE) + return false; + if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { makeInf(false); return true; } - if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) { - makeInf(true); - return true; + bool IsNegative = str.front() == '-'; + if (IsNegative) { + str = str.drop_front(); + if (str.size() < MIN_NAME_SIZE) + return false; + + if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { + makeInf(true); + return true; + } } - if (str.equals("nan") || str.equals("NaN")) { - makeNaN(false, false); - return true; + // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. + bool IsSignaling = str.front() == 's' || str.front() == 'S'; + if (IsSignaling) { + str = str.drop_front(); + if (str.size() < MIN_NAME_SIZE) + return false; } - if (str.equals("-nan") || str.equals("-NaN")) { - makeNaN(false, true); - return true; + if (str.startswith("nan") || str.startswith("NaN")) { + str = str.drop_front(3); + + // A NaN without payload. + if (str.empty()) { + makeNaN(IsSignaling, IsNegative); + return true; + } + + // Allow the payload to be inside parentheses. + if (str.front() == '(') { + // Parentheses should be balanced (and not empty). + if (str.size() <= 2 || str.back() != ')') + return false; + + str = str.slice(1, str.size() - 1); + } + + // Determine the payload number's radix. + unsigned Radix = 10; + if (str[0] == '0') { + if (str.size() > 1 && tolower(str[1]) == 'x') { + str = str.drop_front(2); + Radix = 16; + } else + Radix = 8; + } + + // Parse the payload and make the NaN. + APInt Payload; + if (!str.getAsInteger(Radix, Payload)) { + makeNaN(IsSignaling, IsNegative, &Payload); + return true; + } } return false; @@ -3039,6 +3263,33 @@ APInt IEEEFloat::convertFloatAPFloatToAPInt() const { (mysignificand & 0x7fffff))); } +APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { + assert(semantics == (const llvm::fltSemantics *)&semBFloat); + assert(partCount() == 1); + + uint32_t myexponent, mysignificand; + + if (isFiniteNonZero()) { + myexponent = exponent + 127; // bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x80)) + myexponent = 0; // denormal + } else if (category == fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category == fcInfinity) { + myexponent = 0xff; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0xff; + mysignificand = (uint32_t)*significandParts(); + } + + return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | + (mysignificand & 0x7f))); +} + APInt IEEEFloat::convertHalfAPFloatToAPInt() const { assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); assert(partCount()==1); @@ -3074,6 +3325,9 @@ APInt IEEEFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) return convertHalfAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semBFloat) + return convertBFloatAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) return convertFloatAPFloatToAPInt(); @@ -3270,6 +3524,37 @@ void IEEEFloat::initFromFloatAPInt(const APInt &api) { } } +void IEEEFloat::initFromBFloatAPInt(const APInt &api) { + assert(api.getBitWidth() == 16); + uint32_t i = (uint32_t)*api.getRawData(); + uint32_t myexponent = (i >> 7) & 0xff; + uint32_t mysignificand = i & 0x7f; + + initialize(&semBFloat); + assert(partCount() == 1); + + sign = i >> 15; + if (myexponent == 0 && mysignificand == 0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent == 0xff && mysignificand == 0) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent == 0xff && mysignificand != 0) { + // sign, exponent, significand meaningless + category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 127; // bias + *significandParts() = mysignificand; + if (myexponent == 0) // denormal + exponent = -126; + else + *significandParts() |= 0x80; // integer bit + } +} + void IEEEFloat::initFromHalfAPInt(const APInt &api) { assert(api.getBitWidth()==16); uint32_t i = (uint32_t)*api.getRawData(); @@ -3308,6 +3593,8 @@ void IEEEFloat::initFromHalfAPInt(const APInt &api) { void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { if (Sem == &semIEEEhalf) return initFromHalfAPInt(api); + if (Sem == &semBFloat) + return initFromBFloatAPInt(api); if (Sem == &semIEEEsingle) return initFromFloatAPInt(api); if (Sem == &semIEEEdouble) @@ -4425,7 +4712,7 @@ bool DoubleAPFloat::isDenormal() const { return getCategory() == fcNormal && (Floats[0].isDenormal() || Floats[1].isDenormal() || // (double)(Hi + Lo) == Hi defines a normal number. - Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual); + Floats[0] != Floats[0] + Floats[1]); } bool DoubleAPFloat::isSmallest() const { @@ -4547,26 +4834,9 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, llvm_unreachable("Unexpected semantics"); } -APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) { - if (isIEEE) { - switch (BitWidth) { - case 16: - return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth)); - case 32: - return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth)); - case 64: - return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth)); - case 80: - return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth)); - case 128: - return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth)); - default: - llvm_unreachable("Unknown floating bit width"); - } - } else { - assert(BitWidth == 128); - return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); - } +APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics, + unsigned BitWidth) { + return APFloat(Semantics, APInt::getAllOnesValue(BitWidth)); } void APFloat::print(raw_ostream &OS) const { |