aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Support/APInt.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/APInt.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Support/APInt.cpp2989
1 files changed, 2989 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Support/APInt.cpp b/contrib/llvm-project/llvm/lib/Support/APInt.cpp
new file mode 100644
index 000000000000..43173311cd80
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Support/APInt.cpp
@@ -0,0 +1,2989 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/bit.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+#define DEBUG_TYPE "apint"
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) {
+ uint64_t *result = new uint64_t[numWords];
+ memset(result, 0, numWords * sizeof(uint64_t));
+ return result;
+}
+
+/// A utility function for allocating memory and checking for allocation
+/// failure. The content is not zeroed.
+inline static uint64_t* getMemory(unsigned numWords) {
+ return new uint64_t[numWords];
+}
+
+/// A utility function that converts a character to a digit.
+inline static unsigned getDigit(char cdigit, uint8_t radix) {
+ unsigned r;
+
+ if (radix == 16 || radix == 36) {
+ r = cdigit - '0';
+ if (r <= 9)
+ return r;
+
+ r = cdigit - 'A';
+ if (r <= radix - 11U)
+ return r + 10;
+
+ r = cdigit - 'a';
+ if (r <= radix - 11U)
+ return r + 10;
+
+ radix = 10;
+ }
+
+ r = cdigit - '0';
+ if (r < radix)
+ return r;
+
+ return -1U;
+}
+
+
+void APInt::initSlowCase(uint64_t val, bool isSigned) {
+ U.pVal = getClearedMemory(getNumWords());
+ U.pVal[0] = val;
+ if (isSigned && int64_t(val) < 0)
+ for (unsigned i = 1; i < getNumWords(); ++i)
+ U.pVal[i] = WORDTYPE_MAX;
+ clearUnusedBits();
+}
+
+void APInt::initSlowCase(const APInt& that) {
+ U.pVal = getMemory(getNumWords());
+ memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
+}
+
+void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
+ assert(BitWidth && "Bitwidth too small");
+ assert(bigVal.data() && "Null pointer detected!");
+ if (isSingleWord())
+ U.VAL = bigVal[0];
+ else {
+ // Get memory, cleared to 0
+ U.pVal = getClearedMemory(getNumWords());
+ // Calculate the number of words to copy
+ unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
+ // Copy the words from bigVal to pVal
+ memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
+ }
+ // Make sure unused high bits are cleared
+ clearUnusedBits();
+}
+
+APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
+ : BitWidth(numBits) {
+ initFromArray(bigVal);
+}
+
+APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
+ : BitWidth(numBits) {
+ initFromArray(makeArrayRef(bigVal, numWords));
+}
+
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
+ : BitWidth(numbits) {
+ assert(BitWidth && "Bitwidth too small");
+ fromString(numbits, Str, radix);
+}
+
+void APInt::reallocate(unsigned NewBitWidth) {
+ // If the number of words is the same we can just change the width and stop.
+ if (getNumWords() == getNumWords(NewBitWidth)) {
+ BitWidth = NewBitWidth;
+ return;
+ }
+
+ // If we have an allocation, delete it.
+ if (!isSingleWord())
+ delete [] U.pVal;
+
+ // Update BitWidth.
+ BitWidth = NewBitWidth;
+
+ // If we are supposed to have an allocation, create it.
+ if (!isSingleWord())
+ U.pVal = getMemory(getNumWords());
+}
+
+void APInt::AssignSlowCase(const APInt& RHS) {
+ // Don't do anything for X = X
+ if (this == &RHS)
+ return;
+
+ // Adjust the bit width and handle allocations as necessary.
+ reallocate(RHS.getBitWidth());
+
+ // Copy the data.
+ if (isSingleWord())
+ U.VAL = RHS.U.VAL;
+ else
+ memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
+}
+
+/// This method 'profiles' an APInt for use with FoldingSet.
+void APInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger(BitWidth);
+
+ if (isSingleWord()) {
+ ID.AddInteger(U.VAL);
+ return;
+ }
+
+ unsigned NumWords = getNumWords();
+ for (unsigned i = 0; i < NumWords; ++i)
+ ID.AddInteger(U.pVal[i]);
+}
+
+/// Prefix increment operator. Increments the APInt by one.
+APInt& APInt::operator++() {
+ if (isSingleWord())
+ ++U.VAL;
+ else
+ tcIncrement(U.pVal, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Prefix decrement operator. Decrements the APInt by one.
+APInt& APInt::operator--() {
+ if (isSingleWord())
+ --U.VAL;
+ else
+ tcDecrement(U.pVal, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Adds the RHS APint to this APInt.
+/// @returns this, after addition of RHS.
+/// Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ U.VAL += RHS.U.VAL;
+ else
+ tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
+ return clearUnusedBits();
+}
+
+APInt& APInt::operator+=(uint64_t RHS) {
+ if (isSingleWord())
+ U.VAL += RHS;
+ else
+ tcAddPart(U.pVal, RHS, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Subtracts the RHS APInt from this APInt
+/// @returns this, after subtraction
+/// Subtraction assignment operator.
+APInt& APInt::operator-=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ U.VAL -= RHS.U.VAL;
+ else
+ tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
+ return clearUnusedBits();
+}
+
+APInt& APInt::operator-=(uint64_t RHS) {
+ if (isSingleWord())
+ U.VAL -= RHS;
+ else
+ tcSubtractPart(U.pVal, RHS, getNumWords());
+ return clearUnusedBits();
+}
+
+APInt APInt::operator*(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, U.VAL * RHS.U.VAL);
+
+ APInt Result(getMemory(getNumWords()), getBitWidth());
+
+ tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
+
+ Result.clearUnusedBits();
+ return Result;
+}
+
+void APInt::AndAssignSlowCase(const APInt& RHS) {
+ tcAnd(U.pVal, RHS.U.pVal, getNumWords());
+}
+
+void APInt::OrAssignSlowCase(const APInt& RHS) {
+ tcOr(U.pVal, RHS.U.pVal, getNumWords());
+}
+
+void APInt::XorAssignSlowCase(const APInt& RHS) {
+ tcXor(U.pVal, RHS.U.pVal, getNumWords());
+}
+
+APInt& APInt::operator*=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ *this = *this * RHS;
+ return *this;
+}
+
+APInt& APInt::operator*=(uint64_t RHS) {
+ if (isSingleWord()) {
+ U.VAL *= RHS;
+ } else {
+ unsigned NumWords = getNumWords();
+ tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
+ }
+ return clearUnusedBits();
+}
+
+bool APInt::EqualSlowCase(const APInt& RHS) const {
+ return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
+}
+
+int APInt::compare(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord())
+ return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
+
+ return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
+}
+
+int APInt::compareSigned(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord()) {
+ int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
+ int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
+ return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
+ }
+
+ bool lhsNeg = isNegative();
+ bool rhsNeg = RHS.isNegative();
+
+ // If the sign bits don't match, then (LHS < RHS) if LHS is negative
+ if (lhsNeg != rhsNeg)
+ return lhsNeg ? -1 : 1;
+
+ // Otherwise we can just use an unsigned comparison, because even negative
+ // numbers compare correctly this way if both have the same signed-ness.
+ return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
+}
+
+void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
+ unsigned loWord = whichWord(loBit);
+ unsigned hiWord = whichWord(hiBit);
+
+ // Create an initial mask for the low word with zeros below loBit.
+ uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
+
+ // If hiBit is not aligned, we need a high mask.
+ unsigned hiShiftAmt = whichBit(hiBit);
+ if (hiShiftAmt != 0) {
+ // Create a high mask with zeros above hiBit.
+ uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
+ // If loWord and hiWord are equal, then we combine the masks. Otherwise,
+ // set the bits in hiWord.
+ if (hiWord == loWord)
+ loMask &= hiMask;
+ else
+ U.pVal[hiWord] |= hiMask;
+ }
+ // Apply the mask to the low word.
+ U.pVal[loWord] |= loMask;
+
+ // Fill any words between loWord and hiWord with all ones.
+ for (unsigned word = loWord + 1; word < hiWord; ++word)
+ U.pVal[word] = WORDTYPE_MAX;
+}
+
+/// Toggle every bit to its opposite value.
+void APInt::flipAllBitsSlowCase() {
+ tcComplement(U.pVal, getNumWords());
+ clearUnusedBits();
+}
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// Toggles a given bit to its opposite value.
+void APInt::flipBit(unsigned bitPosition) {
+ assert(bitPosition < BitWidth && "Out of the bit-width range!");
+ if ((*this)[bitPosition]) clearBit(bitPosition);
+ else setBit(bitPosition);
+}
+
+void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
+ unsigned subBitWidth = subBits.getBitWidth();
+ assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
+ "Illegal bit insertion");
+
+ // Insertion is a direct copy.
+ if (subBitWidth == BitWidth) {
+ *this = subBits;
+ return;
+ }
+
+ // Single word result can be done as a direct bitmask.
+ if (isSingleWord()) {
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ U.VAL &= ~(mask << bitPosition);
+ U.VAL |= (subBits.U.VAL << bitPosition);
+ return;
+ }
+
+ unsigned loBit = whichBit(bitPosition);
+ unsigned loWord = whichWord(bitPosition);
+ unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
+
+ // Insertion within a single word can be done as a direct bitmask.
+ if (loWord == hi1Word) {
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ U.pVal[loWord] &= ~(mask << loBit);
+ U.pVal[loWord] |= (subBits.U.VAL << loBit);
+ return;
+ }
+
+ // Insert on word boundaries.
+ if (loBit == 0) {
+ // Direct copy whole words.
+ unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
+ memcpy(U.pVal + loWord, subBits.getRawData(),
+ numWholeSubWords * APINT_WORD_SIZE);
+
+ // Mask+insert remaining bits.
+ unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
+ if (remainingBits != 0) {
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
+ U.pVal[hi1Word] &= ~mask;
+ U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
+ }
+ return;
+ }
+
+ // General case - set/clear individual bits in dst based on src.
+ // TODO - there is scope for optimization here, but at the moment this code
+ // path is barely used so prefer readability over performance.
+ for (unsigned i = 0; i != subBitWidth; ++i) {
+ if (subBits[i])
+ setBit(bitPosition + i);
+ else
+ clearBit(bitPosition + i);
+ }
+}
+
+APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
+ assert(numBits > 0 && "Can't extract zero bits");
+ assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
+ "Illegal bit extraction");
+
+ if (isSingleWord())
+ return APInt(numBits, U.VAL >> bitPosition);
+
+ unsigned loBit = whichBit(bitPosition);
+ unsigned loWord = whichWord(bitPosition);
+ unsigned hiWord = whichWord(bitPosition + numBits - 1);
+
+ // Single word result extracting bits from a single word source.
+ if (loWord == hiWord)
+ return APInt(numBits, U.pVal[loWord] >> loBit);
+
+ // Extracting bits that start on a source word boundary can be done
+ // as a fast memory copy.
+ if (loBit == 0)
+ return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
+
+ // General case - shift + copy source words directly into place.
+ APInt Result(numBits, 0);
+ unsigned NumSrcWords = getNumWords();
+ unsigned NumDstWords = Result.getNumWords();
+
+ uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
+ for (unsigned word = 0; word < NumDstWords; ++word) {
+ uint64_t w0 = U.pVal[loWord + word];
+ uint64_t w1 =
+ (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
+ DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
+ }
+
+ return Result.clearUnusedBits();
+}
+
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
+
+ size_t slen = str.size();
+
+ // Each computation below needs to know if it's negative.
+ StringRef::iterator p = str.begin();
+ unsigned isNegative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+
+ // For radixes of power-of-two values, the bits required is accurately and
+ // easily computed
+ if (radix == 2)
+ return slen + isNegative;
+ if (radix == 8)
+ return slen * 3 + isNegative;
+ if (radix == 16)
+ return slen * 4 + isNegative;
+
+ // FIXME: base 36
+
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
+
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large. This avoids the assertion in the constructor. This
+ // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+ // bits in that case.
+ unsigned sufficient
+ = radix == 10? (slen == 1 ? 4 : slen * 64/18)
+ : (slen == 1 ? 7 : slen * 16/3);
+
+ // Convert to the actual binary value.
+ APInt tmp(sufficient, StringRef(p, slen), radix);
+
+ // Compute how many bits are required. If the log is infinite, assume we need
+ // just bit. If the log is exact and value is negative, then the value is
+ // MinSignedValue with (log + 1) bits.
+ unsigned log = tmp.logBase2();
+ if (log == (unsigned)-1) {
+ return isNegative + 1;
+ } else if (isNegative && tmp.isPowerOf2()) {
+ return isNegative + log;
+ } else {
+ return isNegative + log + 1;
+ }
+}
+
+hash_code llvm::hash_value(const APInt &Arg) {
+ if (Arg.isSingleWord())
+ return hash_combine(Arg.U.VAL);
+
+ return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
+}
+
+bool APInt::isSplat(unsigned SplatSizeInBits) const {
+ assert(getBitWidth() % SplatSizeInBits == 0 &&
+ "SplatSizeInBits must divide width!");
+ // We can check that all parts of an integer are equal by making use of a
+ // little trick: rotate and check if it's still the same value.
+ return *this == rotl(SplatSizeInBits);
+}
+
+/// This function returns the high "numBits" bits of this APInt.
+APInt APInt::getHiBits(unsigned numBits) const {
+ return this->lshr(BitWidth - numBits);
+}
+
+/// This function returns the low "numBits" bits of this APInt.
+APInt APInt::getLoBits(unsigned numBits) const {
+ APInt Result(getLowBitsSet(BitWidth, numBits));
+ Result &= *this;
+ return Result;
+}
+
+/// Return a value containing V broadcasted over NewLen bits.
+APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
+ assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
+
+ APInt Val = V.zextOrSelf(NewLen);
+ for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
+ Val |= Val << I;
+
+ return Val;
+}
+
+unsigned APInt::countLeadingZerosSlowCase() const {
+ unsigned Count = 0;
+ for (int i = getNumWords()-1; i >= 0; --i) {
+ uint64_t V = U.pVal[i];
+ if (V == 0)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += llvm::countLeadingZeros(V);
+ break;
+ }
+ }
+ // Adjust for unused bits in the most significant word (they are zero).
+ unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
+ Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
+ return Count;
+}
+
+unsigned APInt::countLeadingOnesSlowCase() const {
+ unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
+ unsigned shift;
+ if (!highWordBits) {
+ highWordBits = APINT_BITS_PER_WORD;
+ shift = 0;
+ } else {
+ shift = APINT_BITS_PER_WORD - highWordBits;
+ }
+ int i = getNumWords() - 1;
+ unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
+ if (Count == highWordBits) {
+ for (i--; i >= 0; --i) {
+ if (U.pVal[i] == WORDTYPE_MAX)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += llvm::countLeadingOnes(U.pVal[i]);
+ break;
+ }
+ }
+ }
+ return Count;
+}
+
+unsigned APInt::countTrailingZerosSlowCase() const {
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && U.pVal[i] == 0; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += llvm::countTrailingZeros(U.pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countTrailingOnesSlowCase() const {
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += llvm::countTrailingOnes(U.pVal[i]);
+ assert(Count <= BitWidth);
+ return Count;
+}
+
+unsigned APInt::countPopulationSlowCase() const {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ Count += llvm::countPopulation(U.pVal[i]);
+ return Count;
+}
+
+bool APInt::intersectsSlowCase(const APInt &RHS) const {
+ for (unsigned i = 0, e = getNumWords(); i != e; ++i)
+ if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
+ return true;
+
+ return false;
+}
+
+bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
+ for (unsigned i = 0, e = getNumWords(); i != e; ++i)
+ if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
+ return false;
+
+ return true;
+}
+
+APInt APInt::byteSwap() const {
+ assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
+ if (BitWidth == 16)
+ return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
+ if (BitWidth == 32)
+ return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
+ if (BitWidth == 48) {
+ unsigned Tmp1 = unsigned(U.VAL >> 16);
+ Tmp1 = ByteSwap_32(Tmp1);
+ uint16_t Tmp2 = uint16_t(U.VAL);
+ Tmp2 = ByteSwap_16(Tmp2);
+ return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
+ }
+ if (BitWidth == 64)
+ return APInt(BitWidth, ByteSwap_64(U.VAL));
+
+ APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
+ for (unsigned I = 0, N = getNumWords(); I != N; ++I)
+ Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
+ if (Result.BitWidth != BitWidth) {
+ Result.lshrInPlace(Result.BitWidth - BitWidth);
+ Result.BitWidth = BitWidth;
+ }
+ return Result;
+}
+
+APInt APInt::reverseBits() const {
+ switch (BitWidth) {
+ case 64:
+ return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
+ case 32:
+ return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
+ case 16:
+ return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
+ case 8:
+ return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
+ default:
+ break;
+ }
+
+ APInt Val(*this);
+ APInt Reversed(BitWidth, 0);
+ unsigned S = BitWidth;
+
+ for (; Val != 0; Val.lshrInPlace(1)) {
+ Reversed <<= 1;
+ Reversed |= Val[0];
+ --S;
+ }
+
+ Reversed <<= S;
+ return Reversed;
+}
+
+APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
+ // Fast-path a common case.
+ if (A == B) return A;
+
+ // Corner cases: if either operand is zero, the other is the gcd.
+ if (!A) return B;
+ if (!B) return A;
+
+ // Count common powers of 2 and remove all other powers of 2.
+ unsigned Pow2;
+ {
+ unsigned Pow2_A = A.countTrailingZeros();
+ unsigned Pow2_B = B.countTrailingZeros();
+ if (Pow2_A > Pow2_B) {
+ A.lshrInPlace(Pow2_A - Pow2_B);
+ Pow2 = Pow2_B;
+ } else if (Pow2_B > Pow2_A) {
+ B.lshrInPlace(Pow2_B - Pow2_A);
+ Pow2 = Pow2_A;
+ } else {
+ Pow2 = Pow2_A;
+ }
+ }
+
+ // Both operands are odd multiples of 2^Pow_2:
+ //
+ // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
+ //
+ // This is a modified version of Stein's algorithm, taking advantage of
+ // efficient countTrailingZeros().
+ while (A != B) {
+ if (A.ugt(B)) {
+ A -= B;
+ A.lshrInPlace(A.countTrailingZeros() - Pow2);
+ } else {
+ B -= A;
+ B.lshrInPlace(B.countTrailingZeros() - Pow2);
+ }
+ }
+
+ return A;
+}
+
+APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
+ uint64_t I = bit_cast<uint64_t>(Double);
+
+ // Get the sign bit from the highest order bit
+ bool isNeg = I >> 63;
+
+ // Get the 11-bit exponent and adjust for the 1023 bit bias
+ int64_t exp = ((I >> 52) & 0x7ff) - 1023;
+
+ // If the exponent is negative, the value is < 0 so just return 0.
+ if (exp < 0)
+ return APInt(width, 0u);
+
+ // Extract the mantissa by clearing the top 12 bits (sign + exponent).
+ uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
+
+ // If the exponent doesn't shift all bits out of the mantissa
+ if (exp < 52)
+ return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
+ APInt(width, mantissa >> (52 - exp));
+
+ // If the client didn't provide enough bits for us to shift the mantissa into
+ // then the result is undefined, just return 0
+ if (width <= exp - 52)
+ return APInt(width, 0);
+
+ // Otherwise, we have to shift the mantissa bits up to the right location
+ APInt Tmp(width, mantissa);
+ Tmp <<= (unsigned)exp - 52;
+ return isNeg ? -Tmp : Tmp;
+}
+
+/// This function converts this APInt to a double.
+/// The layout for double is as following (IEEE Standard 754):
+/// --------------------------------------
+/// | Sign Exponent Fraction Bias |
+/// |-------------------------------------- |
+/// | 1[63] 11[62-52] 52[51-00] 1023 |
+/// --------------------------------------
+double APInt::roundToDouble(bool isSigned) const {
+
+ // Handle the simple case where the value is contained in one uint64_t.
+ // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
+ if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
+ if (isSigned) {
+ int64_t sext = SignExtend64(getWord(0), BitWidth);
+ return double(sext);
+ } else
+ return double(getWord(0));
+ }
+
+ // Determine if the value is negative.
+ bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
+
+ // Construct the absolute value if we're negative.
+ APInt Tmp(isNeg ? -(*this) : (*this));
+
+ // Figure out how many bits we're using.
+ unsigned n = Tmp.getActiveBits();
+
+ // The exponent (without bias normalization) is just the number of bits
+ // we are using. Note that the sign bit is gone since we constructed the
+ // absolute value.
+ uint64_t exp = n;
+
+ // Return infinity for exponent overflow
+ if (exp > 1023) {
+ if (!isSigned || !isNeg)
+ return std::numeric_limits<double>::infinity();
+ else
+ return -std::numeric_limits<double>::infinity();
+ }
+ exp += 1023; // Increment for 1023 bias
+
+ // Number of bits in mantissa is 52. To obtain the mantissa value, we must
+ // extract the high 52 bits from the correct words in pVal.
+ uint64_t mantissa;
+ unsigned hiWord = whichWord(n-1);
+ if (hiWord == 0) {
+ mantissa = Tmp.U.pVal[0];
+ if (n > 52)
+ mantissa >>= n - 52; // shift down, we want the top 52 bits.
+ } else {
+ assert(hiWord > 0 && "huh?");
+ uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
+ uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+ mantissa = hibits | lobits;
+ }
+
+ // The leading bit of mantissa is implicit, so get rid of it.
+ uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
+ uint64_t I = sign | (exp << 52) | mantissa;
+ return bit_cast<double>(I);
+}
+
+// Truncate to new width.
+APInt APInt::trunc(unsigned width) const {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, getRawData()[0]);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
+ Result.U.pVal[i] = U.pVal[i];
+
+ // Truncate and copy any partial word.
+ unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.U.pVal[i] = U.pVal[i] << bits >> bits;
+
+ return Result;
+}
+
+// Sign extend to a new width.
+APInt APInt::sext(unsigned Width) const {
+ assert(Width > BitWidth && "Invalid APInt SignExtend request");
+
+ if (Width <= APINT_BITS_PER_WORD)
+ return APInt(Width, SignExtend64(U.VAL, BitWidth));
+
+ APInt Result(getMemory(getNumWords(Width)), Width);
+
+ // Copy words.
+ std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
+
+ // Sign extend the last word since there may be unused bits in the input.
+ Result.U.pVal[getNumWords() - 1] =
+ SignExtend64(Result.U.pVal[getNumWords() - 1],
+ ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
+
+ // Fill with sign bits.
+ std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
+ (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
+ Result.clearUnusedBits();
+ return Result;
+}
+
+// Zero extend to a new width.
+APInt APInt::zext(unsigned width) const {
+ assert(width > BitWidth && "Invalid APInt ZeroExtend request");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, U.VAL);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy words.
+ std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
+
+ // Zero remaining words.
+ std::memset(Result.U.pVal + getNumWords(), 0,
+ (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
+
+ return Result;
+}
+
+APInt APInt::zextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
+APInt APInt::sextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
+APInt APInt::zextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ return *this;
+}
+
+APInt APInt::sextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ return *this;
+}
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// Arithmetic right-shift function.
+void APInt::ashrInPlace(const APInt &shiftAmt) {
+ ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// Arithmetic right-shift function.
+void APInt::ashrSlowCase(unsigned ShiftAmt) {
+ // Don't bother performing a no-op shift.
+ if (!ShiftAmt)
+ return;
+
+ // Save the original sign bit for later.
+ bool Negative = isNegative();
+
+ // WordShift is the inter-part shift; BitShift is intra-part shift.
+ unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
+ unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
+
+ unsigned WordsToMove = getNumWords() - WordShift;
+ if (WordsToMove != 0) {
+ // Sign extend the last word to fill in the unused bits.
+ U.pVal[getNumWords() - 1] = SignExtend64(
+ U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
+
+ // Fastpath for moving by whole words.
+ if (BitShift == 0) {
+ std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
+ } else {
+ // Move the words containing significant bits.
+ for (unsigned i = 0; i != WordsToMove - 1; ++i)
+ U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
+ (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
+
+ // Handle the last word which has no high bits to copy.
+ U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
+ // Sign extend one more time.
+ U.pVal[WordsToMove - 1] =
+ SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
+ }
+ }
+
+ // Fill in the remainder based on the original sign.
+ std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
+ WordShift * APINT_WORD_SIZE);
+ clearUnusedBits();
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// Logical right-shift function.
+void APInt::lshrInPlace(const APInt &shiftAmt) {
+ lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// Logical right-shift function.
+void APInt::lshrSlowCase(unsigned ShiftAmt) {
+ tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
+}
+
+/// Left-shift this APInt by shiftAmt.
+/// Left-shift function.
+APInt &APInt::operator<<=(const APInt &shiftAmt) {
+ // It's undefined behavior in C to shift by BitWidth or greater.
+ *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
+ return *this;
+}
+
+void APInt::shlSlowCase(unsigned ShiftAmt) {
+ tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
+ clearUnusedBits();
+}
+
+// Calculate the rotate amount modulo the bit width.
+static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
+ unsigned rotBitWidth = rotateAmt.getBitWidth();
+ APInt rot = rotateAmt;
+ if (rotBitWidth < BitWidth) {
+ // Extend the rotate APInt, so that the urem doesn't divide by 0.
+ // e.g. APInt(1, 32) would give APInt(1, 0).
+ rot = rotateAmt.zext(BitWidth);
+ }
+ rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
+ return rot.getLimitedValue(BitWidth);
+}
+
+APInt APInt::rotl(const APInt &rotateAmt) const {
+ return rotl(rotateModulo(BitWidth, rotateAmt));
+}
+
+APInt APInt::rotl(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
+ if (rotateAmt == 0)
+ return *this;
+ return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
+}
+
+APInt APInt::rotr(const APInt &rotateAmt) const {
+ return rotr(rotateModulo(BitWidth, rotateAmt));
+}
+
+APInt APInt::rotr(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
+ if (rotateAmt == 0)
+ return *this;
+ return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
+}
+
+// Square Root - this method computes and returns the square root of "this".
+// Three mechanisms are used for computation. For small values (<= 5 bits),
+// a table lookup is done. This gets some performance for common cases. For
+// values using less than 52 bits, the value is converted to double and then
+// the libc sqrt function is called. The result is rounded and then converted
+// back to a uint64_t which is then used to construct the result. Finally,
+// the Babylonian method for computing square roots is used.
+APInt APInt::sqrt() const {
+
+ // Determine the magnitude of the value.
+ unsigned magnitude = getActiveBits();
+
+ // Use a fast table for some small values. This also gets rid of some
+ // rounding errors in libc sqrt for small values.
+ if (magnitude <= 5) {
+ static const uint8_t results[32] = {
+ /* 0 */ 0,
+ /* 1- 2 */ 1, 1,
+ /* 3- 6 */ 2, 2, 2, 2,
+ /* 7-12 */ 3, 3, 3, 3, 3, 3,
+ /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
+ /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ /* 31 */ 6
+ };
+ return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
+ }
+
+ // If the magnitude of the value fits in less than 52 bits (the precision of
+ // an IEEE double precision floating point value), then we can use the
+ // libc sqrt function which will probably use a hardware sqrt computation.
+ // This should be faster than the algorithm below.
+ if (magnitude < 52) {
+ return APInt(BitWidth,
+ uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
+ : U.pVal[0])))));
+ }
+
+ // Okay, all the short cuts are exhausted. We must compute it. The following
+ // is a classical Babylonian method for computing the square root. This code
+ // was adapted to APInt from a wikipedia article on such computations.
+ // See http://www.wikipedia.org/ and go to the page named
+ // Calculate_an_integer_square_root.
+ unsigned nbits = BitWidth, i = 4;
+ APInt testy(BitWidth, 16);
+ APInt x_old(BitWidth, 1);
+ APInt x_new(BitWidth, 0);
+ APInt two(BitWidth, 2);
+
+ // Select a good starting value using binary logarithms.
+ for (;; i += 2, testy = testy.shl(2))
+ if (i >= nbits || this->ule(testy)) {
+ x_old = x_old.shl(i / 2);
+ break;
+ }
+
+ // Use the Babylonian method to arrive at the integer square root:
+ for (;;) {
+ x_new = (this->udiv(x_old) + x_old).udiv(two);
+ if (x_old.ule(x_new))
+ break;
+ x_old = x_new;
+ }
+
+ // Make sure we return the closest approximation
+ // NOTE: The rounding calculation below is correct. It will produce an
+ // off-by-one discrepancy with results from pari/gp. That discrepancy has been
+ // determined to be a rounding issue with pari/gp as it begins to use a
+ // floating point representation after 192 bits. There are no discrepancies
+ // between this algorithm and pari/gp for bit widths < 192 bits.
+ APInt square(x_old * x_old);
+ APInt nextSquare((x_old + 1) * (x_old +1));
+ if (this->ult(square))
+ return x_old;
+ assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
+ return x_old + 1;
+}
+
+/// Computes the multiplicative inverse of this APInt for a given modulo. The
+/// iterative extended Euclidean algorithm is used to solve for this value,
+/// however we simplify it to speed up calculating only the inverse, and take
+/// advantage of div+rem calculations. We also use some tricks to avoid copying
+/// (potentially large) APInts around.
+/// WARNING: a value of '0' may be returned,
+/// signifying that no multiplicative inverse exists!
+APInt APInt::multiplicativeInverse(const APInt& modulo) const {
+ assert(ult(modulo) && "This APInt must be smaller than the modulo");
+
+ // Using the properties listed at the following web page (accessed 06/21/08):
+ // http://www.numbertheory.org/php/euclid.html
+ // (especially the properties numbered 3, 4 and 9) it can be proved that
+ // BitWidth bits suffice for all the computations in the algorithm implemented
+ // below. More precisely, this number of bits suffice if the multiplicative
+ // inverse exists, but may not suffice for the general extended Euclidean
+ // algorithm.
+
+ APInt r[2] = { modulo, *this };
+ APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
+ APInt q(BitWidth, 0);
+
+ unsigned i;
+ for (i = 0; r[i^1] != 0; i ^= 1) {
+ // An overview of the math without the confusing bit-flipping:
+ // q = r[i-2] / r[i-1]
+ // r[i] = r[i-2] % r[i-1]
+ // t[i] = t[i-2] - t[i-1] * q
+ udivrem(r[i], r[i^1], q, r[i]);
+ t[i] -= t[i^1] * q;
+ }
+
+ // If this APInt and the modulo are not coprime, there is no multiplicative
+ // inverse, so return 0. We check this by looking at the next-to-last
+ // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
+ // algorithm.
+ if (r[i] != 1)
+ return APInt(BitWidth, 0);
+
+ // The next-to-last t is the multiplicative inverse. However, we are
+ // interested in a positive inverse. Calculate a positive one from a negative
+ // one if necessary. A simple addition of the modulo suffices because
+ // abs(t[i]) is known to be less than *this/2 (see the link above).
+ if (t[i].isNegative())
+ t[i] += modulo;
+
+ return std::move(t[i]);
+}
+
+/// Calculate the magic numbers required to implement a signed integer division
+/// by a constant as a sequence of multiplies, adds and shifts. Requires that
+/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
+/// Warren, Jr., chapter 10.
+APInt::ms APInt::magic() const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt ad, anc, delta, q1, r1, q2, r2, t;
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ struct ms mag;
+
+ ad = d.abs();
+ t = signedMin + (d.lshr(d.getBitWidth() - 1));
+ anc = t - 1 - t.urem(ad); // absolute value of nc
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
+ r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
+ r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = q1<<1; // update q1 = 2p/abs(nc)
+ r1 = r1<<1; // update r1 = rem(2p/abs(nc))
+ if (r1.uge(anc)) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = q2<<1; // update q2 = 2p/abs(d)
+ r2 = r2<<1; // update r2 = rem(2p/abs(d))
+ if (r2.uge(ad)) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1.ult(delta) || (q1 == delta && r1 == 0));
+
+ mag.m = q2 + 1;
+ if (d.isNegative()) mag.m = -mag.m; // resulting magic number
+ mag.s = p - d.getBitWidth(); // resulting shift
+ return mag;
+}
+
+/// Calculate the magic numbers required to implement an unsigned integer
+/// division by a constant as a sequence of multiplies, adds and shifts.
+/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
+/// S. Warren, Jr., chapter 10.
+/// LeadingZeros can be used to simplify the calculation if the upper bits
+/// of the divided value are known zero.
+APInt::mu APInt::magicu(unsigned LeadingZeros) const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+
+ nc = allOnes - (allOnes - d).urem(d);
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
+ r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
+ q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
+ r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
+ do {
+ p = p + 1;
+ if (r1.uge(nc - r1)) {
+ q1 = q1 + q1 + 1; // update q1
+ r1 = r1 + r1 - nc; // update r1
+ }
+ else {
+ q1 = q1+q1; // update q1
+ r1 = r1+r1; // update r1
+ }
+ if ((r2 + 1).uge(d - r2)) {
+ if (q2.uge(signedMax)) magu.a = 1;
+ q2 = q2+q2 + 1; // update q2
+ r2 = r2+r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2.uge(signedMin)) magu.a = 1;
+ q2 = q2+q2; // update q2
+ r2 = r2+r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < d.getBitWidth()*2 &&
+ (q1.ult(delta) || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - d.getBitWidth(); // resulting shift
+ return magu;
+}
+
+/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
+/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
+/// variables here have the same names as in the algorithm. Comments explain
+/// the algorithm and any deviation from it.
+static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
+ unsigned m, unsigned n) {
+ assert(u && "Must provide dividend");
+ assert(v && "Must provide divisor");
+ assert(q && "Must provide quotient");
+ assert(u != v && u != q && v != q && "Must use different memory");
+ assert(n>1 && "n must be > 1");
+
+ // b denotes the base of the number system. In our case b is 2^32.
+ const uint64_t b = uint64_t(1) << 32;
+
+// The DEBUG macros here tend to be spam in the debug output if you're not
+// debugging this code. Disable them unless KNUTH_DEBUG is defined.
+#ifdef KNUTH_DEBUG
+#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
+#else
+#define DEBUG_KNUTH(X) do {} while(false)
+#endif
+
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << " by");
+ DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
+ DEBUG_KNUTH(dbgs() << '\n');
+ // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+ // u and v by d. Note that we have taken Knuth's advice here to use a power
+ // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+ // 2 allows us to shift instead of multiply and it is easy to determine the
+ // shift amount from the leading zeros. We are basically normalizing the u
+ // and v so that its high bits are shifted to the top of v's range without
+ // overflow. Note that this can require an extra word in u so that u must
+ // be of length m+n+1.
+ unsigned shift = countLeadingZeros(v[n-1]);
+ uint32_t v_carry = 0;
+ uint32_t u_carry = 0;
+ if (shift) {
+ for (unsigned i = 0; i < m+n; ++i) {
+ uint32_t u_tmp = u[i] >> (32 - shift);
+ u[i] = (u[i] << shift) | u_carry;
+ u_carry = u_tmp;
+ }
+ for (unsigned i = 0; i < n; ++i) {
+ uint32_t v_tmp = v[i] >> (32 - shift);
+ v[i] = (v[i] << shift) | v_carry;
+ v_carry = v_tmp;
+ }
+ }
+ u[m+n] = u_carry;
+
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << " by");
+ DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
+ DEBUG_KNUTH(dbgs() << '\n');
+
+ // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
+ int j = m;
+ do {
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
+ // D3. [Calculate q'.].
+ // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
+ // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
+ // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
+ // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
+ // on v[n-2] determines at high speed most of the cases in which the trial
+ // value qp is one too large, and it eliminates all cases where qp is two
+ // too large.
+ uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
+ uint64_t qp = dividend / v[n-1];
+ uint64_t rp = dividend % v[n-1];
+ if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
+ qp--;
+ rp += v[n-1];
+ if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
+ qp--;
+ }
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+
+ // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
+ // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
+ // consists of a simple multiplication by a one-place number, combined with
+ // a subtraction.
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+ // this step is actually negative, (u[j+n]...u[j]) should be left as the
+ // true value plus b**(n+1), namely as the b's complement of
+ // the true value, and a "borrow" to the left should be remembered.
+ int64_t borrow = 0;
+ for (unsigned i = 0; i < n; ++i) {
+ uint64_t p = uint64_t(qp) * uint64_t(v[i]);
+ int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
+ u[j+i] = Lo_32(subres);
+ borrow = Hi_32(p) - Hi_32(subres);
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
+ << ", borrow = " << borrow << '\n');
+ }
+ bool isNeg = u[j+n] < borrow;
+ u[j+n] -= Lo_32(borrow);
+
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << '\n');
+
+ // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
+ // negative, go to step D6; otherwise go on to step D7.
+ q[j] = Lo_32(qp);
+ if (isNeg) {
+ // D6. [Add back]. The probability that this step is necessary is very
+ // small, on the order of only 2/b. Make sure that test data accounts for
+ // this possibility. Decrease q[j] by 1
+ q[j]--;
+ // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
+ // A carry will occur to the left of u[j+n], and it should be ignored
+ // since it cancels with the borrow that occurred in D4.
+ bool carry = false;
+ for (unsigned i = 0; i < n; i++) {
+ uint32_t limit = std::min(u[j+i],v[i]);
+ u[j+i] += v[i] + carry;
+ carry = u[j+i] < limit || (carry && u[j+i] == limit);
+ }
+ u[j+n] += carry;
+ }
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
+
+ // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
+ } while (--j >= 0);
+
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
+ DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
+ DEBUG_KNUTH(dbgs() << '\n');
+
+ // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
+ // remainder may be obtained by dividing u[...] by d. If r is non-null we
+ // compute the remainder (urem uses this).
+ if (r) {
+ // The value d is expressed by the "shift" value above since we avoided
+ // multiplication by d by using a shift left. So, all we have to do is
+ // shift right here.
+ if (shift) {
+ uint32_t carry = 0;
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = (u[i] >> shift) | carry;
+ carry = u[i] << (32 - shift);
+ DEBUG_KNUTH(dbgs() << " " << r[i]);
+ }
+ } else {
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = u[i];
+ DEBUG_KNUTH(dbgs() << " " << r[i]);
+ }
+ }
+ DEBUG_KNUTH(dbgs() << '\n');
+ }
+ DEBUG_KNUTH(dbgs() << '\n');
+}
+
+void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
+ unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
+ assert(lhsWords >= rhsWords && "Fractional result");
+
+ // First, compose the values into an array of 32-bit words instead of
+ // 64-bit words. This is a necessity of both the "short division" algorithm
+ // and the Knuth "classical algorithm" which requires there to be native
+ // operations for +, -, and * on an m bit value with an m*2 bit result. We
+ // can't use 64-bit operands here because we don't have native results of
+ // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+ // work on large-endian machines.
+ unsigned n = rhsWords * 2;
+ unsigned m = (lhsWords * 2) - n;
+
+ // Allocate space for the temporary values we need either on the stack, if
+ // it will fit, or on the heap if it won't.
+ uint32_t SPACE[128];
+ uint32_t *U = nullptr;
+ uint32_t *V = nullptr;
+ uint32_t *Q = nullptr;
+ uint32_t *R = nullptr;
+ if ((Remainder?4:3)*n+2*m+1 <= 128) {
+ U = &SPACE[0];
+ V = &SPACE[m+n+1];
+ Q = &SPACE[(m+n+1) + n];
+ if (Remainder)
+ R = &SPACE[(m+n+1) + n + (m+n)];
+ } else {
+ U = new uint32_t[m + n + 1];
+ V = new uint32_t[n];
+ Q = new uint32_t[m+n];
+ if (Remainder)
+ R = new uint32_t[n];
+ }
+
+ // Initialize the dividend
+ memset(U, 0, (m+n+1)*sizeof(uint32_t));
+ for (unsigned i = 0; i < lhsWords; ++i) {
+ uint64_t tmp = LHS[i];
+ U[i * 2] = Lo_32(tmp);
+ U[i * 2 + 1] = Hi_32(tmp);
+ }
+ U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
+
+ // Initialize the divisor
+ memset(V, 0, (n)*sizeof(uint32_t));
+ for (unsigned i = 0; i < rhsWords; ++i) {
+ uint64_t tmp = RHS[i];
+ V[i * 2] = Lo_32(tmp);
+ V[i * 2 + 1] = Hi_32(tmp);
+ }
+
+ // initialize the quotient and remainder
+ memset(Q, 0, (m+n) * sizeof(uint32_t));
+ if (Remainder)
+ memset(R, 0, n * sizeof(uint32_t));
+
+ // Now, adjust m and n for the Knuth division. n is the number of words in
+ // the divisor. m is the number of words by which the dividend exceeds the
+ // divisor (i.e. m+n is the length of the dividend). These sizes must not
+ // contain any zero words or the Knuth algorithm fails.
+ for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
+ n--;
+ m++;
+ }
+ for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
+ m--;
+
+ // If we're left with only a single word for the divisor, Knuth doesn't work
+ // so we implement the short division algorithm here. This is much simpler
+ // and faster because we are certain that we can divide a 64-bit quantity
+ // by a 32-bit quantity at hardware speed and short division is simply a
+ // series of such operations. This is just like doing short division but we
+ // are using base 2^32 instead of base 10.
+ assert(n != 0 && "Divide by zero?");
+ if (n == 1) {
+ uint32_t divisor = V[0];
+ uint32_t remainder = 0;
+ for (int i = m; i >= 0; i--) {
+ uint64_t partial_dividend = Make_64(remainder, U[i]);
+ if (partial_dividend == 0) {
+ Q[i] = 0;
+ remainder = 0;
+ } else if (partial_dividend < divisor) {
+ Q[i] = 0;
+ remainder = Lo_32(partial_dividend);
+ } else if (partial_dividend == divisor) {
+ Q[i] = 1;
+ remainder = 0;
+ } else {
+ Q[i] = Lo_32(partial_dividend / divisor);
+ remainder = Lo_32(partial_dividend - (Q[i] * divisor));
+ }
+ }
+ if (R)
+ R[0] = remainder;
+ } else {
+ // Now we're ready to invoke the Knuth classical divide algorithm. In this
+ // case n > 1.
+ KnuthDiv(U, V, Q, R, m, n);
+ }
+
+ // If the caller wants the quotient
+ if (Quotient) {
+ for (unsigned i = 0; i < lhsWords; ++i)
+ Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
+ }
+
+ // If the caller wants the remainder
+ if (Remainder) {
+ for (unsigned i = 0; i < rhsWords; ++i)
+ Remainder[i] = Make_64(R[i*2+1], R[i*2]);
+ }
+
+ // Clean up the memory we allocated.
+ if (U != &SPACE[0]) {
+ delete [] U;
+ delete [] V;
+ delete [] Q;
+ delete [] R;
+ }
+}
+
+APInt APInt::udiv(const APInt &RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+
+ // First, deal with the easy case
+ if (isSingleWord()) {
+ assert(RHS.U.VAL != 0 && "Divide by zero?");
+ return APInt(BitWidth, U.VAL / RHS.U.VAL);
+ }
+
+ // Get some facts about the LHS and RHS number of bits and words
+ unsigned lhsWords = getNumWords(getActiveBits());
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = getNumWords(rhsBits);
+ assert(rhsWords && "Divided by zero???");
+
+ // Deal with some degenerate cases
+ if (!lhsWords)
+ // 0 / X ===> 0
+ return APInt(BitWidth, 0);
+ if (rhsBits == 1)
+ // X / 1 ===> X
+ return *this;
+ if (lhsWords < rhsWords || this->ult(RHS))
+ // X / Y ===> 0, iff X < Y
+ return APInt(BitWidth, 0);
+ if (*this == RHS)
+ // X / X ===> 1
+ return APInt(BitWidth, 1);
+ if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
+ // All high words are zero, just use native divide
+ return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Quotient(BitWidth, 0); // to hold result.
+ divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
+ return Quotient;
+}
+
+APInt APInt::udiv(uint64_t RHS) const {
+ assert(RHS != 0 && "Divide by zero?");
+
+ // First, deal with the easy case
+ if (isSingleWord())
+ return APInt(BitWidth, U.VAL / RHS);
+
+ // Get some facts about the LHS words.
+ unsigned lhsWords = getNumWords(getActiveBits());
+
+ // Deal with some degenerate cases
+ if (!lhsWords)
+ // 0 / X ===> 0
+ return APInt(BitWidth, 0);
+ if (RHS == 1)
+ // X / 1 ===> X
+ return *this;
+ if (this->ult(RHS))
+ // X / Y ===> 0, iff X < Y
+ return APInt(BitWidth, 0);
+ if (*this == RHS)
+ // X / X ===> 1
+ return APInt(BitWidth, 1);
+ if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
+ // All high words are zero, just use native divide
+ return APInt(BitWidth, this->U.pVal[0] / RHS);
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Quotient(BitWidth, 0); // to hold result.
+ divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
+ return Quotient;
+}
+
+APInt APInt::sdiv(const APInt &RHS) const {
+ if (isNegative()) {
+ if (RHS.isNegative())
+ return (-(*this)).udiv(-RHS);
+ return -((-(*this)).udiv(RHS));
+ }
+ if (RHS.isNegative())
+ return -(this->udiv(-RHS));
+ return this->udiv(RHS);
+}
+
+APInt APInt::sdiv(int64_t RHS) const {
+ if (isNegative()) {
+ if (RHS < 0)
+ return (-(*this)).udiv(-RHS);
+ return -((-(*this)).udiv(RHS));
+ }
+ if (RHS < 0)
+ return -(this->udiv(-RHS));
+ return this->udiv(RHS);
+}
+
+APInt APInt::urem(const APInt &RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ assert(RHS.U.VAL != 0 && "Remainder by zero?");
+ return APInt(BitWidth, U.VAL % RHS.U.VAL);
+ }
+
+ // Get some facts about the LHS
+ unsigned lhsWords = getNumWords(getActiveBits());
+
+ // Get some facts about the RHS
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = getNumWords(rhsBits);
+ assert(rhsWords && "Performing remainder operation by zero ???");
+
+ // Check the degenerate cases
+ if (lhsWords == 0)
+ // 0 % Y ===> 0
+ return APInt(BitWidth, 0);
+ if (rhsBits == 1)
+ // X % 1 ===> 0
+ return APInt(BitWidth, 0);
+ if (lhsWords < rhsWords || this->ult(RHS))
+ // X % Y ===> X, iff X < Y
+ return *this;
+ if (*this == RHS)
+ // X % X == 0;
+ return APInt(BitWidth, 0);
+ if (lhsWords == 1)
+ // All high words are zero, just use native remainder
+ return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Remainder(BitWidth, 0);
+ divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
+ return Remainder;
+}
+
+uint64_t APInt::urem(uint64_t RHS) const {
+ assert(RHS != 0 && "Remainder by zero?");
+
+ if (isSingleWord())
+ return U.VAL % RHS;
+
+ // Get some facts about the LHS
+ unsigned lhsWords = getNumWords(getActiveBits());
+
+ // Check the degenerate cases
+ if (lhsWords == 0)
+ // 0 % Y ===> 0
+ return 0;
+ if (RHS == 1)
+ // X % 1 ===> 0
+ return 0;
+ if (this->ult(RHS))
+ // X % Y ===> X, iff X < Y
+ return getZExtValue();
+ if (*this == RHS)
+ // X % X == 0;
+ return 0;
+ if (lhsWords == 1)
+ // All high words are zero, just use native remainder
+ return U.pVal[0] % RHS;
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ uint64_t Remainder;
+ divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
+ return Remainder;
+}
+
+APInt APInt::srem(const APInt &RHS) const {
+ if (isNegative()) {
+ if (RHS.isNegative())
+ return -((-(*this)).urem(-RHS));
+ return -((-(*this)).urem(RHS));
+ }
+ if (RHS.isNegative())
+ return this->urem(-RHS);
+ return this->urem(RHS);
+}
+
+int64_t APInt::srem(int64_t RHS) const {
+ if (isNegative()) {
+ if (RHS < 0)
+ return -((-(*this)).urem(-RHS));
+ return -((-(*this)).urem(RHS));
+ }
+ if (RHS < 0)
+ return this->urem(-RHS);
+ return this->urem(RHS);
+}
+
+void APInt::udivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ unsigned BitWidth = LHS.BitWidth;
+
+ // First, deal with the easy case
+ if (LHS.isSingleWord()) {
+ assert(RHS.U.VAL != 0 && "Divide by zero?");
+ uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
+ uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
+ Quotient = APInt(BitWidth, QuotVal);
+ Remainder = APInt(BitWidth, RemVal);
+ return;
+ }
+
+ // Get some size facts about the dividend and divisor
+ unsigned lhsWords = getNumWords(LHS.getActiveBits());
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = getNumWords(rhsBits);
+ assert(rhsWords && "Performing divrem operation by zero ???");
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
+ Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0
+ return;
+ }
+
+ if (rhsBits == 1) {
+ Quotient = LHS; // X / 1 ===> X
+ Remainder = APInt(BitWidth, 0); // X % 1 ===> 0
+ }
+
+ if (lhsWords < rhsWords || LHS.ult(RHS)) {
+ Remainder = LHS; // X % Y ===> X, iff X < Y
+ Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
+ return;
+ }
+
+ if (LHS == RHS) {
+ Quotient = APInt(BitWidth, 1); // X / X ===> 1
+ Remainder = APInt(BitWidth, 0); // X % X ===> 0;
+ return;
+ }
+
+ // Make sure there is enough space to hold the results.
+ // NOTE: This assumes that reallocate won't affect any bits if it doesn't
+ // change the size. This is necessary if Quotient or Remainder is aliased
+ // with LHS or RHS.
+ Quotient.reallocate(BitWidth);
+ Remainder.reallocate(BitWidth);
+
+ if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
+ // There is only one word to consider so use the native versions.
+ uint64_t lhsValue = LHS.U.pVal[0];
+ uint64_t rhsValue = RHS.U.pVal[0];
+ Quotient = lhsValue / rhsValue;
+ Remainder = lhsValue % rhsValue;
+ return;
+ }
+
+ // Okay, lets do it the long way
+ divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
+ Remainder.U.pVal);
+ // Clear the rest of the Quotient and Remainder.
+ std::memset(Quotient.U.pVal + lhsWords, 0,
+ (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
+ std::memset(Remainder.U.pVal + rhsWords, 0,
+ (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
+}
+
+void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
+ uint64_t &Remainder) {
+ assert(RHS != 0 && "Divide by zero?");
+ unsigned BitWidth = LHS.BitWidth;
+
+ // First, deal with the easy case
+ if (LHS.isSingleWord()) {
+ uint64_t QuotVal = LHS.U.VAL / RHS;
+ Remainder = LHS.U.VAL % RHS;
+ Quotient = APInt(BitWidth, QuotVal);
+ return;
+ }
+
+ // Get some size facts about the dividend and divisor
+ unsigned lhsWords = getNumWords(LHS.getActiveBits());
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
+ Remainder = 0; // 0 % Y ===> 0
+ return;
+ }
+
+ if (RHS == 1) {
+ Quotient = LHS; // X / 1 ===> X
+ Remainder = 0; // X % 1 ===> 0
+ return;
+ }
+
+ if (LHS.ult(RHS)) {
+ Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
+ Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
+ return;
+ }
+
+ if (LHS == RHS) {
+ Quotient = APInt(BitWidth, 1); // X / X ===> 1
+ Remainder = 0; // X % X ===> 0;
+ return;
+ }
+
+ // Make sure there is enough space to hold the results.
+ // NOTE: This assumes that reallocate won't affect any bits if it doesn't
+ // change the size. This is necessary if Quotient is aliased with LHS.
+ Quotient.reallocate(BitWidth);
+
+ if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
+ // There is only one word to consider so use the native versions.
+ uint64_t lhsValue = LHS.U.pVal[0];
+ Quotient = lhsValue / RHS;
+ Remainder = lhsValue % RHS;
+ return;
+ }
+
+ // Okay, lets do it the long way
+ divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
+ // Clear the rest of the Quotient.
+ std::memset(Quotient.U.pVal + lhsWords, 0,
+ (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
+}
+
+void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ if (LHS.isNegative()) {
+ if (RHS.isNegative())
+ APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
+ else {
+ APInt::udivrem(-LHS, RHS, Quotient, Remainder);
+ Quotient.negate();
+ }
+ Remainder.negate();
+ } else if (RHS.isNegative()) {
+ APInt::udivrem(LHS, -RHS, Quotient, Remainder);
+ Quotient.negate();
+ } else {
+ APInt::udivrem(LHS, RHS, Quotient, Remainder);
+ }
+}
+
+void APInt::sdivrem(const APInt &LHS, int64_t RHS,
+ APInt &Quotient, int64_t &Remainder) {
+ uint64_t R = Remainder;
+ if (LHS.isNegative()) {
+ if (RHS < 0)
+ APInt::udivrem(-LHS, -RHS, Quotient, R);
+ else {
+ APInt::udivrem(-LHS, RHS, Quotient, R);
+ Quotient.negate();
+ }
+ R = -R;
+ } else if (RHS < 0) {
+ APInt::udivrem(LHS, -RHS, Quotient, R);
+ Quotient.negate();
+ } else {
+ APInt::udivrem(LHS, RHS, Quotient, R);
+ }
+ Remainder = R;
+}
+
+APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = isNonNegative() == RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = Res.ult(RHS);
+ return Res;
+}
+
+APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this - RHS;
+ Overflow = isNonNegative() != RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this-RHS;
+ Overflow = Res.ugt(*this);
+ return Res;
+}
+
+APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
+ // MININT/-1 --> overflow.
+ Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+ return sdiv(RHS);
+}
+
+APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
+ if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
+ Overflow = true;
+ return *this * RHS;
+ }
+
+ APInt Res = lshr(1) * RHS;
+ Overflow = Res.isNegative();
+ Res <<= 1;
+ if ((*this)[0]) {
+ Res += RHS;
+ if (Res.ult(RHS))
+ Overflow = true;
+ }
+ return Res;
+}
+
+APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
+ Overflow = ShAmt.uge(getBitWidth());
+ if (Overflow)
+ return APInt(BitWidth, 0);
+
+ if (isNonNegative()) // Don't allow sign change.
+ Overflow = ShAmt.uge(countLeadingZeros());
+ else
+ Overflow = ShAmt.uge(countLeadingOnes());
+
+ return *this << ShAmt;
+}
+
+APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
+ Overflow = ShAmt.uge(getBitWidth());
+ if (Overflow)
+ return APInt(BitWidth, 0);
+
+ Overflow = ShAmt.ugt(countLeadingZeros());
+
+ return *this << ShAmt;
+}
+
+APInt APInt::sadd_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = sadd_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::uadd_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = uadd_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt::getMaxValue(BitWidth);
+}
+
+APInt APInt::ssub_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = ssub_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::usub_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = usub_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt(BitWidth, 0);
+}
+
+
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
+ // Check our assumptions here
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
+
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ bool isNeg = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+ assert((slen <= numbits || radix != 2) && "Insufficient bit width");
+ assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
+ assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
+ assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
+ "Insufficient bit width");
+
+ // Allocate memory if needed
+ if (isSingleWord())
+ U.VAL = 0;
+ else
+ U.pVal = getClearedMemory(getNumWords());
+
+ // Figure out if we can shift instead of multiply
+ unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
+
+ // Enter digit traversal loop
+ for (StringRef::iterator e = str.end(); p != e; ++p) {
+ unsigned digit = getDigit(*p, radix);
+ assert(digit < radix && "Invalid character in digit string");
+
+ // Shift or multiply the value by the radix
+ if (slen > 1) {
+ if (shift)
+ *this <<= shift;
+ else
+ *this *= radix;
+ }
+
+ // Add in the digit we just interpreted
+ *this += digit;
+ }
+ // If its negative, put it in two's complement form
+ if (isNeg)
+ this->negate();
+}
+
+void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
+ bool Signed, bool formatAsCLiteral) const {
+ assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
+ Radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
+
+ const char *Prefix = "";
+ if (formatAsCLiteral) {
+ switch (Radix) {
+ case 2:
+ // Binary literals are a non-standard extension added in gcc 4.3:
+ // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
+ Prefix = "0b";
+ break;
+ case 8:
+ Prefix = "0";
+ break;
+ case 10:
+ break; // No prefix
+ case 16:
+ Prefix = "0x";
+ break;
+ default:
+ llvm_unreachable("Invalid radix!");
+ }
+ }
+
+ // First, check for a zero value and just short circuit the logic below.
+ if (*this == 0) {
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+ Str.push_back('0');
+ return;
+ }
+
+ static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+ if (isSingleWord()) {
+ char Buffer[65];
+ char *BufPtr = std::end(Buffer);
+
+ uint64_t N;
+ if (!Signed) {
+ N = getZExtValue();
+ } else {
+ int64_t I = getSExtValue();
+ if (I >= 0) {
+ N = I;
+ } else {
+ Str.push_back('-');
+ N = -(uint64_t)I;
+ }
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+
+ while (N) {
+ *--BufPtr = Digits[N % Radix];
+ N /= Radix;
+ }
+ Str.append(BufPtr, std::end(Buffer));
+ return;
+ }
+
+ APInt Tmp(*this);
+
+ if (Signed && isNegative()) {
+ // They want to print the signed version and it is a negative value
+ // Flip the bits and add one to turn it into the equivalent positive
+ // value and put a '-' in the result.
+ Tmp.negate();
+ Str.push_back('-');
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+
+ // We insert the digits backward, then reverse them to get the right order.
+ unsigned StartDig = Str.size();
+
+ // For the 2, 8 and 16 bit cases, we can just shift instead of divide
+ // because the number of bits per digit (1, 3 and 4 respectively) divides
+ // equally. We just shift until the value is zero.
+ if (Radix == 2 || Radix == 8 || Radix == 16) {
+ // Just shift tmp right for each digit width until it becomes zero
+ unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
+ unsigned MaskAmt = Radix - 1;
+
+ while (Tmp.getBoolValue()) {
+ unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
+ Str.push_back(Digits[Digit]);
+ Tmp.lshrInPlace(ShiftAmt);
+ }
+ } else {
+ while (Tmp.getBoolValue()) {
+ uint64_t Digit;
+ udivrem(Tmp, Radix, Tmp, Digit);
+ assert(Digit < Radix && "divide failed");
+ Str.push_back(Digits[Digit]);
+ }
+ }
+
+ // Reverse the digits before returning.
+ std::reverse(Str.begin()+StartDig, Str.end());
+}
+
+/// Returns the APInt as a std::string. Note that this is an inefficient method.
+/// It is better to pass in a SmallVector/SmallString to the methods above.
+std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
+ SmallString<40> S;
+ toString(S, Radix, Signed, /* formatAsCLiteral = */false);
+ return S.str();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void APInt::dump() const {
+ SmallString<40> S, U;
+ this->toStringUnsigned(U);
+ this->toStringSigned(S);
+ dbgs() << "APInt(" << BitWidth << "b, "
+ << U << "u " << S << "s)\n";
+}
+#endif
+
+void APInt::print(raw_ostream &OS, bool isSigned) const {
+ SmallString<40> S;
+ this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
+ OS << S;
+}
+
+// This implements a variety of operations on a representation of
+// arbitrary precision, two's-complement, bignum integer values.
+
+// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
+// and unrestricting assumption.
+static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
+ "Part width must be divisible by 2!");
+
+/* Some handy functions local to this file. */
+
+/* Returns the integer part with the least significant BITS set.
+ BITS cannot be zero. */
+static inline APInt::WordType lowBitMask(unsigned bits) {
+ assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
+
+ return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
+}
+
+/* Returns the value of the lower half of PART. */
+static inline APInt::WordType lowHalf(APInt::WordType part) {
+ return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
+}
+
+/* Returns the value of the upper half of PART. */
+static inline APInt::WordType highHalf(APInt::WordType part) {
+ return part >> (APInt::APINT_BITS_PER_WORD / 2);
+}
+
+/* Returns the bit number of the most significant set bit of a part.
+ If the input number has no bits set -1U is returned. */
+static unsigned partMSB(APInt::WordType value) {
+ return findLastSet(value, ZB_Max);
+}
+
+/* Returns the bit number of the least significant set bit of a
+ part. If the input number has no bits set -1U is returned. */
+static unsigned partLSB(APInt::WordType value) {
+ return findFirstSet(value, ZB_Max);
+}
+
+/* Sets the least significant part of a bignum to the input value, and
+ zeroes out higher parts. */
+void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
+ assert(parts > 0);
+
+ dst[0] = part;
+ for (unsigned i = 1; i < parts; i++)
+ dst[i] = 0;
+}
+
+/* Assign one bignum to another. */
+void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ dst[i] = src[i];
+}
+
+/* Returns true if a bignum is zero, false otherwise. */
+bool APInt::tcIsZero(const WordType *src, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ if (src[i])
+ return false;
+
+ return true;
+}
+
+/* Extract the given bit of a bignum; returns 0 or 1. */
+int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
+ return (parts[whichWord(bit)] & maskBit(bit)) != 0;
+}
+
+/* Set the given bit of a bignum. */
+void APInt::tcSetBit(WordType *parts, unsigned bit) {
+ parts[whichWord(bit)] |= maskBit(bit);
+}
+
+/* Clears the given bit of a bignum. */
+void APInt::tcClearBit(WordType *parts, unsigned bit) {
+ parts[whichWord(bit)] &= ~maskBit(bit);
+}
+
+/* Returns the bit number of the least significant set bit of a
+ number. If the input number has no bits set -1U is returned. */
+unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
+ for (unsigned i = 0; i < n; i++) {
+ if (parts[i] != 0) {
+ unsigned lsb = partLSB(parts[i]);
+
+ return lsb + i * APINT_BITS_PER_WORD;
+ }
+ }
+
+ return -1U;
+}
+
+/* Returns the bit number of the most significant set bit of a number.
+ If the input number has no bits set -1U is returned. */
+unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
+ do {
+ --n;
+
+ if (parts[n] != 0) {
+ unsigned msb = partMSB(parts[n]);
+
+ return msb + n * APINT_BITS_PER_WORD;
+ }
+ } while (n);
+
+ return -1U;
+}
+
+/* Copy the bit vector of width srcBITS from SRC, starting at bit
+ srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
+ the least significant bit of DST. All high bits above srcBITS in
+ DST are zero-filled. */
+void
+APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
+ unsigned srcBits, unsigned srcLSB) {
+ unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
+ assert(dstParts <= dstCount);
+
+ unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
+ tcAssign (dst, src + firstSrcPart, dstParts);
+
+ unsigned shift = srcLSB % APINT_BITS_PER_WORD;
+ tcShiftRight (dst, dstParts, shift);
+
+ /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
+ in DST. If this is less that srcBits, append the rest, else
+ clear the high bits. */
+ unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
+ if (n < srcBits) {
+ WordType mask = lowBitMask (srcBits - n);
+ dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
+ << n % APINT_BITS_PER_WORD);
+ } else if (n > srcBits) {
+ if (srcBits % APINT_BITS_PER_WORD)
+ dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
+ }
+
+ /* Clear high parts. */
+ while (dstParts < dstCount)
+ dst[dstParts++] = 0;
+}
+
+/* DST += RHS + C where C is zero or one. Returns the carry flag. */
+APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
+ WordType c, unsigned parts) {
+ assert(c <= 1);
+
+ for (unsigned i = 0; i < parts; i++) {
+ WordType l = dst[i];
+ if (c) {
+ dst[i] += rhs[i] + 1;
+ c = (dst[i] <= l);
+ } else {
+ dst[i] += rhs[i];
+ c = (dst[i] < l);
+ }
+ }
+
+ return c;
+}
+
+/// This function adds a single "word" integer, src, to the multiple
+/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
+/// 1 is returned if there is a carry out, otherwise 0 is returned.
+/// @returns the carry of the addition.
+APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
+ unsigned parts) {
+ for (unsigned i = 0; i < parts; ++i) {
+ dst[i] += src;
+ if (dst[i] >= src)
+ return 0; // No need to carry so exit early.
+ src = 1; // Carry one to next digit.
+ }
+
+ return 1;
+}
+
+/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
+ WordType c, unsigned parts) {
+ assert(c <= 1);
+
+ for (unsigned i = 0; i < parts; i++) {
+ WordType l = dst[i];
+ if (c) {
+ dst[i] -= rhs[i] + 1;
+ c = (dst[i] >= l);
+ } else {
+ dst[i] -= rhs[i];
+ c = (dst[i] > l);
+ }
+ }
+
+ return c;
+}
+
+/// This function subtracts a single "word" (64-bit word), src, from
+/// the multi-word integer array, dst[], propagating the borrowed 1 value until
+/// no further borrowing is needed or it runs out of "words" in dst. The result
+/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
+/// exhausted. In other words, if src > dst then this function returns 1,
+/// otherwise 0.
+/// @returns the borrow out of the subtraction
+APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
+ unsigned parts) {
+ for (unsigned i = 0; i < parts; ++i) {
+ WordType Dst = dst[i];
+ dst[i] -= src;
+ if (src <= Dst)
+ return 0; // No need to borrow so exit early.
+ src = 1; // We have to "borrow 1" from next "word"
+ }
+
+ return 1;
+}
+
+/* Negate a bignum in-place. */
+void APInt::tcNegate(WordType *dst, unsigned parts) {
+ tcComplement(dst, parts);
+ tcIncrement(dst, parts);
+}
+
+/* DST += SRC * MULTIPLIER + CARRY if add is true
+ DST = SRC * MULTIPLIER + CARRY if add is false
+
+ Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+ they must start at the same point, i.e. DST == SRC.
+
+ If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+ returned. Otherwise DST is filled with the least significant
+ DSTPARTS parts of the result, and if all of the omitted higher
+ parts were zero return zero, otherwise overflow occurred and
+ return one. */
+int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
+ WordType multiplier, WordType carry,
+ unsigned srcParts, unsigned dstParts,
+ bool add) {
+ /* Otherwise our writes of DST kill our later reads of SRC. */
+ assert(dst <= src || dst >= src + srcParts);
+ assert(dstParts <= srcParts + 1);
+
+ /* N loops; minimum of dstParts and srcParts. */
+ unsigned n = std::min(dstParts, srcParts);
+
+ for (unsigned i = 0; i < n; i++) {
+ WordType low, mid, high, srcPart;
+
+ /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
+
+ This cannot overflow, because
+
+ (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
+
+ which is less than n^2. */
+
+ srcPart = src[i];
+
+ if (multiplier == 0 || srcPart == 0) {
+ low = carry;
+ high = 0;
+ } else {
+ low = lowHalf(srcPart) * lowHalf(multiplier);
+ high = highHalf(srcPart) * highHalf(multiplier);
+
+ mid = lowHalf(srcPart) * highHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= APINT_BITS_PER_WORD / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ mid = highHalf(srcPart) * lowHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= APINT_BITS_PER_WORD / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ /* Now add carry. */
+ if (low + carry < low)
+ high++;
+ low += carry;
+ }
+
+ if (add) {
+ /* And now DST[i], and store the new low part there. */
+ if (low + dst[i] < low)
+ high++;
+ dst[i] += low;
+ } else
+ dst[i] = low;
+
+ carry = high;
+ }
+
+ if (srcParts < dstParts) {
+ /* Full multiplication, there is no overflow. */
+ assert(srcParts + 1 == dstParts);
+ dst[srcParts] = carry;
+ return 0;
+ }
+
+ /* We overflowed if there is carry. */
+ if (carry)
+ return 1;
+
+ /* We would overflow if any significant unwritten parts would be
+ non-zero. This is true if any remaining src parts are non-zero
+ and the multiplier is non-zero. */
+ if (multiplier)
+ for (unsigned i = dstParts; i < srcParts; i++)
+ if (src[i])
+ return 1;
+
+ /* We fitted in the narrow destination. */
+ return 0;
+}
+
+/* DST = LHS * RHS, where DST has the same width as the operands and
+ is filled with the least significant parts of the result. Returns
+ one if overflow occurred, otherwise zero. DST must be disjoint
+ from both operands. */
+int APInt::tcMultiply(WordType *dst, const WordType *lhs,
+ const WordType *rhs, unsigned parts) {
+ assert(dst != lhs && dst != rhs);
+
+ int overflow = 0;
+ tcSet(dst, 0, parts);
+
+ for (unsigned i = 0; i < parts; i++)
+ overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
+ parts - i, true);
+
+ return overflow;
+}
+
+/// DST = LHS * RHS, where DST has width the sum of the widths of the
+/// operands. No overflow occurs. DST must be disjoint from both operands.
+void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
+ const WordType *rhs, unsigned lhsParts,
+ unsigned rhsParts) {
+ /* Put the narrower number on the LHS for less loops below. */
+ if (lhsParts > rhsParts)
+ return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
+
+ assert(dst != lhs && dst != rhs);
+
+ tcSet(dst, 0, rhsParts);
+
+ for (unsigned i = 0; i < lhsParts; i++)
+ tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
+}
+
+/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
+ Otherwise set LHS to LHS / RHS with the fractional part discarded,
+ set REMAINDER to the remainder, return zero. i.e.
+
+ OLD_LHS = RHS * LHS + REMAINDER
+
+ SCRATCH is a bignum of the same size as the operands and result for
+ use by the routine; its contents need not be initialized and are
+ destroyed. LHS, REMAINDER and SCRATCH must be distinct.
+*/
+int APInt::tcDivide(WordType *lhs, const WordType *rhs,
+ WordType *remainder, WordType *srhs,
+ unsigned parts) {
+ assert(lhs != remainder && lhs != srhs && remainder != srhs);
+
+ unsigned shiftCount = tcMSB(rhs, parts) + 1;
+ if (shiftCount == 0)
+ return true;
+
+ shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
+ unsigned n = shiftCount / APINT_BITS_PER_WORD;
+ WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
+
+ tcAssign(srhs, rhs, parts);
+ tcShiftLeft(srhs, parts, shiftCount);
+ tcAssign(remainder, lhs, parts);
+ tcSet(lhs, 0, parts);
+
+ /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
+ the total. */
+ for (;;) {
+ int compare = tcCompare(remainder, srhs, parts);
+ if (compare >= 0) {
+ tcSubtract(remainder, srhs, 0, parts);
+ lhs[n] |= mask;
+ }
+
+ if (shiftCount == 0)
+ break;
+ shiftCount--;
+ tcShiftRight(srhs, parts, 1);
+ if ((mask >>= 1) == 0) {
+ mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
+ n--;
+ }
+ }
+
+ return false;
+}
+
+/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
+/// no restrictions on Count.
+void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
+ // Don't bother performing a no-op shift.
+ if (!Count)
+ return;
+
+ // WordShift is the inter-part shift; BitShift is the intra-part shift.
+ unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
+ unsigned BitShift = Count % APINT_BITS_PER_WORD;
+
+ // Fastpath for moving by whole words.
+ if (BitShift == 0) {
+ std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
+ } else {
+ while (Words-- > WordShift) {
+ Dst[Words] = Dst[Words - WordShift] << BitShift;
+ if (Words > WordShift)
+ Dst[Words] |=
+ Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
+ }
+ }
+
+ // Fill in the remainder with 0s.
+ std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
+}
+
+/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
+/// are no restrictions on Count.
+void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
+ // Don't bother performing a no-op shift.
+ if (!Count)
+ return;
+
+ // WordShift is the inter-part shift; BitShift is the intra-part shift.
+ unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
+ unsigned BitShift = Count % APINT_BITS_PER_WORD;
+
+ unsigned WordsToMove = Words - WordShift;
+ // Fastpath for moving by whole words.
+ if (BitShift == 0) {
+ std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
+ } else {
+ for (unsigned i = 0; i != WordsToMove; ++i) {
+ Dst[i] = Dst[i + WordShift] >> BitShift;
+ if (i + 1 != WordsToMove)
+ Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
+ }
+ }
+
+ // Fill in the remainder with 0s.
+ std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
+}
+
+/* Bitwise and of two bignums. */
+void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ dst[i] &= rhs[i];
+}
+
+/* Bitwise inclusive or of two bignums. */
+void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ dst[i] |= rhs[i];
+}
+
+/* Bitwise exclusive or of two bignums. */
+void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ dst[i] ^= rhs[i];
+}
+
+/* Complement a bignum in-place. */
+void APInt::tcComplement(WordType *dst, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ dst[i] = ~dst[i];
+}
+
+/* Comparison (unsigned) of two bignums. */
+int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
+ unsigned parts) {
+ while (parts) {
+ parts--;
+ if (lhs[parts] != rhs[parts])
+ return (lhs[parts] > rhs[parts]) ? 1 : -1;
+ }
+
+ return 0;
+}
+
+/* Set the least significant BITS bits of a bignum, clear the
+ rest. */
+void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
+ unsigned bits) {
+ unsigned i = 0;
+ while (bits > APINT_BITS_PER_WORD) {
+ dst[i++] = ~(WordType) 0;
+ bits -= APINT_BITS_PER_WORD;
+ }
+
+ if (bits)
+ dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
+
+ while (i < parts)
+ dst[i++] = 0;
+}
+
+APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
+ APInt::Rounding RM) {
+ // Currently udivrem always rounds down.
+ switch (RM) {
+ case APInt::Rounding::DOWN:
+ case APInt::Rounding::TOWARD_ZERO:
+ return A.udiv(B);
+ case APInt::Rounding::UP: {
+ APInt Quo, Rem;
+ APInt::udivrem(A, B, Quo, Rem);
+ if (Rem == 0)
+ return Quo;
+ return Quo + 1;
+ }
+ }
+ llvm_unreachable("Unknown APInt::Rounding enum");
+}
+
+APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
+ APInt::Rounding RM) {
+ switch (RM) {
+ case APInt::Rounding::DOWN:
+ case APInt::Rounding::UP: {
+ APInt Quo, Rem;
+ APInt::sdivrem(A, B, Quo, Rem);
+ if (Rem == 0)
+ return Quo;
+ // This algorithm deals with arbitrary rounding mode used by sdivrem.
+ // We want to check whether the non-integer part of the mathematical value
+ // is negative or not. If the non-integer part is negative, we need to round
+ // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
+ // already rounded down.
+ if (RM == APInt::Rounding::DOWN) {
+ if (Rem.isNegative() != B.isNegative())
+ return Quo - 1;
+ return Quo;
+ }
+ if (Rem.isNegative() != B.isNegative())
+ return Quo;
+ return Quo + 1;
+ }
+ // Currently sdiv rounds twards zero.
+ case APInt::Rounding::TOWARD_ZERO:
+ return A.sdiv(B);
+ }
+ llvm_unreachable("Unknown APInt::Rounding enum");
+}
+
+Optional<APInt>
+llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
+ unsigned RangeWidth) {
+ unsigned CoeffWidth = A.getBitWidth();
+ assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
+ assert(RangeWidth <= CoeffWidth &&
+ "Value range width should be less than coefficient width");
+ assert(RangeWidth > 1 && "Value range bit width should be > 1");
+
+ LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
+ << "x + " << C << ", rw:" << RangeWidth << '\n');
+
+ // Identify 0 as a (non)solution immediately.
+ if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
+ LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
+ return APInt(CoeffWidth, 0);
+ }
+
+ // The result of APInt arithmetic has the same bit width as the operands,
+ // so it can actually lose high bits. A product of two n-bit integers needs
+ // 2n-1 bits to represent the full value.
+ // The operation done below (on quadratic coefficients) that can produce
+ // the largest value is the evaluation of the equation during bisection,
+ // which needs 3 times the bitwidth of the coefficient, so the total number
+ // of required bits is 3n.
+ //
+ // The purpose of this extension is to simulate the set Z of all integers,
+ // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
+ // and negative numbers (not so much in a modulo arithmetic). The method
+ // used to solve the equation is based on the standard formula for real
+ // numbers, and uses the concepts of "positive" and "negative" with their
+ // usual meanings.
+ CoeffWidth *= 3;
+ A = A.sext(CoeffWidth);
+ B = B.sext(CoeffWidth);
+ C = C.sext(CoeffWidth);
+
+ // Make A > 0 for simplicity. Negate cannot overflow at this point because
+ // the bit width has increased.
+ if (A.isNegative()) {
+ A.negate();
+ B.negate();
+ C.negate();
+ }
+
+ // Solving an equation q(x) = 0 with coefficients in modular arithmetic
+ // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
+ // and R = 2^BitWidth.
+ // Since we're trying not only to find exact solutions, but also values
+ // that "wrap around", such a set will always have a solution, i.e. an x
+ // that satisfies at least one of the equations, or such that |q(x)|
+ // exceeds kR, while |q(x-1)| for the same k does not.
+ //
+ // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
+ // positive solution n (in the above sense), and also such that the n
+ // will be the least among all solutions corresponding to k = 0, 1, ...
+ // (more precisely, the least element in the set
+ // { n(k) | k is such that a solution n(k) exists }).
+ //
+ // Consider the parabola (over real numbers) that corresponds to the
+ // quadratic equation. Since A > 0, the arms of the parabola will point
+ // up. Picking different values of k will shift it up and down by R.
+ //
+ // We want to shift the parabola in such a way as to reduce the problem
+ // of solving q(x) = kR to solving shifted_q(x) = 0.
+ // (The interesting solutions are the ceilings of the real number
+ // solutions.)
+ APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
+ APInt TwoA = 2 * A;
+ APInt SqrB = B * B;
+ bool PickLow;
+
+ auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
+ assert(A.isStrictlyPositive());
+ APInt T = V.abs().urem(A);
+ if (T.isNullValue())
+ return V;
+ return V.isNegative() ? V+T : V+(A-T);
+ };
+
+ // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
+ // iff B is positive.
+ if (B.isNonNegative()) {
+ // If B >= 0, the vertex it at a negative location (or at 0), so in
+ // order to have a non-negative solution we need to pick k that makes
+ // C-kR negative. To satisfy all the requirements for the solution
+ // that we are looking for, it needs to be closest to 0 of all k.
+ C = C.srem(R);
+ if (C.isStrictlyPositive())
+ C -= R;
+ // Pick the greater solution.
+ PickLow = false;
+ } else {
+ // If B < 0, the vertex is at a positive location. For any solution
+ // to exist, the discriminant must be non-negative. This means that
+ // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
+ // lower bound on values of k: kR >= C - B^2/4A.
+ APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
+ // Round LowkR up (towards +inf) to the nearest kR.
+ LowkR = RoundUp(LowkR, R);
+
+ // If there exists k meeting the condition above, and such that
+ // C-kR > 0, there will be two positive real number solutions of
+ // q(x) = kR. Out of all such values of k, pick the one that makes
+ // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
+ // In other words, find maximum k such that LowkR <= kR < C.
+ if (C.sgt(LowkR)) {
+ // If LowkR < C, then such a k is guaranteed to exist because
+ // LowkR itself is a multiple of R.
+ C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
+ // Pick the smaller solution.
+ PickLow = true;
+ } else {
+ // If C-kR < 0 for all potential k's, it means that one solution
+ // will be negative, while the other will be positive. The positive
+ // solution will shift towards 0 if the parabola is moved up.
+ // Pick the kR closest to the lower bound (i.e. make C-kR closest
+ // to 0, or in other words, out of all parabolas that have solutions,
+ // pick the one that is the farthest "up").
+ // Since LowkR is itself a multiple of R, simply take C-LowkR.
+ C -= LowkR;
+ // Pick the greater solution.
+ PickLow = false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
+ << B << "x + " << C << ", rw:" << RangeWidth << '\n');
+
+ APInt D = SqrB - 4*A*C;
+ assert(D.isNonNegative() && "Negative discriminant");
+ APInt SQ = D.sqrt();
+
+ APInt Q = SQ * SQ;
+ bool InexactSQ = Q != D;
+ // The calculated SQ may actually be greater than the exact (non-integer)
+ // value. If that's the case, decremement SQ to get a value that is lower.
+ if (Q.sgt(D))
+ SQ -= 1;
+
+ APInt X;
+ APInt Rem;
+
+ // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
+ // When using the quadratic formula directly, the calculated low root
+ // may be greater than the exact one, since we would be subtracting SQ.
+ // To make sure that the calculated root is not greater than the exact
+ // one, subtract SQ+1 when calculating the low root (for inexact value
+ // of SQ).
+ if (PickLow)
+ APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
+ else
+ APInt::sdivrem(-B + SQ, TwoA, X, Rem);
+
+ // The updated coefficients should be such that the (exact) solution is
+ // positive. Since APInt division rounds towards 0, the calculated one
+ // can be 0, but cannot be negative.
+ assert(X.isNonNegative() && "Solution should be non-negative");
+
+ if (!InexactSQ && Rem.isNullValue()) {
+ LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
+ return X;
+ }
+
+ assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
+ // The exact value of the square root of D should be between SQ and SQ+1.
+ // This implies that the solution should be between that corresponding to
+ // SQ (i.e. X) and that corresponding to SQ+1.
+ //
+ // The calculated X cannot be greater than the exact (real) solution.
+ // Actually it must be strictly less than the exact solution, while
+ // X+1 will be greater than or equal to it.
+
+ APInt VX = (A*X + B)*X + C;
+ APInt VY = VX + TwoA*X + A + B;
+ bool SignChange = VX.isNegative() != VY.isNegative() ||
+ VX.isNullValue() != VY.isNullValue();
+ // If the sign did not change between X and X+1, X is not a valid solution.
+ // This could happen when the actual (exact) roots don't have an integer
+ // between them, so they would both be contained between X and X+1.
+ if (!SignChange) {
+ LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
+ return None;
+ }
+
+ X += 1;
+ LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
+ return X;
+}
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
+ unsigned StoreBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
+ const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
+
+ if (sys::IsLittleEndianHost) {
+ // Little-endian host - the source is ordered from LSB to MSB. Order the
+ // destination from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, StoreBytes);
+ } else {
+ // Big-endian host - the source is an array of 64 bit words ordered from
+ // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
+ // from MSB to LSB: Reverse the word order, but not the bytes in a word.
+ while (StoreBytes > sizeof(uint64_t)) {
+ StoreBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
+ Src += sizeof(uint64_t);
+ }
+
+ memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
+ }
+}
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
+ uint8_t *Dst = reinterpret_cast<uint8_t *>(
+ const_cast<uint64_t *>(IntVal.getRawData()));
+
+ if (sys::IsLittleEndianHost)
+ // Little-endian host - the destination must be ordered from LSB to MSB.
+ // The source is ordered from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, LoadBytes);
+ else {
+ // Big-endian - the destination is an array of 64 bit words ordered from
+ // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
+ // ordered from MSB to LSB: Reverse the word order, but not the bytes in
+ // a word.
+ while (LoadBytes > sizeof(uint64_t)) {
+ LoadBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
+ Dst += sizeof(uint64_t);
+ }
+
+ memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
+ }
+}