author    | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000
commit    | 12f3ca4cdb95b193af905a00e722a4dcb40b3de3 (patch)
tree      | ae1a7fcfc24a8d4b23206c57121c3f361d4b7f84 /lib
parent    | d99dafe2e4a385dd2a6c76da6d8258deb100657b (diff)
Diffstat (limited to 'lib')
239 files changed, 5090 insertions, 3509 deletions
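
Most of the changes below are a mechanical migration: the paired KnownZero/KnownOne APInt out-parameters of computeKnownBits are replaced by a single KnownBits value (new header llvm/Support/KnownBits.h) whose .Zero and .One members carry the same masks. The following is a minimal sketch of the new calling pattern, mirroring the ConstantFolding.cpp hunk below; allBitsKnown() is a hypothetical helper, not part of the commit, and its arguments stand in for whatever a real caller already has in scope.

// Sketch only -- not part of the commit; shows the KnownBits-based
// computeKnownBits usage this import switches lib/Analysis over to.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool allBitsKnown(Value *V, unsigned BitWidth, const DataLayout &DL) {
  // Before: APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  //         computeKnownBits(V, KnownZero, KnownOne, DL);
  // After: one KnownBits object carries both masks.
  KnownBits Known(BitWidth);
  computeKnownBits(V, Known, DL);
  return (Known.Zero | Known.One).isAllOnesValue();
}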
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 3db041cc0fa6..537823020301 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -924,8 +924,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V2Size, const DataLayout &DL) { - assert(GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && + assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == + GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && GEP1->getPointerOperandType() == GEP2->getPointerOperandType() && "Expected GEPs with the same pointer operand"); @@ -1184,8 +1184,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // If we know the two GEPs are based off of the exact same pointer (and not // just the same underlying object), see if that tells us anything about // the resulting pointers. - if (GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && + if (GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == + GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) { AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL); // If we couldn't find anything interesting, don't abandon just yet. @@ -1500,8 +1500,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, return NoAlias; // Strip off any casts if they exist. - V1 = V1->stripPointerCasts(); - V2 = V2->stripPointerCasts(); + V1 = V1->stripPointerCastsAndBarriers(); + V2 = V2->stripPointerCastsAndBarriers(); // If V1 or V2 is undef, the result is NoAlias because we can always pick a // value for undef that aliases nothing in the program. diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 14176dac2104..863fbdba7e67 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <cerrno> @@ -687,21 +688,21 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, if (Opc == Instruction::And) { unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType()); - APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); - APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); - computeKnownBits(Op0, KnownZero0, KnownOne0, DL); - computeKnownBits(Op1, KnownZero1, KnownOne1, DL); - if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + KnownBits Known0(BitWidth); + KnownBits Known1(BitWidth); + computeKnownBits(Op0, Known0, DL); + computeKnownBits(Op1, Known1, DL); + if ((Known1.One | Known0.Zero).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; } - if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + if ((Known0.One | Known1.Zero).isAllOnesValue()) { // All the bits of Op1 that the 'and' could be masking are already zero. 
return Op1; } - APInt KnownZero = KnownZero0 | KnownZero1; - APInt KnownOne = KnownOne0 & KnownOne1; + APInt KnownZero = Known0.Zero | Known1.Zero; + APInt KnownOne = Known0.One & Known1.One; if ((KnownZero | KnownOne).isAllOnesValue()) { return ConstantInt::get(Op0->getType(), KnownOne); } diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 151c0b0e6c93..285339deaaf5 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -72,8 +73,7 @@ static bool isAlwaysLive(Instruction *I) { void DemandedBits::determineLiveOperandBits( const Instruction *UserI, const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2) { + const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) { unsigned BitWidth = AB.getBitWidth(); // We're called once per operand, but for some instructions, we need to @@ -85,15 +85,13 @@ void DemandedBits::determineLiveOperandBits( auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1, const Value *V2) { const DataLayout &DL = I->getModule()->getDataLayout(); - KnownZero = APInt(BitWidth, 0); - KnownOne = APInt(BitWidth, 0); - computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0, + Known = KnownBits(BitWidth); + computeKnownBits(const_cast<Value *>(V1), Known, DL, 0, &AC, UserI, &DT); if (V2) { - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); - computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL, + Known2 = KnownBits(BitWidth); + computeKnownBits(const_cast<Value *>(V2), Known2, DL, 0, &AC, UserI, &DT); } }; @@ -120,7 +118,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getHighBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countLeadingZeros()+1)); + std::min(BitWidth, Known.One.countLeadingZeros()+1)); } break; case Intrinsic::cttz: @@ -130,7 +128,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getLowBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countTrailingZeros()+1)); + std::min(BitWidth, Known.One.countTrailingZeros()+1)); } break; } @@ -200,11 +198,11 @@ void DemandedBits::determineLiveOperandBits( // dead). if (OperandNo == 0) { ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownZero2; + AB &= ~Known2.Zero; } else { if (!isa<Instruction>(UserI->getOperand(0))) ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownZero & ~KnownZero2); + AB &= ~(Known.Zero & ~Known2.Zero); } break; case Instruction::Or: @@ -216,11 +214,11 @@ void DemandedBits::determineLiveOperandBits( // dead). if (OperandNo == 0) { ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownOne2; + AB &= ~Known2.One; } else { if (!isa<Instruction>(UserI->getOperand(0))) ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownOne & ~KnownOne2); + AB &= ~(Known.One & ~Known2.One); } break; case Instruction::Xor: @@ -318,7 +316,7 @@ void DemandedBits::performAnalysis() { if (!UserI->getType()->isIntegerTy()) Visited.insert(UserI); - APInt KnownZero, KnownOne, KnownZero2, KnownOne2; + KnownBits Known, Known2; // Compute the set of alive bits for each operand. 
These are anded into the // existing set, if any, and if that changes the set of alive bits, the // operand is added to the work-list. @@ -335,8 +333,7 @@ void DemandedBits::performAnalysis() { // Bits of each operand that are used to compute alive bits of the // output are alive, all others are dead. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, - KnownZero, KnownOne, - KnownZero2, KnownOne2); + Known, Known2); } // If we've added to the set of alive bits (or the operand has not diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index 7acfb41500d4..8abc0e7d0df9 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -80,6 +80,22 @@ struct DOTGraphTraits<PostDominatorTree*> }; } +void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "DomTree dump not available, build with DEBUG\n"; +#endif // NDEBUG +} + +void DominatorTree::viewGraph() { +#ifndef NDEBUG + this->viewGraph("domtree", "Dominator Tree for function"); +#else + errs() << "DomTree dump not available, build with DEBUG\n"; +#endif // NDEBUG +} + namespace { struct DominatorTreeWrapperPassAnalysisGraphTraits { static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) { diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index fde805a5fde5..c30feb973e60 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -253,18 +253,8 @@ bool IVUsers::AddUsersImpl(Instruction *I, const SCEV *OriginalISE = ISE; auto NormalizePred = [&](const SCEVAddRecExpr *AR) { - // We only allow affine AddRecs to be normalized, otherwise we would not - // be able to correctly denormalize. - // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2} - // Normalized form: {-2,+,1,+,2} - // Denormalized form: {1,+,3,+,2} - // - // However, denormalization would use a different step expression than - // normalization (see getPostIncExpr), generating the wrong final - // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} auto *L = AR->getLoop(); - bool Result = - AR->isAffine() && IVUseShouldUsePostIncValue(User, I, L, DT); + bool Result = IVUseShouldUsePostIncValue(User, I, L, DT); if (Result) NewUse.PostIncLoops.insert(L); return Result; diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 1f8dec2aed80..788f908bafca 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -1557,7 +1557,6 @@ InlineParams llvm::getInlineParams(int Threshold) { Params.ColdCallSiteThreshold = ColdCallSiteThreshold; // Set the OptMinSizeThreshold and OptSizeThreshold params only if the - // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // -inlinehint-threshold commandline option is not explicitly given. If that // option is present, then its value applies even for callees with size and // minsize attributes. 
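
The InstructionSimplify.cpp changes that follow fold the file-local Query struct into the public SimplifyQuery and add, beside each existing Simplify*Inst entry point, an overload taking the query object directly, so callers can bundle DL/TLI/DT/AC/CxtI once instead of threading five arguments through every call. A rough sketch of that calling convention appears below; the wrapper function is hypothetical, and only the SimplifyAddInst overload and the brace-initialized SimplifyQuery come from the diff itself.

// Sketch only -- illustrates the SimplifyQuery-based overloads added below.
#include "llvm/Analysis/InstructionSimplify.h"
using namespace llvm;

static Value *trySimplifyAdd(Value *Op0, Value *Op1, const DataLayout &DL,
                             const TargetLibraryInfo *TLI,
                             const DominatorTree *DT, AssumptionCache *AC,
                             const Instruction *CxtI) {
  // Before: SimplifyAddInst(Op0, Op1, false, false, DL, TLI, DT, AC, CxtI);
  // After: build the query once and pass it to any Simplify* entry point.
  const SimplifyQuery Q = {DL, TLI, DT, AC, CxtI};
  return SimplifyAddInst(Op0, Op1, /*isNSW=*/false, /*isNUW=*/false, Q);
}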
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 2259fbaeb982..e720e3ebecdb 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/KnownBits.h" #include <algorithm> using namespace llvm; using namespace llvm::PatternMatch; @@ -46,34 +47,19 @@ enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumReassoc, "Number of reassociations"); -namespace { -struct Query { - const DataLayout &DL; - const TargetLibraryInfo *TLI; - const DominatorTree *DT; - AssumptionCache *AC; - const Instruction *CxtI; - - Query(const DataLayout &DL, const TargetLibraryInfo *tli, - const DominatorTree *dt, AssumptionCache *ac = nullptr, - const Instruction *cxti = nullptr) - : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {} -}; -} // end anonymous namespace - -static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, +static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, - const Query &, unsigned); -static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, + const SimplifyQuery &, unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse); -static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); -static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); + const SimplifyQuery &Q, unsigned MaxRecurse); +static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyCastInst(unsigned, Value *, Type *, - const Query &, unsigned); + const SimplifyQuery &, unsigned); /// For a boolean type or a vector of boolean type, return false or a vector /// with every element false. @@ -138,7 +124,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, - Instruction::BinaryOps OpcodeToExpand, const Query &Q, + Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -196,7 +182,7 @@ static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, /// Generic simplifications for associative binary operations. /// Returns the simpler value, or null if none was found. static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, - Value *LHS, Value *RHS, const Query &Q, + Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); @@ -295,7 +281,7 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, /// of the select results in the same value. 
Returns the common value if so, /// otherwise returns null. static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -367,7 +353,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, /// comparison by seeing whether both branches of the select result in the same /// value. Returns the common value if so, otherwise returns null. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -449,7 +435,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// phi values yields the same result for every value. If so returns the common /// value, otherwise returns null. static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -492,7 +478,7 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, /// yields the same result every time. If so returns the common result, /// otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -527,7 +513,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, Value *&Op0, Value *&Op1, - const Query &Q) { + const SimplifyQuery &Q) { if (auto *CLHS = dyn_cast<Constant>(Op0)) { if (auto *CRHS = dyn_cast<Constant>(Op1)) return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); @@ -542,7 +528,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, /// Given operands for an Add, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; @@ -601,10 +587,15 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Query) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit); +} + /// \brief Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and @@ -679,7 +670,7 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, /// Given operands for a Sub, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q)) return C; @@ -703,10 +694,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return Op0; unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (KnownZero.isMaxSignedValue()) { + KnownBits Known(BitWidth); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. if (isNSW) @@ -813,14 +803,19 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); +} + /// Given operands for an FAdd, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; @@ -854,7 +849,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given operands for an FSub, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; @@ -886,7 +881,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given the operands for an FMul, see if we can fold the result static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) return C; @@ -903,7 +898,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given operands for a Mul, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q)) return C; @@ -963,34 +958,52 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFAddInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); +} + Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFSubInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit); +} + Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFMulInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); +} + Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyMulInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); } /// Check for common or similar folds of integer division or integer remainder. @@ -1047,7 +1060,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { /// Given operands for an SDiv or UDiv, see if we can fold the result. /// If not, this returns null. static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1103,7 +1116,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// Given operands for an SDiv, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; @@ -1115,13 +1128,16 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifySDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a UDiv, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; @@ -1143,12 +1159,15 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyUDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); } static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; @@ -1193,14 +1212,19 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFDivInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); +} + /// Given operands for an SRem or URem, see if we can fold the result. /// If not, this returns null. static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1231,7 +1255,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// Given operands for an SRem, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; @@ -1243,13 +1267,16 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifySRemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a URem, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; @@ -1271,12 +1298,15 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyURemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); } static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; @@ -1302,10 +1332,15 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFRemInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); +} + /// Returns true if a shift by \c Amount always yields undef. static bool isUndefShift(Value *Amount) { Constant *C = dyn_cast<Constant>(Amount); @@ -1336,7 +1371,7 @@ static bool isUndefShift(Value *Amount) { /// Given operands for an Shl, LShr or AShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, - Value *Op1, const Query &Q, unsigned MaxRecurse) { + Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1367,17 +1402,15 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // If any bits in the shift amount make that value greater than or equal to // the number of bits in the type, the shift is undefined. 
unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (KnownOne.getLimitedValue() >= BitWidth) + KnownBits Known(BitWidth); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.One.getLimitedValue() >= BitWidth) return UndefValue::get(Op0->getType()); // If all valid bits in the shift amount are known zero, the first operand is // unchanged. unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth); - APInt ShiftAmountMask = APInt::getLowBitsSet(BitWidth, NumValidShiftBits); - if ((KnownZero & ShiftAmountMask) == ShiftAmountMask) + if (Known.Zero.countTrailingOnes() >= NumValidShiftBits) return Op0; return nullptr; @@ -1386,7 +1419,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, /// \brief Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, - Value *Op1, bool isExact, const Query &Q, + Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse)) return V; @@ -1403,11 +1436,9 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, // The low bit cannot be shifted out of an exact shift if it is set. if (isExact) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); - APInt Op0KnownZero(BitWidth, 0); - APInt Op0KnownOne(BitWidth, 0); - computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC, - Q.CxtI, Q.DT); - if (Op0KnownOne[0]) + KnownBits Op0Known(BitWidth); + computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + if (Op0Known.One[0]) return Op0; } @@ -1417,7 +1448,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, /// Given operands for an Shl, see if we can fold the result. /// If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) return V; @@ -1437,14 +1468,19 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); +} + /// Given operands for an LShr, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; @@ -1462,14 +1498,19 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyLShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q) { + return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); +} + /// Given operands for an AShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; @@ -1496,10 +1537,15 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyAShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q) { + return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); +} + static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, ICmpInst *UnsignedICmp, bool IsAnd) { Value *X, *Y; @@ -1575,6 +1621,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) return X; + // FIXME: This should be shared with or-of-icmps. // Look for this pattern: (icmp V, C0) & (icmp V, C1)). Type *ITy = Op0->getType(); ICmpInst::Predicate Pred0, Pred1; @@ -1584,10 +1631,16 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) { // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. - auto Range0 = ConstantRange::makeAllowedICmpRegion(Pred0, *C0); - auto Range1 = ConstantRange::makeAllowedICmpRegion(Pred1, *C1); + auto Range0 = ConstantRange::makeExactICmpRegion(Pred0, *C0); + auto Range1 = ConstantRange::makeExactICmpRegion(Pred1, *C1); if (Range0.intersectWith(Range1).isEmptySet()) return getFalse(ITy); + + // If a range is a superset of the other, the smaller set is all we need. + if (Range0.contains(Range1)) + return Op1; + if (Range1.contains(Range0)) + return Op0; } // (icmp (add V, C0), C1) & (icmp V, C0) @@ -1633,7 +1686,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { /// Given operands for an And, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q)) return C; @@ -1744,8 +1797,11 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyAndInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit); } /// Commuted variants are assumed to be handled by calling this function again @@ -1830,7 +1886,7 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { /// Given operands for an Or, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q)) return C; @@ -1877,6 +1933,25 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, (A == Op0 || B == Op0)) return Constant::getAllOnesValue(Op0->getType()); + // (A & ~B) | (A ^ B) -> (A ^ B) + // (~B & A) | (A ^ B) -> (A ^ B) + // (A & ~B) | (B ^ A) -> (B ^ A) + // (~B & A) | (B ^ A) -> (B ^ A) + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && + (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) + return Op1; + + // Commute the 'or' operands. + // (A ^ B) | (A & ~B) -> (A ^ B) + // (A ^ B) | (~B & A) -> (A ^ B) + // (B ^ A) | (A & ~B) -> (B ^ A) + // (B ^ A) | (~B & A) -> (B ^ A) + if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && + (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) + return Op0; + if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) { if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) { if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS)) @@ -1952,13 +2027,16 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyOrInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a Xor, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q)) return C; @@ -2001,10 +2079,14 @@ Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyXorInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit); } + static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } @@ -2238,7 +2320,7 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, /// Fold an icmp when its operands have i1 scalar type. static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q) { + Value *RHS, const SimplifyQuery &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. if (!OpTy->getScalarType()->isIntegerTy(1)) @@ -2301,7 +2383,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, /// Try hard to fold icmp with zero RHS because this is a common case. static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q) { + Value *RHS, const SimplifyQuery &Q) { if (!match(RHS, m_Zero())) return nullptr; @@ -2556,7 +2638,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, } static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. @@ -2866,7 +2948,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, /// Simplify integer comparisons where at least one operand of the compare /// matches an integer min/max idiom. static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. Value *A, *B; @@ -3070,7 +3152,7 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, /// Given operands for an ICmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); @@ -3342,11 +3424,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const APInt *RHSVal; if (match(RHS, m_APInt(RHSVal))) { unsigned BitWidth = RHSVal->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC, - Q.CxtI, Q.DT); - if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0)) + KnownBits LHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.Zero.intersects(*RHSVal) || + !LHSKnown.One.isSubsetOf(*RHSVal)) return Pred == ICmpInst::ICMP_EQ ? 
ConstantInt::getFalse(ITy) : ConstantInt::getTrue(ITy); } @@ -3372,14 +3453,19 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyICmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); +} + /// Given operands for an FCmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const Query &Q, + FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); @@ -3505,13 +3591,18 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI}, + RecursionLimit); +} + +Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } /// See if V simplifies when its operand Op is replaced with RepOp. static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const Query &Q, + const SimplifyQuery &Q, unsigned MaxRecurse) { // Trivial replacement. if (V == Op) @@ -3659,7 +3750,7 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal, /// Try to simplify a select instruction when its condition operand is an /// integer comparison. static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, - Value *FalseVal, const Query &Q, + Value *FalseVal, const SimplifyQuery &Q, unsigned MaxRecurse) { ICmpInst::Predicate Pred; Value *CmpLHS, *CmpRHS; @@ -3738,7 +3829,7 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, - Value *FalseVal, const Query &Q, + Value *FalseVal, const SimplifyQuery &Q, unsigned MaxRecurse) { // select true, X, Y -> X // select false, X, Y -> Y @@ -3775,14 +3866,19 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySelectInst(Cond, TrueVal, FalseVal, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, {DL, TLI, DT, AC, CxtI}, + RecursionLimit); +} + +Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const SimplifyQuery &Q) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit); } /// Given operands for an GetElementPtrInst, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { // The type of the GEP pointer operand. unsigned AS = cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace(); @@ -3896,14 +3992,18 @@ Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyGEPInst(SrcTy, Ops, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyGEPInst(SrcTy, Ops, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, + const SimplifyQuery &Q) { + return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit); } /// Given operands for an InsertValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef<unsigned> Idxs, const Query &Q, + ArrayRef<unsigned> Idxs, const SimplifyQuery &Q, unsigned) { if (Constant *CAgg = dyn_cast<Constant>(Agg)) if (Constant *CVal = dyn_cast<Constant>(Val)) @@ -3933,14 +4033,20 @@ Value *llvm::SimplifyInsertValueInst( Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyInsertValueInst(Agg, Val, Idxs, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, + const SimplifyQuery &Q) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); +} + /// Given operands for an ExtractValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, - const Query &, unsigned) { + const SimplifyQuery &, unsigned) { if (auto *CAgg = dyn_cast<Constant>(Agg)) return ConstantFoldExtractValueInstruction(CAgg, Idxs); @@ -3968,13 +4074,18 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyExtractValueInst(Agg, Idxs, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const SimplifyQuery &Q) { + return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); +} + /// Given operands for an ExtractElementInst, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, +static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &, unsigned) { if (auto *CVec = dyn_cast<Constant>(Vec)) { if (auto *CIdx = dyn_cast<Constant>(Idx)) @@ -4000,12 +4111,17 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, Value *llvm::SimplifyExtractElementInst( Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyExtractElementInst(Vec, Idx, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q) { + return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); +} + /// See if we can fold the given phi. If not, returns null. -static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { +static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = nullptr; @@ -4038,7 +4154,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { } static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, - Type *Ty, const Query &Q, unsigned MaxRecurse) { + Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) { if (auto *C = dyn_cast<Constant>(Op)) return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL); @@ -4076,10 +4192,15 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyCastInst(CastOpc, Op, Ty, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q) { + return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); +} + /// For the given destination element of a shuffle, peek through shuffles to /// match a root vector source operand that contains that element in the same /// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). @@ -4135,7 +4256,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, } static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const Query &Q, + Type *RetTy, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *InVecTy = Op0->getType(); unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); @@ -4207,8 +4328,13 @@ Value *llvm::SimplifyShuffleVectorInst( Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShuffleVectorInst( - Op0, Op1, Mask, RetTy, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, + {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q) { + return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } //=== Helper functions for higher up the class hierarchy. @@ -4216,7 +4342,7 @@ Value *llvm::SimplifyShuffleVectorInst( /// Given operands for a BinaryOperator, see if we can fold the result. 
/// If not, this returns null. static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); @@ -4264,7 +4390,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const Query &Q, + const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: @@ -4284,22 +4410,32 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyBinOp(Opcode, LHS, RHS, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { + return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); +} + Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const DataLayout &DL, + FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); +} + /// Given operands for a CmpInst, see if we can fold the result. 
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); @@ -4309,10 +4445,15 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyCmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); +} + static bool IsIdempotent(Intrinsic::ID ID) { switch (ID) { default: return false; @@ -4403,7 +4544,7 @@ static bool maskIsAllZeroOrUndef(Value *Mask) { template <typename IterTy> static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { Intrinsic::ID IID = F->getIntrinsicID(); unsigned NumOperands = std::distance(ArgBegin, ArgEnd); @@ -4497,7 +4638,7 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, template <typename IterTy> static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { Type *Ty = V->getType(); if (PointerType *PTy = dyn_cast<PointerType>(Ty)) Ty = PTy->getElementType(); @@ -4535,16 +4676,26 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyCall(V, ArgBegin, ArgEnd, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const SimplifyQuery &Q) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit); +} + Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCall(V, Args.begin(), Args.end(), - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyCall(V, Args.begin(), Args.end(), {DL, TLI, DT, AC, CxtI}, + RecursionLimit); +} + +Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, + const SimplifyQuery &Q) { + return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit); } /// See if we can compute a simplified version of this instruction. 
@@ -4553,152 +4704,141 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE) { + return SimplifyInstruction(I, {DL, TLI, DT, AC, I}, ORE); +} + +Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE) { Value *Result; switch (I->getOpcode()) { default: - Result = ConstantFoldInstruction(I, DL, TLI); + Result = ConstantFoldInstruction(I, Q.DL, Q.TLI); break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), - cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), - cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Mul: - Result = - SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FRem: Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), - cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), - cast<BinaryOperator>(I)->isExact(), DL, TLI, DT, - AC, I); + cast<BinaryOperator>(I)->isExact(), Q); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), - cast<BinaryOperator>(I)->isExact(), DL, TLI, DT, - AC, I); + cast<BinaryOperator>(I)->isExact(), Q); break; case Instruction::And: - Result = - SimplifyAndInst(I->getOperand(0), 
I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::Or: - Result = - SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::Xor: - Result = - SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::ICmp: - Result = - SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), I->getOperand(0), - I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FCmp: - Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + Result = + SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), + I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), DL, TLI, DT, AC, I); + I->getOperand(2), Q); break; case Instruction::GetElementPtr: { - SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); + SmallVector<Value *, 8> Ops(I->op_begin(), I->op_end()); Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(), - Ops, DL, TLI, DT, AC, I); + Ops, Q); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast<InsertValueInst>(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), DL, TLI, DT, AC, I); + IV->getIndices(), Q); break; } case Instruction::ExtractValue: { auto *EVI = cast<ExtractValueInst>(I); Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), - EVI->getIndices(), DL, TLI, DT, AC, I); + EVI->getIndices(), Q); break; } case Instruction::ExtractElement: { auto *EEI = cast<ExtractElementInst>(I); - Result = SimplifyExtractElementInst( - EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); + Result = SimplifyExtractElementInst(EEI->getVectorOperand(), + EEI->getIndexOperand(), Q); break; } case Instruction::ShuffleVector: { auto *SVI = cast<ShuffleVectorInst>(I); Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), - SVI->getMask(), SVI->getType(), DL, TLI, - DT, AC, I); + SVI->getMask(), SVI->getType(), Q); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I)); + Result = SimplifyPHINode(cast<PHINode>(I), Q); break; case Instruction::Call: { CallSite CS(cast<CallInst>(I)); - Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL, - TLI, DT, AC, I); + Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST - Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), - DL, TLI, DT, AC, I); + Result = + SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), Q); break; case Instruction::Alloca: // No simplifications for Alloca and it can't be constant folded. @@ -4710,11 +4850,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, // value even when the operands are not all constants. 
if (!Result && I->getType()->isIntOrIntVectorTy()) { unsigned BitWidth = I->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT, ORE); - if ((KnownZero | KnownOne).isAllOnesValue()) - Result = ConstantInt::get(I->getType(), KnownOne); + KnownBits Known(BitWidth); + computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); + if ((Known.Zero | Known.One).isAllOnesValue()) + Result = ConstantInt::get(I->getType(), Known.One); } /// If called on unreachable code, the above logic may report that the diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 2ca46b1fe872..0f04af54cdc7 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -70,6 +70,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -534,10 +535,9 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast<VectorType>(V->getType()); if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, - dyn_cast<Instruction>(V), DT); - return KnownZero.isAllOnesValue(); + KnownBits Known(BitWidth); + computeKnownBits(V, Known, DL, 0, AC, dyn_cast<Instruction>(V), DT); + return Known.Zero.isAllOnesValue(); } // Per-component check doesn't work with zeroinitializer @@ -556,9 +556,9 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, if (isa<UndefValue>(Elem)) return true; - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Elem, KnownZero, KnownOne, DL); - if (KnownZero.isAllOnesValue()) + KnownBits Known(BitWidth); + computeKnownBits(Elem, Known, DL); + if (Known.Zero.isAllOnesValue()) return true; } diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp index c63677fe5502..da5c79ab6c81 100644 --- a/lib/Analysis/MemorySSAUpdater.cpp +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -29,7 +29,7 @@ #define DEBUG_TYPE "memoryssa" using namespace llvm; -namespace llvm { + // This is the marker algorithm from "Simple and Efficient Construction of // Static Single Assignment Form" // The simple, non-marker algorithm places phi nodes at any join @@ -211,8 +211,8 @@ void MemorySSAUpdater::insertUse(MemoryUse *MU) { } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. -void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, - MemoryAccess *NewDef) { +static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, + MemoryAccess *NewDef) { // Replace any operand with us an incoming block with the new defining // access. 
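// Illustrative helper mirroring the SimplifyInstruction hunk above: with the
// new KnownBits bundle, "every bit known" is just Zero | One being all ones,
// and the value itself is then Known.One.  Only calls that appear in this
// diff are used; the helper name is hypothetical.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static Constant *foldToConstantIfFullyKnown(const Value *V,
                                            const DataLayout &DL) {
  if (!V->getType()->isIntOrIntVectorTy())
    return nullptr;
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, DL);
  if ((Known.Zero | Known.One).isAllOnesValue())
    return ConstantInt::get(V->getType(), Known.One);
  return nullptr;
}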
int i = MP->getBasicBlockIndex(BB); @@ -415,6 +415,7 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) { } return MA; } + void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { assert(!MSSA->isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); @@ -490,5 +491,3 @@ MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessAfter( ++InsertPt->getIterator()); return NewAccess; } - -} // namespace llvm diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 700c383a9dd4..3ac4bf1276eb 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -89,6 +89,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SaveAndRestore.h" @@ -4575,10 +4576,10 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, + KnownBits Known(BitWidth); + computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC, nullptr, &DT); - return Zeros.countTrailingOnes(); + return Known.Zero.countTrailingOnes(); } // SCEVUDivExpr @@ -4757,11 +4758,12 @@ ScalarEvolution::getRange(const SCEV *S, const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. - APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT); - if (Ones != ~Zeros + 1) + KnownBits Known(BitWidth); + computeKnownBits(U->getValue(), Known, DL, 0, &AC, nullptr, &DT); + if (Known.One != ~Known.Zero + 1) ConservativeResult = - ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); + ConservativeResult.intersectWith(ConstantRange(Known.One, + ~Known.Zero + 1)); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); @@ -5292,13 +5294,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned LZ = A.countLeadingZeros(); unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(), + KnownBits Known(BitWidth); + computeKnownBits(BO->LHS, Known, getDataLayout(), 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); - if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { + if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) { const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); const SCEV *LHS = getSCEV(BO->LHS); const SCEV *ShiftedLHS = nullptr; @@ -5328,12 +5330,28 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case Instruction::Or: - // Use ValueTracking to check whether this is actually an add. - if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC, - nullptr, &DT)) { - // There aren't any common bits set, so the add can't wrap. - auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW); - return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); + // If the RHS of the Or is a constant, we may have something like: + // X*4+1 which got turned into X*4|1. 
Handle this as an Add so loop + // optimizations will transparently handle this case. + // + // In order for this transformation to be safe, the LHS must be of the + // form X*(2^n) and the Or constant must be less than 2^n. + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { + const SCEV *LHS = getSCEV(BO->LHS); + const APInt &CIVal = CI->getValue(); + if (GetMinTrailingZeros(LHS) >= + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); + const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); + } + return S; + } } break; @@ -6063,24 +6081,74 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, return getCouldNotCompute(); } -ScalarEvolution::ExitLimit -ScalarEvolution::computeExitLimitFromCond(const Loop *L, - Value *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB, - bool ControlsExit, - bool AllowPredicates) { +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( + const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, bool AllowPredicates) { + ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates); + return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB, + ControlsExit, AllowPredicates); +} + +Optional<ScalarEvolution::ExitLimit> +ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, bool AllowPredicates) { + (void)this->L; + (void)this->TBB; + (void)this->FBB; + (void)this->AllowPredicates; + + assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + this->AllowPredicates == AllowPredicates && + "Variance in assumed invariant key components!"); + auto Itr = TripCountMap.find({ExitCond, ControlsExit}); + if (Itr == TripCountMap.end()) + return None; + return Itr->second; +} + +void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, + bool AllowPredicates, + const ExitLimit &EL) { + assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + this->AllowPredicates == AllowPredicates && + "Variance in assumed invariant key components!"); + + auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL}); + assert(InsertResult.second && "Expected successful insertion!"); + (void)InsertResult; +} + +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { + + if (auto MaybeEL = + Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates)) + return *MaybeEL; + + ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB, + ControlsExit, AllowPredicates); + Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL); + return EL; +} + +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. 
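// Illustrative restatement of the guard in the Instruction::Or case above:
// "A | C" behaves exactly like "A + C" when every set bit of C falls inside
// A's guaranteed trailing zeros, e.g. A = X*4 (at least two trailing zeros)
// and C = 1.  The minimum-trailing-zeros count is passed in here because
// GetMinTrailingZeros is a ScalarEvolution member; the helper is hypothetical.
#include "llvm/ADT/APInt.h"

static bool orActsLikeAdd(unsigned MinTrailingZerosOfLHS, const llvm::APInt &C) {
  unsigned ActiveBits = C.getBitWidth() - C.countLeadingZeros();
  return MinTrailingZerosOfLHS >= ActiveBits;
}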
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); - ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); - ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); + ExitLimit EL0 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); + ExitLimit EL1 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -6124,12 +6192,12 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); - ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); - ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); + ExitLimit EL0 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); + ExitLimit EL1 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -10221,84 +10289,75 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } -typedef DenseMap<const Loop *, std::string> VerifyMap; +void ScalarEvolution::verify() const { + ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); + ScalarEvolution SE2(F, TLI, AC, DT, LI); + + SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end()); -/// replaceSubString - Replaces all occurrences of From in Str with To. -static void replaceSubString(std::string &Str, StringRef From, StringRef To) { - size_t Pos = 0; - while ((Pos = Str.find(From, Pos)) != std::string::npos) { - Str.replace(Pos, From.size(), To.data(), To.size()); - Pos += To.size(); - } -} + // Map's SCEV expressions from one ScalarEvolution "universe" to another. + struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> { + const SCEV *visitConstant(const SCEVConstant *Constant) { + return SE.getConstant(Constant->getAPInt()); + } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return SE.getUnknown(Expr->getValue()); + } -/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis. -static void -getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { - std::string &S = Map[L]; - if (S.empty()) { - raw_string_ostream OS(S); - SE.getBackedgeTakenCount(L)->print(OS); + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return SE.getCouldNotCompute(); + } + SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {} + }; - // false and 0 are semantically equivalent. This can happen in dead loops. - replaceSubString(OS.str(), "false", "0"); - // Remove wrap flags, their use in SCEV is highly fragile. - // FIXME: Remove this when SCEV gets smarter about them. 
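// Generic sketch of the memoization shape introduced above (names are
// hypothetical): look the key up, otherwise compute and insert.  In the patch
// the key is only {ExitCond, ControlsExit}; L, TBB, FBB and AllowPredicates
// are fixed per ExitLimitCache instance and merely re-asserted on each access.
#include "llvm/ADT/DenseMap.h"

template <typename KeyT, typename ResultT, typename ComputeFnT>
static ResultT computeCached(llvm::DenseMap<KeyT, ResultT> &Cache,
                             const KeyT &Key, ComputeFnT Compute) {
  auto It = Cache.find(Key);
  if (It != Cache.end())
    return It->second;               // hit: reuse the earlier result
  ResultT Result = Compute(Key);     // miss: possibly recursive work
  Cache.insert({Key, Result});       // Compute must not have inserted Key itself
  return Result;
}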
- replaceSubString(OS.str(), "<nw>", ""); - replaceSubString(OS.str(), "<nsw>", ""); - replaceSubString(OS.str(), "<nuw>", ""); - } + SCEVMapper SCM(SE2); - for (auto *R : reverse(*L)) - getLoopBackedgeTakenCounts(R, Map, SE); // recurse. -} + while (!LoopStack.empty()) { + auto *L = LoopStack.pop_back_val(); + LoopStack.insert(LoopStack.end(), L->begin(), L->end()); -void ScalarEvolution::verify() const { - ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); + auto *CurBECount = SCM.visit( + const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L)); + auto *NewBECount = SE2.getBackedgeTakenCount(L); - // Gather stringified backedge taken counts for all loops using SCEV's caches. - // FIXME: It would be much better to store actual values instead of strings, - // but SCEV pointers will change if we drop the caches. - VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; - for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) - getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); + if (CurBECount == SE2.getCouldNotCompute() || + NewBECount == SE2.getCouldNotCompute()) { + // NB! This situation is legal, but is very suspicious -- whatever pass + // change the loop to make a trip count go from could not compute to + // computable or vice-versa *should have* invalidated SCEV. However, we + // choose not to assert here (for now) since we don't want false + // positives. + continue; + } - // Gather stringified backedge taken counts for all loops using a fresh - // ScalarEvolution object. - ScalarEvolution SE2(F, TLI, AC, DT, LI); - for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) - getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2); - - // Now compare whether they're the same with and without caches. This allows - // verifying that no pass changed the cache. - assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && - "New loops suddenly appeared!"); - - for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(), - OldE = BackedgeDumpsOld.end(), - NewI = BackedgeDumpsNew.begin(); - OldI != OldE; ++OldI, ++NewI) { - assert(OldI->first == NewI->first && "Loop order changed!"); - - // Compare the stringified SCEVs. We don't care if undef backedgetaken count - // changes. - // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This - // means that a pass is buggy or SCEV has to learn a new pattern but is - // usually not harmful. - if (OldI->second != NewI->second && - OldI->second.find("undef") == std::string::npos && - NewI->second.find("undef") == std::string::npos && - OldI->second != "***COULDNOTCOMPUTE***" && - NewI->second != "***COULDNOTCOMPUTE***") { - dbgs() << "SCEVValidator: SCEV for loop '" - << OldI->first->getHeader()->getName() - << "' changed from '" << OldI->second - << "' to '" << NewI->second << "'!\n"; + if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) { + // SCEV treats "undef" as an unknown but consistent value (i.e. it does + // not propagate undef aggressively). This means we can (and do) fail + // verification in cases where a transform makes the trip count of a loop + // go from "undef" to "undef+1" (say). The transform is fine, since in + // both cases the loop iterates "undef" times, but SCEV thinks we + // increased the trip count of the loop by 1 incorrectly. 
+ continue; + } + + if (SE.getTypeSizeInBits(CurBECount->getType()) > + SE.getTypeSizeInBits(NewBECount->getType())) + NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType()); + else if (SE.getTypeSizeInBits(CurBECount->getType()) < + SE.getTypeSizeInBits(NewBECount->getType())) + CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); + + auto *ConstantDelta = + dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount)); + + if (ConstantDelta && ConstantDelta->getAPInt() != 0) { + dbgs() << "Trip Count Changed!\n"; + dbgs() << "Old: " << *CurBECount << "\n"; + dbgs() << "New: " << *NewBECount << "\n"; + dbgs() << "Delta: " << *ConstantDelta << "\n"; std::abort(); } } - - // TODO: Verify more things. } bool ScalarEvolution::invalidate( diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 2aaa4c1ae117..54c44c8e542d 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -51,40 +51,47 @@ NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) { transform(AR->operands(), std::back_inserter(Operands), [&](const SCEV *Op) { return visit(Op); }); - // Conservatively use AnyWrap until/unless we need FlagNW. - const SCEV *Result = - SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); - switch (Kind) { - case Normalize: - // We want to normalize step expression, because otherwise we might not be - // able to denormalize to the original expression. + if (!Pred(AR)) + return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); + + // Normalization and denormalization are fancy names for decrementing and + // incrementing a SCEV expression with respect to a set of loops. Since + // Pred(AR) has returned true, we know we need to normalize or denormalize AR + // with respect to its loop. + + if (Kind == Denormalize) { + // Denormalization / "partial increment" is essentially the same as \c + // SCEVAddRecExpr::getPostIncExpr. Here we use an explicit loop to make the + // symmetry with Normalization clear. + for (int i = 0, e = Operands.size() - 1; i < e; i++) + Operands[i] = SE.getAddExpr(Operands[i], Operands[i + 1]); + } else { + assert(Kind == Normalize && "Only two possibilities!"); + + // Normalization / "partial decrement" is a bit more subtle. Since + // incrementing a SCEV expression (in general) changes the step of the SCEV + // expression as well, we cannot use the step of the current expression. + // Instead, we have to use the step of the very expression we're trying to + // compute! + // + // We solve the issue by recursively building up the result, starting from + // the "least significant" operand in the add recurrence: // - // Here is an example what will happen if we don't normalize step: - // ORIGINAL ISE: - // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25> - // NORMALIZED ISE: - // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+, - // (100 /u {0,+,1}<%bb16>)}<%bb25> - // DENORMALIZED BACK ISE: - // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+, - // (100 /u {1,+,1}<%bb16>)}<%bb25> - // Note that the initial value changes after normalization + - // denormalization, which isn't correct. - if (Pred(AR)) { - const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE)); - Result = SE.getMinusSCEV(Result, TransformedStep); - } - break; - case Denormalize: - // Here we want to normalize step expressions for the same reasons, as - // stated above. 
- if (Pred(AR)) { - const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE)); - Result = SE.getAddExpr(Result, TransformedStep); - } - break; + // Base case: + // Single operand add recurrence. It's its own normalization. + // + // N-operand case: + // {S_{N-1},+,S_{N-2},+,...,+,S_0} = S + // + // Since the step recurrence of S is {S_{N-2},+,...,+,S_0}, we know its + // normalization by induction. We subtract the normalized step + // recurrence from S_{N-1} to get the normalization of S. + + for (int i = Operands.size() - 2; i >= 0; i--) + Operands[i] = SE.getMinusSCEV(Operands[i], Operands[i + 1]); } - return Result; + + return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); } const SCEV *llvm::normalizeForPostIncUse(const SCEV *S, diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 900a2363e60d..af964b6259bb 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <array> @@ -130,15 +131,15 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { return nullptr; } -static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, +static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q); -void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, +void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE) { - ::computeKnownBits(V, KnownZero, KnownOne, Depth, + ::computeKnownBits(V, Known, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } @@ -151,11 +152,11 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, assert(LHS->getType()->isIntOrIntVectorTy() && "LHS and RHS should be integers"); IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType()); - APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0); - APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT); - return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); + KnownBits LHSKnown(IT->getBitWidth()); + KnownBits RHSKnown(IT->getBitWidth()); + computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT); + return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, @@ -252,67 +253,65 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, + KnownBits &KnownOut, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = KnownOut.getBitWidth(); // If an initial sequence of bits in the result is not needed, the // corresponding bits in the operands are not needed. 
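// Plain-integer model of the two loops in the ScalarEvolutionNormalization.cpp
// rewrite above (hypothetical helpers; ints stand in for SCEV operands).
// Denormalization folds each operand with its neighbour from the front,
// {A,+,B} -> {A+B,+,B}, i.e. the post-increment form; normalization works from
// the back and is its exact inverse in this simplified model,
// {A,+,B} -> {A-B,+,B}.  In the patch the subtlety is that the operands have
// already been rewritten recursively before these loops run.
#include "llvm/ADT/SmallVector.h"

static void denormalizeModel(llvm::SmallVectorImpl<int> &Ops) {
  for (int i = 0, e = (int)Ops.size() - 1; i < e; ++i)
    Ops[i] += Ops[i + 1];
}

static void normalizeModel(llvm::SmallVectorImpl<int> &Ops) {
  for (int i = (int)Ops.size() - 2; i >= 0; --i)
    Ops[i] -= Ops[i + 1];
}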
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, Depth + 1, Q); - computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q); + KnownBits LHSKnown(BitWidth); + computeKnownBits(Op0, LHSKnown, Depth + 1, Q); + computeKnownBits(Op1, Known2, Depth + 1, Q); // Carry in a 1 for a subtract, rather than a 0. uint64_t CarryIn = 0; if (!Add) { // Sum = LHS + ~RHS + 1 - std::swap(KnownZero2, KnownOne2); + std::swap(Known2.Zero, Known2.One); CarryIn = 1; } - APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn; - APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn; + APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn; + APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn; // Compute known bits of the carry. - APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2); - APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2; + APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero); + APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One; // Compute set of known bits (where all three relevant bits are known). - APInt LHSKnown = LHSKnownZero | LHSKnownOne; - APInt RHSKnown = KnownZero2 | KnownOne2; - APInt CarryKnown = CarryKnownZero | CarryKnownOne; - APInt Known = LHSKnown & RHSKnown & CarryKnown; + APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One; + APInt RHSKnownUnion = Known2.Zero | Known2.One; + APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne; + APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion; assert((PossibleSumZero & Known) == (PossibleSumOne & Known) && "known bits of sum differ"); // Compute known bits of the result. - KnownZero = ~PossibleSumOne & Known; - KnownOne = PossibleSumOne & Known; + KnownOut.Zero = ~PossibleSumOne & Known; + KnownOut.One = PossibleSumOne & Known; // Are we still trying to solve for the sign bit? if (!Known.isSignBitSet()) { if (NSW) { // Adding two non-negative numbers, or subtracting a negative number from // a non-negative one, can't wrap into negative. - if (LHSKnownZero.isSignBitSet() && KnownZero2.isSignBitSet()) - KnownZero.setSignBit(); + if (LHSKnown.Zero.isSignBitSet() && Known2.Zero.isSignBitSet()) + KnownOut.Zero.setSignBit(); // Adding two negative numbers, or subtracting a non-negative number from // a negative one, can't wrap into non-negative. - else if (LHSKnownOne.isSignBitSet() && KnownOne2.isSignBitSet()) - KnownOne.setSignBit(); + else if (LHSKnown.One.isSignBitSet() && Known2.One.isSignBitSet()) + KnownOut.One.setSignBit(); } } } static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, + KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); - computeKnownBits(Op1, KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(Op0, KnownZero2, KnownOne2, Depth + 1, Q); + unsigned BitWidth = Known.getBitWidth(); + computeKnownBits(Op1, Known, Depth + 1, Q); + computeKnownBits(Op0, Known2, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -322,10 +321,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // The product of a number with itself is non-negative. 
isKnownNonNegative = true; } else { - bool isKnownNonNegativeOp1 = KnownZero.isSignBitSet(); - bool isKnownNonNegativeOp0 = KnownZero2.isSignBitSet(); - bool isKnownNegativeOp1 = KnownOne.isSignBitSet(); - bool isKnownNegativeOp0 = KnownOne2.isSignBitSet(); + bool isKnownNonNegativeOp1 = Known.Zero.isSignBitSet(); + bool isKnownNonNegativeOp0 = Known2.Zero.isSignBitSet(); + bool isKnownNegativeOp1 = Known.One.isSignBitSet(); + bool isKnownNegativeOp0 = Known2.One.isSignBitSet(); // The product of two numbers with the same sign is non-negative. isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); @@ -343,28 +342,28 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), + Known.One.clearAllBits(); + unsigned TrailZ = Known.Zero.countTrailingOnes() + + Known2.Zero.countTrailingOnes(); + unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() + + Known2.Zero.countLeadingOnes(), BitWidth) - BitWidth; TrailZ = std::min(TrailZ, BitWidth); LeadZ = std::min(LeadZ, BitWidth); - KnownZero.clearAllBits(); - KnownZero.setLowBits(TrailZ); - KnownZero.setHighBits(LeadZ); + Known.Zero.clearAllBits(); + Known.Zero.setLowBits(TrailZ); + Known.Zero.setHighBits(LeadZ); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in // which case we prefer to follow the result of the direct computation, // though as the program is invoking undefined behaviour we can choose // whatever we like here. - if (isKnownNonNegative && !KnownOne.isSignBitSet()) - KnownZero.setSignBit(); - else if (isKnownNegative && !KnownZero.isSignBitSet()) - KnownOne.setSignBit(); + if (isKnownNonNegative && !Known.One.isSignBitSet()) + Known.Zero.setSignBit(); + else if (isKnownNegative && !Known.Zero.isSignBitSet()) + Known.One.setSignBit(); } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, @@ -499,15 +498,14 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return !isEphemeralValueOf(Inv, CxtI); } -static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - const Query &Q) { +static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, + unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! if (!Q.AC || !Q.CxtI) return; - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); // Note that the patterns below need to be kept in sync with the code // in AssumptionCache::updateAffectedValues. 
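// Caller-side sketch (illustrative, hypothetical helper): the assume-driven
// facts handled below are only consulted when computeKnownBits is given both
// an AssumptionCache and a context instruction; without them the code above
// returns early.  Uses the KnownBits overload introduced in this patch.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool signBitKnownZero(const Value *V, const DataLayout &DL,
                             AssumptionCache &AC, const Instruction &CxtI,
                             const DominatorTree &DT) {
  if (!V->getType()->isIntOrIntVectorTy())
    return false;
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, DL, /*Depth=*/0, &AC, &CxtI, &DT);
  return Known.Zero.isSignBitSet(); // e.g. provable from assume(V >=s 0)
}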
@@ -532,15 +530,15 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); - KnownZero.clearAllBits(); - KnownOne.setAllBits(); + Known.Zero.clearAllBits(); + Known.One.setAllBits(); return; } if (match(Arg, m_Not(m_Specific(V))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); - KnownZero.setAllBits(); - KnownOne.clearAllBits(); + Known.Zero.setAllBits(); + Known.One.clearAllBits(); return; } @@ -558,126 +556,126 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - KnownZero |= RHSKnownZero; - KnownOne |= RHSKnownOne; + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + Known.Zero |= RHSKnown.Zero; + Known.One |= RHSKnown.One; // assume(v & b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // known bits from the RHS to V. - KnownZero |= RHSKnownZero & MaskKnownOne; - KnownOne |= RHSKnownOne & MaskKnownOne; + Known.Zero |= RHSKnown.Zero & MaskKnown.One; + Known.One |= RHSKnown.One & MaskKnown.One; // assume(~(v & b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // inverted known bits from the RHS to V. 
- KnownZero |= RHSKnownOne & MaskKnownOne; - KnownOne |= RHSKnownZero & MaskKnownOne; + Known.Zero |= RHSKnown.One & MaskKnown.One; + Known.One |= RHSKnown.Zero & MaskKnown.One; // assume(v | b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. - KnownZero |= RHSKnownZero & BKnownZero; - KnownOne |= RHSKnownOne & BKnownZero; + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; // assume(~(v | b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. - KnownZero |= RHSKnownOne & BKnownZero; - KnownOne |= RHSKnownZero & BKnownZero; + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; // assume(v ^ b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. For those bits in B that are known to be one, // we can propagate inverted known bits from the RHS to V. 
- KnownZero |= RHSKnownZero & BKnownZero; - KnownOne |= RHSKnownOne & BKnownZero; - KnownZero |= RHSKnownOne & BKnownOne; - KnownOne |= RHSKnownZero & BKnownOne; + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; + Known.Zero |= RHSKnown.One & BKnown.One; + Known.One |= RHSKnown.Zero & BKnown.One; // assume(~(v ^ b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. For those bits in B that are // known to be one, we can propagate known bits from the RHS to V. - KnownZero |= RHSKnownOne & BKnownZero; - KnownOne |= RHSKnownZero & BKnownZero; - KnownZero |= RHSKnownZero & BKnownOne; - KnownOne |= RHSKnownOne & BKnownOne; + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; + Known.Zero |= RHSKnown.Zero & BKnown.One; + Known.One |= RHSKnown.One & BKnown.One; // assume(v << c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - RHSKnownZero.lshrInPlace(C->getZExtValue()); - KnownZero |= RHSKnownZero; - RHSKnownOne.lshrInPlace(C->getZExtValue()); - KnownOne |= RHSKnownOne; + RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + Known.Zero |= RHSKnown.Zero; + RHSKnown.One.lshrInPlace(C->getZExtValue()); + Known.One |= RHSKnown.One; // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. 
- RHSKnownOne.lshrInPlace(C->getZExtValue()); - KnownZero |= RHSKnownOne; - RHSKnownZero.lshrInPlace(C->getZExtValue()); - KnownOne |= RHSKnownZero; + RHSKnown.One.lshrInPlace(C->getZExtValue()); + Known.Zero |= RHSKnown.One; + RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), @@ -685,12 +683,12 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - KnownZero |= RHSKnownZero << C->getZExtValue(); - KnownOne |= RHSKnownOne << C->getZExtValue(); + Known.Zero |= RHSKnown.Zero << C->getZExtValue(); + Known.One |= RHSKnown.One << C->getZExtValue(); // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr( m_LShr(m_V, m_ConstantInt(C)), @@ -698,78 +696,78 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - KnownZero |= RHSKnownOne << C->getZExtValue(); - KnownOne |= RHSKnownZero << C->getZExtValue(); + Known.Zero |= RHSKnown.One << C->getZExtValue(); + Known.One |= RHSKnown.Zero << C->getZExtValue(); // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownZero.isSignBitSet()) { + if (RHSKnown.Zero.isSignBitSet()) { // We know that the sign bit is zero. - KnownZero.setSignBit(); + Known.Zero.setSignBit(); } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isSignBitSet()) { + if (RHSKnown.One.isAllOnesValue() || RHSKnown.Zero.isSignBitSet()) { // We know that the sign bit is zero. 
- KnownZero.setSignBit(); + Known.Zero.setSignBit(); } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownOne.isSignBitSet()) { + if (RHSKnown.One.isSignBitSet()) { // We know that the sign bit is one. - KnownOne.setSignBit(); + Known.One.setSignBit(); } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isSignBitSet()) { + if (RHSKnown.Zero.isAllOnesValue() || RHSKnown.One.isSignBitSet()) { // We know that the sign bit is one. - KnownOne.setSignBit(); + Known.One.setSignBit(); } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); + Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()); // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()+1); + Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()+1); else - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); + Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()); } } @@ -778,9 +776,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // so this isn't a real bug. On the other hand, the program may have undefined // behavior, or we might have a bug in the compiler. We can't assert/crash, so // clear out the known bits, try to warn the user, and hope for the best. - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if (Known.Zero.intersects(Known.One)) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); if (Q.ORE) { auto *CxtI = const_cast<Instruction *>(Q.CxtI); @@ -793,57 +791,57 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } // Compute known bits from a shift operator, including those with a -// non-constant shift amount. KnownZero and KnownOne are the outputs of this -// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the -// same bit width as KnownZero and KnownOne. 
KZF and KOF are operator-specific -// functors that, given the known-zero or known-one bits respectively, and a -// shift amount, compute the implied known-zero or known-one bits of the shift -// operator's result respectively for that shift amount. The results from calling -// KZF and KOF are conservatively combined for all permitted shift amounts. +// non-constant shift amount. Known is the outputs of this function. Known2 is a +// pre-allocated temporary with the/ same bit width as Known. KZF and KOF are +// operator-specific functors that, given the known-zero or known-one bits +// respectively, and a shift amount, compute the implied known-zero or known-one +// bits of the shift operator's result respectively for that shift amount. The +// results from calling KZF and KOF are conservatively combined for all +// permitted shift amounts. static void computeKnownBitsFromShiftOperator( - const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, - APInt &KnownOne2, unsigned Depth, const Query &Q, + const Operator *I, KnownBits &Known, KnownBits &Known2, + unsigned Depth, const Query &Q, function_ref<APInt(const APInt &, unsigned)> KZF, function_ref<APInt(const APInt &, unsigned)> KOF) { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KZF(KnownZero, ShiftAmt); - KnownOne = KOF(KnownOne, ShiftAmt); - // If there is conflict between KnownZero and KnownOne, this must be an - // overflowing left shift, so the shift result is undefined. Clear KnownZero - // and KnownOne bits so that other code could propagate this undef. - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero = KZF(Known.Zero, ShiftAmt); + Known.One = KOF(Known.One, ShiftAmt); + // If there is conflict between Known.Zero and Known.One, this must be an + // overflowing left shift, so the shift result is undefined. Clear Known + // bits so that other code could propagate this undef. + if ((Known.Zero & Known.One) != 0) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); } return; } - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); // If the shift amount could be greater than or equal to the bit-width of the LHS, the // value could be undef, so we don't know anything about it. - if ((~KnownZero).uge(BitWidth)) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if ((~Known.Zero).uge(BitWidth)) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); return; } - // Note: We cannot use KnownZero.getLimitedValue() here, because if + // Note: We cannot use Known.Zero.getLimitedValue() here, because if // BitWidth > 64 and any upper bits are known, we'll end up returning the // limit value (which implies all bits are known). - uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue(); - uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue(); // It would be more-clearly correct to use the two temporaries for this // calculation. Reusing the APInts here to prevent unnecessary allocations. 
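// Hypothetical functor pair matching the KZF/KOF contract described above
// (function_ref<APInt(const APInt &, unsigned)>), using a left shift as the
// example: known-zero bits move up and the vacated low bits become known
// zero, while known-one bits simply move up.  The functors the real callers
// pass in are outside this hunk.
#include "llvm/ADT/APInt.h"
using llvm::APInt;

static APInt shlKZF(const APInt &KnownZero, unsigned ShiftAmt) {
  APInt KZ = KnownZero.shl(ShiftAmt);
  KZ.setLowBits(ShiftAmt); // bits shifted in from the right are zero
  return KZ;
}

static APInt shlKOF(const APInt &KnownOne, unsigned ShiftAmt) {
  return KnownOne.shl(ShiftAmt);
}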
- KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); // If we know the shifter operand is nonzero, we can sometimes infer more // known bits. However this is expensive to compute, so be lazy about it and @@ -858,9 +856,10 @@ static void computeKnownBitsFromShiftOperator( return; } - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { // Combine the shifted known input bits only for those shift amounts // compatible with its known constraints. @@ -879,8 +878,8 @@ static void computeKnownBitsFromShiftOperator( continue; } - KnownZero &= KZF(KnownZero2, ShiftAmt); - KnownOne &= KOF(KnownOne2, ShiftAmt); + Known.Zero &= KZF(Known2.Zero, ShiftAmt); + Known.One &= KOF(Known2.One, ShiftAmt); } // If there are no compatible shift amounts, then we've proven that the shift @@ -888,33 +887,32 @@ static void computeKnownBitsFromShiftOperator( // return anything we'd like, but we need to make sure the sets of known bits // stay disjoint (it should be better for some other code to actually // propagate the undef than to pick a value here using known bits). - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if (Known.Zero.intersects(Known.One)) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); } } -static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); +static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, + unsigned Depth, const Query &Q) { + unsigned BitWidth = Known.getBitWidth(); - APInt KnownZero2(KnownZero), KnownOne2(KnownOne); + KnownBits Known2(Known); switch (I->getOpcode()) { default: break; case Instruction::Load: if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; // and(x, add (x, -1)) is a common idiom that always clears the low bit; // here we handle the more general case of adding any odd number by @@ -922,115 +920,115 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // TODO: This could be generalized to clearing any bit set in y where the // following bit is known to be unset in y. 
Value *Y = nullptr; - if (!KnownZero[0] && !KnownOne[0] && + if (!Known.Zero[0] && !Known.One[0] && (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), m_Value(Y))) || match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), m_Value(Y))))) { - KnownZero2.clearAllBits(); KnownOne2.clearAllBits(); - computeKnownBits(Y, KnownZero2, KnownOne2, Depth + 1, Q); - if (KnownOne2.countTrailingOnes() > 0) - KnownZero.setBit(0); + Known2.Zero.clearAllBits(); Known2.One.clearAllBits(); + computeKnownBits(Y, Known2, Depth + 1, Q); + if (Known2.One.countTrailingOnes() > 0) + Known.Zero.setBit(0); } break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = std::move(KnownZeroOut); + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = std::move(KnownZeroOut); break; } case Instruction::Mul: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known, + Known2, Depth, Q); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
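// Worked example (illustrative) for the bound computed just below: with
// BitWidth = 32, a numerator with 8 known leading zero bits and a denominator
// whose known-one bits prove it is at least 16 (countLeadingZeros(Known2.One)
// == 27), LeadZ becomes 8 + 32 - 27 - 1 == 12, i.e. dividing by >= 2^4 adds
// four more known leading zeros to the quotient.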
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - unsigned LeadZ = KnownZero2.countLeadingOnes(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + unsigned LeadZ = Known2.Zero.countLeadingOnes(); - KnownOne2.clearAllBits(); - KnownZero2.clearAllBits(); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); - unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + Known2.One.clearAllBits(); + Known2.Zero.clearAllBits(); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero.setHighBits(LeadZ); + Known.Zero.setHighBits(LeadZ); break; } case Instruction::Select: { const Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { - computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(RHS, Known, Depth + 1, Q); + computeKnownBits(LHS, Known2, Depth + 1, Q); } else { - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); } unsigned MaxHighOnes = 0; unsigned MaxHighZeros = 0; if (SPF == SPF_SMAX) { // If both sides are negative, the result is negative. - if (KnownOne.isSignBitSet() && KnownOne2.isSignBitSet()) + if (Known.One.isSignBitSet() && Known2.One.isSignBitSet()) // We can derive a lower bound on the result by taking the max of the // leading one bits. - MaxHighOnes = - std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + MaxHighOnes = std::max(Known.One.countLeadingOnes(), + Known2.One.countLeadingOnes()); // If either side is non-negative, the result is non-negative. - else if (KnownZero.isSignBitSet() || KnownZero2.isSignBitSet()) + else if (Known.Zero.isSignBitSet() || Known2.Zero.isSignBitSet()) MaxHighZeros = 1; } else if (SPF == SPF_SMIN) { // If both sides are non-negative, the result is non-negative. - if (KnownZero.isSignBitSet() && KnownZero2.isSignBitSet()) + if (Known.Zero.isSignBitSet() && Known2.Zero.isSignBitSet()) // We can derive an upper bound on the result by taking the max of the // leading zero bits. - MaxHighZeros = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); + MaxHighZeros = std::max(Known.Zero.countLeadingOnes(), + Known2.Zero.countLeadingOnes()); // If either side is negative, the result is negative. - else if (KnownOne.isSignBitSet() || KnownOne2.isSignBitSet()) + else if (Known.One.isSignBitSet() || Known2.One.isSignBitSet()) MaxHighOnes = 1; } else if (SPF == SPF_UMAX) { // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = - std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + std::max(Known.One.countLeadingOnes(), Known2.One.countLeadingOnes()); } else if (SPF == SPF_UMIN) { // We can derive an upper bound on the result by taking the max of the // leading zero bits. MaxHighZeros = - std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); + std::max(Known.Zero.countLeadingOnes(), Known2.Zero.countLeadingOnes()); } // Only known if known in both the LHS and RHS. 
- KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; if (MaxHighOnes > 0) - KnownOne.setHighBits(MaxHighOnes); + Known.One.setHighBits(MaxHighOnes); if (MaxHighZeros > 0) - KnownZero.setHighBits(MaxHighZeros); + Known.Zero.setHighBits(MaxHighZeros); break; } case Instruction::FPTrunc: @@ -1054,14 +1052,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType()); assert(SrcBitWidth && "SrcBitWidth can't be zero"); - KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); - KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KnownZero.zextOrTrunc(BitWidth); - KnownOne = KnownOne.zextOrTrunc(BitWidth); + Known.Zero = Known.Zero.zextOrTrunc(SrcBitWidth); + Known.One = Known.One.zextOrTrunc(SrcBitWidth); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero = Known.Zero.zextOrTrunc(BitWidth); + Known.One = Known.One.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) - KnownZero.setBitsFrom(SrcBitWidth); + Known.Zero.setBitsFrom(SrcBitWidth); break; } case Instruction::BitCast: { @@ -1070,7 +1068,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); break; } break; @@ -1079,13 +1077,13 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + Known.Zero = Known.Zero.trunc(SrcBitWidth); + Known.One = Known.One.trunc(SrcBitWidth); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. 
- KnownZero = KnownZero.sext(BitWidth); - KnownOne = KnownOne.sext(BitWidth); + Known.Zero = Known.Zero.sext(BitWidth); + Known.One = Known.One.sext(BitWidth); break; } case Instruction::Shl: { @@ -1108,9 +1106,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, return KOResult; }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::LShr: { @@ -1126,9 +1122,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, return KnownOne.lshr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::AShr: { @@ -1141,23 +1135,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, return KnownOne.ashr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } case Instruction::Add: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } case Instruction::SRem: @@ -1165,34 +1155,33 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, - Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // The low bits of the first operand are unchanged by the srem. - KnownZero = KnownZero2 & LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero & LowBits; + Known.One = Known2.One & LowBits; // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2.isSignBitSet() || ((KnownZero2 & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (Known2.Zero.isSignBitSet() || ((Known2.Zero & LowBits) == LowBits)) + Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2.isSignBitSet() && ((KnownOne2 & LowBits) != 0)) - KnownOne |= ~LowBits; + if (Known2.One.isSignBitSet() && ((Known2.One & LowBits) != 0)) + Known.One |= ~LowBits; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); break; } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // If it's known zero, our sign bit is also zero. 
- if (KnownZero2.isSignBitSet()) - KnownZero.setSignBit(); + if (Known2.Zero.isSignBitSet()) + Known.Zero.setSignBit(); break; case Instruction::URem: { @@ -1200,23 +1189,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, const APInt &RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero |= ~LowBits; - KnownOne &= LowBits; + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero |= ~LowBits; + Known.One &= LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); - - unsigned Leaders = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); - KnownZero.setHighBits(Leaders); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + + unsigned Leaders = std::max(Known.Zero.countLeadingOnes(), + Known2.Zero.countLeadingOnes()); + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); + Known.Zero.setHighBits(Leaders); break; } @@ -1227,16 +1216,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, Align = Q.DL.getABITypeAlignment(AI->getAllocatedType()); if (Align > 0) - KnownZero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align)); break; } case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. - APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, Depth + 1, - Q); - unsigned TrailZ = LocalKnownZero.countTrailingOnes(); + KnownBits LocalKnown(BitWidth); + computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q); + unsigned TrailZ = LocalKnown.Zero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { @@ -1266,15 +1254,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy); - LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - computeKnownBits(Index, LocalKnownZero, LocalKnownOne, Depth + 1, Q); + LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0); + computeKnownBits(Index, LocalKnown, Depth + 1, Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + - LocalKnownZero.countTrailingOnes())); + LocalKnown.Zero.countTrailingOnes())); } } - KnownZero.setLowBits(TrailZ); + Known.Zero.setLowBits(TrailZ); break; } case Instruction::PHI: { @@ -1309,14 +1297,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. 
- computeKnownBits(R, KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(R, Known2, Depth + 1, Q); // We need to take the minimum number of known bits - APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q); + KnownBits Known3(Known); + computeKnownBits(L, Known3, Depth + 1, Q); - KnownZero.setLowBits(std::min(KnownZero2.countTrailingOnes(), - KnownZero3.countTrailingOnes())); + Known.Zero.setLowBits(std::min(Known2.Zero.countTrailingOnes(), + Known3.Zero.countTrailingOnes())); if (DontImproveNonNegativePhiBits) break; @@ -1333,25 +1321,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // (add non-negative, non-negative) --> non-negative // (add negative, negative) --> negative if (Opcode == Instruction::Add) { - if (KnownZero2.isSignBitSet() && KnownZero3.isSignBitSet()) - KnownZero.setSignBit(); - else if (KnownOne2.isSignBitSet() && KnownOne3.isSignBitSet()) - KnownOne.setSignBit(); + if (Known2.Zero.isSignBitSet() && Known3.Zero.isSignBitSet()) + Known.Zero.setSignBit(); + else if (Known2.One.isSignBitSet() && Known3.One.isSignBitSet()) + Known.One.setSignBit(); } // (sub nsw non-negative, negative) --> non-negative // (sub nsw negative, non-negative) --> negative else if (Opcode == Instruction::Sub && LL == I) { - if (KnownZero2.isSignBitSet() && KnownOne3.isSignBitSet()) - KnownZero.setSignBit(); - else if (KnownOne2.isSignBitSet() && KnownZero3.isSignBitSet()) - KnownOne.setSignBit(); + if (Known2.Zero.isSignBitSet() && Known3.One.isSignBitSet()) + Known.Zero.setSignBit(); + else if (Known2.One.isSignBitSet() && Known3.Zero.isSignBitSet()) + Known.One.setSignBit(); } // (mul nsw non-negative, non-negative) --> non-negative - else if (Opcode == Instruction::Mul && KnownZero2.isSignBitSet() && - KnownZero3.isSignBitSet()) - KnownZero.setSignBit(); + else if (Opcode == Instruction::Mul && Known2.Zero.isSignBitSet() && + Known3.Zero.isSignBitSet()) + Known.Zero.setSignBit(); } break; @@ -1365,27 +1353,26 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. - if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) { // Skip if every incoming value references to ourself. if (dyn_cast_or_null<UndefValue>(P->hasConstantValue())) break; - KnownZero.setAllBits(); - KnownOne.setAllBits(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (Value *IncValue : P->incoming_values()) { // Skip direct self references. if (IncValue == P) continue; - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); + Known2 = KnownBits(BitWidth); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, KnownZero2, KnownOne2, MaxDepth - 1, Q); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + computeKnownBits(IncValue, Known2, MaxDepth - 1, Q); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; // If all bits have been ruled out, there's no need to check // more operands. - if (!KnownZero && !KnownOne) + if (!Known.Zero && !Known.One) break; } } @@ -1397,24 +1384,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // and then intersect with known bits based on other properties of the // function. 
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One); if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) { - computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2; - KnownOne |= KnownOne2; + computeKnownBits(RV, Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero; + Known.One |= Known2.One; } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::bitreverse: - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2.reverseBits(); - KnownOne |= KnownOne2.reverseBits(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero.reverseBits(); + Known.One |= Known2.One.reverseBits(); break; case Intrinsic::bswap: - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2.byteSwap(); - KnownOne |= KnownOne2.byteSwap(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero.byteSwap(); + Known.One |= Known2.One.byteSwap(); break; case Intrinsic::ctlz: case Intrinsic::cttz: { @@ -1422,22 +1409,22 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero.setBitsFrom(LowBits); + Known.Zero.setBitsFrom(LowBits); break; } case Intrinsic::ctpop: { - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // We can bound the space the count needs. Also, bits known to be zero // can't contribute to the population. - unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned BitsPossiblySet = BitWidth - Known2.Zero.countPopulation(); unsigned LowBits = Log2_32(BitsPossiblySet)+1; - KnownZero.setBitsFrom(LowBits); + Known.Zero.setBitsFrom(LowBits); // TODO: we could bound KnownOne using the lower bound on the number // of bits which might be set provided by popcnt KnownOne2. break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero.setBitsFrom(32); + Known.Zero.setBitsFrom(32); break; } } @@ -1447,7 +1434,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // tracking the specific element. But at least we might find information // valid for all elements of the vector (for example if vector is sign // extended, shifted, etc). 
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); break; case Instruction::ExtractValue: if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { @@ -1459,20 +1446,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + II->getArgOperand(1), false, Known, Known2, + Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + II->getArgOperand(1), false, Known, Known2, + Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } } @@ -1481,7 +1467,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } /// Determine which bits of V are known to be either zero or one and return -/// them in the KnownZero/KnownOne bit sets. +/// them in the Known bit set. /// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing @@ -1495,11 +1481,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, - unsigned Depth, const Query &Q) { +void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, + const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarType()->isPointerTy()) && @@ -1507,22 +1493,20 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "V, KnownOne and KnownZero should have same BitWidth"); + "V and Known should have same BitWidth"); (void)BitWidth; const APInt *C; if (match(V, m_APInt(C))) { // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C; - KnownZero = ~KnownOne; + Known.One = *C; + Known.Zero = ~Known.One; return; } // Null and aggregate-zero are all-zeros. if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { - KnownOne.clearAllBits(); - KnownZero.setAllBits(); + Known.One.clearAllBits(); + Known.Zero.setAllBits(); return; } // Handle a constant vector by taking the intersection of the known bits of @@ -1530,12 +1514,12 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) { // We know that CDS must be a vector of integers. 
Take the intersection of // each element. - KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); + Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { Elt = CDS->getElementAsInteger(i); - KnownZero &= ~Elt; - KnownOne &= Elt; + Known.Zero &= ~Elt; + Known.One &= Elt; } return; } @@ -1543,25 +1527,25 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, if (const auto *CV = dyn_cast<ConstantVector>(V)) { // We know that CV must be a vector of integers. Take the intersection of // each element. - KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); + Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Element = CV->getAggregateElement(i); auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); if (!ElementCI) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); return; } Elt = ElementCI->getValue(); - KnownZero &= ~Elt; - KnownOne &= Elt; + Known.Zero &= ~Elt; + Known.One &= Elt; } return; } // Start out not knowing anything. - KnownZero.clearAllBits(); KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); Known.One.clearAllBits(); // We can't imply anything about undefs. if (isa<UndefValue>(V)) @@ -1580,27 +1564,27 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, // the bits of its aliasee. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (!GA->isInterposable()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); return; } if (const Operator *I = dyn_cast<Operator>(V)) - computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q); + computeKnownBitsFromOperator(I, Known, Depth, Q); - // Aligned pointers have trailing zeros - refine KnownZero set + // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { unsigned Align = V->getPointerAlignment(Q.DL); if (Align) - KnownZero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align)); } - // computeKnownBitsFromAssume strictly refines KnownZero and - // KnownOne. Therefore, we run them after computeKnownBitsFromOperator. + // computeKnownBitsFromAssume strictly refines Known. + // Therefore, we run them after computeKnownBitsFromOperator. // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q); + computeKnownBitsFromAssume(V, Known, Depth, Q); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } /// Determine whether the sign bit is known to be zero or one. 
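For readers skimming the ValueTracking.cpp hunks above: the mechanical change is that the paired KnownZero/KnownOne APInt out-parameters are folded into a single KnownBits value. The following is a minimal sketch of the interface the new code relies on, reconstructed only from its usage in this diff (member names Zero/One, a bit-width constructor, getBitWidth()); it is not quoted from llvm/Support/KnownBits.h and omits everything else that header provides.

#include "llvm/ADT/APInt.h"

using llvm::APInt;

// Sketch of the KnownBits helper as exercised by the hunks above: it simply
// bundles the Zero/One masks that previously traveled as two separate APInts.
struct KnownBits {
  APInt Zero; // bits known to be 0
  APInt One;  // bits known to be 1

  // Mirrors the `KnownBits Known(BitWidth);` constructions in the patch:
  // both masks start out empty (nothing known).
  explicit KnownBits(unsigned BitWidth)
      : Zero(BitWidth, 0), One(BitWidth, 0) {}

  unsigned getBitWidth() const { return Zero.getBitWidth(); }
};

// Callers that previously wrote
//   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
//   computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
// now write
//   KnownBits Known(BitWidth);
//   computeKnownBits(V, Known, Depth, Q);
// The invariant asserted throughout the patch is unchanged:
//   (Known.Zero & Known.One) == 0, i.e. no bit is known both zero and one.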
@@ -1613,11 +1597,10 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, KnownOne = false; return; } - APInt ZeroBits(BitWidth, 0); - APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, Depth, Q); - KnownOne = OneBits.isSignBitSet(); - KnownZero = ZeroBits.isSignBitSet(); + KnownBits Bits(BitWidth); + computeKnownBits(V, Bits, Depth, Q); + KnownOne = Bits.One.isSignBitSet(); + KnownZero = Bits.Zero.isSignBitSet(); } /// Return true if the given value is known to have exactly one @@ -1689,18 +1672,18 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); - APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - computeKnownBits(X, LHSZeroBits, LHSOneBits, Depth, Q); + KnownBits LHSBits(BitWidth); + computeKnownBits(X, LHSBits, Depth, Q); - APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - computeKnownBits(Y, RHSZeroBits, RHSOneBits, Depth, Q); + KnownBits RHSBits(BitWidth); + computeKnownBits(Y, RHSBits, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 - if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2()) + if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) // If OrZero isn't set, we cannot give back a zero result. // Make sure either the LHS or RHS has a bit set. - if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue()) + if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) return true; } } @@ -1871,10 +1854,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, Depth, Q); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); - if (KnownOne[0]) + KnownBits Known(BitWidth); + computeKnownBits(X, Known, Depth, Q); + if (Known.One[0]) return true; } // shr X, Y != 0 if X is negative. Note that the value of the shift is not @@ -1894,16 +1876,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // out are known to be zero, and X is known non-zero then at least one // non-zero bit must remain. if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); + KnownBits Known(BitWidth); + computeKnownBits(X, Known, Depth, Q); auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); // Is there a known one in the portion not shifted out? - if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + if (Known.One.countLeadingZeros() < BitWidth - ShiftVal) return true; // Are all the bits to be shifted out known zero? - if (KnownZero.countTrailingOnes() >= ShiftVal) + if (Known.Zero.countTrailingOnes() >= ShiftVal) return isKnownNonZero(X, Depth, Q); } } @@ -1927,18 +1908,17 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // If X and Y are both negative (as signed values) then their sum is not // zero unless both X and Y equal INT_MIN. if (BitWidth && XKnownNegative && YKnownNegative) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); + KnownBits Known(BitWidth); APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. 
- computeKnownBits(X, KnownZero, KnownOne, Depth, Q); - if ((KnownOne & Mask) != 0) + computeKnownBits(X, Known, Depth, Q); + if (Known.One.intersects(Mask)) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - computeKnownBits(Y, KnownZero, KnownOne, Depth, Q); - if ((KnownOne & Mask) != 0) + computeKnownBits(Y, Known, Depth, Q); + if (Known.One.intersects(Mask)) return true; } @@ -1993,10 +1973,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } if (!BitWidth) return false; - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); - return KnownOne != 0; + KnownBits Known(BitWidth); + computeKnownBits(V, Known, Depth, Q); + return Known.One != 0; } /// Return true if V2 == V1 + X, where X is known non-zero. @@ -2028,14 +2007,13 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { // Are any known bits in V1 contradictory to known bits in V2? If V1 // has a known zero where V2 has a known one, they must not be equal. auto BitWidth = Ty->getBitWidth(); - APInt KnownZero1(BitWidth, 0); - APInt KnownOne1(BitWidth, 0); - computeKnownBits(V1, KnownZero1, KnownOne1, 0, Q); - APInt KnownZero2(BitWidth, 0); - APInt KnownOne2(BitWidth, 0); - computeKnownBits(V2, KnownZero2, KnownOne2, 0, Q); - - auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); + KnownBits Known1(BitWidth); + computeKnownBits(V1, Known1, 0, Q); + KnownBits Known2(BitWidth); + computeKnownBits(V2, Known2, 0, Q); + + APInt OppositeBits = (Known1.Zero & Known2.One) | + (Known2.Zero & Known1.One); if (OppositeBits.getBoolValue()) return true; } @@ -2053,9 +2031,9 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { /// for all of the elements in the vector. bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, const Query &Q) { - APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); - return (KnownZero & Mask) == Mask; + KnownBits Known(Mask.getBitWidth()); + computeKnownBits(V, Known, Depth, Q); + return Mask.isSubsetOf(Known.Zero); } /// For vector constants, loop over the elements and find the constant with the @@ -2233,17 +2211,17 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If we are subtracting one from a positive number, there is no carry // out of the result. - if (KnownZero.isSignBitSet()) + if (Known.Zero.isSignBitSet()) return Tmp; } @@ -2258,16 +2236,16 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Handle NEG. 
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) if (CLHS->isNullValue()) { - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (KnownZero.isSignBitSet()) + if (Known.Zero.isSignBitSet()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -2319,16 +2297,16 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits)) return VecSignBits; - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); + KnownBits Known(TyBits); + computeKnownBits(V, Known, Depth, Q); // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. - if (KnownZero.isSignBitSet()) - return std::max(FirstAnswer, KnownZero.countLeadingOnes()); + if (Known.Zero.isSignBitSet()) + return std::max(FirstAnswer, Known.Zero.countLeadingOnes()); - if (KnownOne.isSignBitSet()) - return std::max(FirstAnswer, KnownOne.countLeadingOnes()); + if (Known.One.isSignBitSet()) + return std::max(FirstAnswer, Known.One.countLeadingOnes()); // computeKnownBits gave us no extra information about the top bits. return FirstAnswer; @@ -3534,26 +3512,22 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, // we can guarantee that the result does not overflow. // Ref: "Hacker's Delight" by Henry Warren unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI, - DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI, - DT); + KnownBits LHSKnown(BitWidth); + KnownBits RHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); // Note that underestimating the number of zero bits gives a more // conservative answer. - unsigned ZeroBits = LHSKnownZero.countLeadingOnes() + - RHSKnownZero.countLeadingOnes(); + unsigned ZeroBits = LHSKnown.Zero.countLeadingOnes() + + RHSKnown.Zero.countLeadingOnes(); // First handle the easy case: if we have enough zero bits there's // definitely no overflow. if (ZeroBits >= BitWidth) return OverflowResult::NeverOverflows; // Get the largest possible values for each operand. - APInt LHSMax = ~LHSKnownZero; - APInt RHSMax = ~RHSKnownZero; + APInt LHSMax = ~LHSKnown.Zero; + APInt RHSMax = ~RHSKnown.Zero; // We know the multiply operation doesn't overflow if the maximum values for // each operand will not overflow after we multiply them together. @@ -3565,7 +3539,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, // We know it always overflows if multiplying the smallest possible values for // the operands also results in overflow. 
bool MinOverflow; - (void)LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow); + (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow); if (MinOverflow) return OverflowResult::AlwaysOverflows; @@ -4284,11 +4258,10 @@ static bool isTruePredicate(CmpInst::Predicate Pred, // If X & C == 0 then (X | C) == X +_{nuw} C if (match(A, m_Or(m_Value(X), m_APInt(CA))) && match(B, m_Or(m_Specific(X), m_APInt(CB)))) { - unsigned BitWidth = CA->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT); + KnownBits Known(CA->getBitWidth()); + computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT); - if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB) + if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) return true; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 1d3cde2f5ddb..e5aba03c8dc1 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -726,54 +726,50 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { } void ModuleBitcodeWriter::writeAttributeGroupTable() { - const std::vector<AttributeList> &AttrGrps = VE.getAttributeGroups(); + const std::vector<ValueEnumerator::IndexAndAttrSet> &AttrGrps = + VE.getAttributeGroups(); if (AttrGrps.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); SmallVector<uint64_t, 64> Record; - for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) { - AttributeList AS = AttrGrps[i]; - for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) { - AttributeList A = AS.getSlotAttributes(i); - - Record.push_back(VE.getAttributeGroupID(A)); - Record.push_back(AS.getSlotIndex(i)); - - for (AttributeList::iterator I = AS.begin(0), E = AS.end(0); I != E; - ++I) { - Attribute Attr = *I; - if (Attr.isEnumAttribute()) { - Record.push_back(0); - Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); - } else if (Attr.isIntAttribute()) { - Record.push_back(1); - Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); - Record.push_back(Attr.getValueAsInt()); - } else { - StringRef Kind = Attr.getKindAsString(); - StringRef Val = Attr.getValueAsString(); - - Record.push_back(Val.empty() ? 3 : 4); - Record.append(Kind.begin(), Kind.end()); + for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) { + unsigned AttrListIndex = Pair.first; + AttributeSet AS = Pair.second; + Record.push_back(VE.getAttributeGroupID(Pair)); + Record.push_back(AttrListIndex); + + for (Attribute Attr : AS) { + if (Attr.isEnumAttribute()) { + Record.push_back(0); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); + } else if (Attr.isIntAttribute()) { + Record.push_back(1); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); + Record.push_back(Attr.getValueAsInt()); + } else { + StringRef Kind = Attr.getKindAsString(); + StringRef Val = Attr.getValueAsString(); + + Record.push_back(Val.empty() ? 
3 : 4); + Record.append(Kind.begin(), Kind.end()); + Record.push_back(0); + if (!Val.empty()) { + Record.append(Val.begin(), Val.end()); Record.push_back(0); - if (!Val.empty()) { - Record.append(Val.begin(), Val.end()); - Record.push_back(0); - } } } - - Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); - Record.clear(); } + + Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); + Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeAttributeTable() { - const std::vector<AttributeList> &Attrs = VE.getAttributes(); + const std::vector<AttributeList> &Attrs = VE.getAttributeLists(); if (Attrs.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); @@ -782,7 +778,8 @@ void ModuleBitcodeWriter::writeAttributeTable() { for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { const AttributeList &A = Attrs[i]; for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) - Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i))); + Record.push_back( + VE.getAttributeGroupID({A.getSlotIndex(i), A.getSlotAttributes(i)})); Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); @@ -1270,7 +1267,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.push_back(F.getCallingConv()); Vals.push_back(F.isDeclaration()); Vals.push_back(getEncodedLinkage(F)); - Vals.push_back(VE.getAttributeID(F.getAttributes())); + Vals.push_back(VE.getAttributeListID(F.getAttributes())); Vals.push_back(Log2_32(F.getAlignment())+1); Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0); Vals.push_back(getEncodedVisibility(F)); @@ -2616,7 +2613,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Code = bitc::FUNC_CODE_INST_INVOKE; - Vals.push_back(VE.getAttributeID(II->getAttributes())); + Vals.push_back(VE.getAttributeListID(II->getAttributes())); Vals.push_back(II->getCallingConv() | 1 << 13); Vals.push_back(VE.getValueID(II->getNormalDest())); Vals.push_back(VE.getValueID(II->getUnwindDest())); @@ -2808,7 +2805,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Code = bitc::FUNC_CODE_INST_CALL; - Vals.push_back(VE.getAttributeID(CI.getAttributes())); + Vals.push_back(VE.getAttributeListID(CI.getAttributes())); unsigned Flags = getOptimizationFlags(&I); Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV | diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 3800d9abd429..861150766986 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -891,19 +891,19 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { if (PAL.isEmpty()) return; // null is always 0. // Do a lookup. - unsigned &Entry = AttributeMap[PAL]; + unsigned &Entry = AttributeListMap[PAL]; if (Entry == 0) { // Never saw this before, add it. - Attribute.push_back(PAL); - Entry = Attribute.size(); + AttributeLists.push_back(PAL); + Entry = AttributeLists.size(); } // Do lookups for all attribute groups. 
for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { - AttributeList AS = PAL.getSlotAttributes(i); - unsigned &Entry = AttributeGroupMap[AS]; + IndexAndAttrSet Pair = {PAL.getSlotIndex(i), PAL.getSlotAttributes(i)}; + unsigned &Entry = AttributeGroupMap[Pair]; if (Entry == 0) { - AttributeGroups.push_back(AS); + AttributeGroups.push_back(Pair); Entry = AttributeGroups.size(); } } diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 8a82aab29836..e7ccc8df1e5f 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -48,6 +48,10 @@ public: // For each value, we remember its Value* and occurrence frequency. typedef std::vector<std::pair<const Value*, unsigned> > ValueList; + /// Attribute groups as encoded in bitcode are almost AttributeSets, but they + /// include the AttributeList index, so we have to track that in our map. + typedef std::pair<unsigned, AttributeSet> IndexAndAttrSet; + UseListOrderStack UseListOrders; private: @@ -102,13 +106,13 @@ private: bool ShouldPreserveUseListOrder; - typedef DenseMap<AttributeList, unsigned> AttributeGroupMapType; + typedef DenseMap<IndexAndAttrSet, unsigned> AttributeGroupMapType; AttributeGroupMapType AttributeGroupMap; - std::vector<AttributeList> AttributeGroups; + std::vector<IndexAndAttrSet> AttributeGroups; - typedef DenseMap<AttributeList, unsigned> AttributeMapType; - AttributeMapType AttributeMap; - std::vector<AttributeList> Attribute; + typedef DenseMap<AttributeList, unsigned> AttributeListMapType; + AttributeListMapType AttributeListMap; + std::vector<AttributeList> AttributeLists; /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by /// the "getGlobalBasicBlockID" method. @@ -166,16 +170,17 @@ public: unsigned getInstructionID(const Instruction *I) const; void setInstructionID(const Instruction *I); - unsigned getAttributeID(AttributeList PAL) const { + unsigned getAttributeListID(AttributeList PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeMapType::const_iterator I = AttributeMap.find(PAL); - assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!"); + AttributeListMapType::const_iterator I = AttributeListMap.find(PAL); + assert(I != AttributeListMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } - unsigned getAttributeGroupID(AttributeList PAL) const { - if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL); + unsigned getAttributeGroupID(IndexAndAttrSet Group) const { + if (!Group.second.hasAttributes()) + return 0; // Null maps to zero. 
+ AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(Group); assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } @@ -206,8 +211,8 @@ public: const std::vector<const BasicBlock*> &getBasicBlocks() const { return BasicBlocks; } - const std::vector<AttributeList> &getAttributes() const { return Attribute; } - const std::vector<AttributeList> &getAttributeGroups() const { + const std::vector<AttributeList> &getAttributeLists() const { return AttributeLists; } + const std::vector<IndexAndAttrSet> &getAttributeGroups() const { return AttributeGroups; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 955524c2a676..3a57772cc7f5 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -964,10 +964,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // sure to update that as well. const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; if (!SU) continue; - for (DbgValueVector::iterator DVI = DbgValues.begin(), - DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q.second.Operand->getParent()) - UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); + UpdateDbgValues(DbgValues, Q.second.Operand->getParent(), + AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 04f7f419f5ea..d14d93100adb 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -60,6 +60,25 @@ public: if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg) MI.getOperand(0).setReg(NewReg); } + + /// Update all DBG_VALUE instructions that may be affected by the dependency + /// breaker's update of ParentMI to use NewReg. + void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI, + unsigned OldReg, unsigned NewReg) { + // The following code is dependent on the order in which the DbgValues are + // constructed in ScheduleDAGInstrs::buildSchedGraph. + MachineInstr *PrevDbgMI = nullptr; + for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) { + MachineInstr *PrevMI = DV.second; + if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) { + MachineInstr *DbgMI = DV.first; + UpdateDbgValue(*DbgMI, OldReg, NewReg); + PrevDbgMI = DbgMI; + } else if (PrevDbgMI) { + break; // If no match and already found a DBG_VALUE, we're done. + } + } + } }; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 028c79f3ab6d..d99065b1b67a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -825,41 +825,25 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << Name << ":"; } OS << V->getName(); - - const DIExpression *Expr = MI->getDebugExpression(); - auto Fragment = Expr->getFragmentInfo(); - if (Fragment) - OS << " [fragment offset=" << Fragment->OffsetInBits - << " size=" << Fragment->SizeInBits << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. - bool Deref = false; bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); int64_t Offset = MemLoc ? 
MI->getOperand(1).getImm() : 0; - for (unsigned i = 0; i < Expr->getNumElements(); ++i) { - uint64_t Op = Expr->getElement(i); - if (Op == dwarf::DW_OP_LLVM_fragment) { - // There can't be any operands after this in a valid expression - break; - } else if (Deref) { - // We currently don't support extra Offsets or derefs after the first - // one. Bail out early instead of emitting an incorrect comment. - OS << " [complex expression]"; - AP.OutStreamer->emitRawComment(OS.str()); - return true; - } else if (Op == dwarf::DW_OP_deref) { - Deref = true; - continue; - } - - uint64_t ExtraOffset = Expr->getElement(i++); - if (Op == dwarf::DW_OP_plus) - Offset += ExtraOffset; - else { - assert(Op == dwarf::DW_OP_minus); - Offset -= ExtraOffset; + const DIExpression *Expr = MI->getDebugExpression(); + if (Expr->getNumElements()) { + OS << '['; + bool NeedSep = false; + for (auto Op : Expr->expr_ops()) { + if (NeedSep) + OS << ", "; + else + NeedSep = true; + OS << dwarf::OperationEncodingString(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + OS << ' ' << Op.getArg(I); } + OS << "] "; } // Register or immediate value. Register 0 means undef. @@ -890,7 +874,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); Offset += TFI->getFrameIndexReference(*AP.MF, MI->getOperand(0).getIndex(), Reg); - Deref = true; + MemLoc = true; } if (Reg == 0) { // Suppress offset, it is not meaningful here. @@ -899,12 +883,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer->emitRawComment(OS.str()); return true; } - if (MemLoc || Deref) + if (MemLoc) OS << '['; OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } - if (MemLoc || Deref) + if (MemLoc) OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. @@ -936,6 +920,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { if (needsCFIMoves() == CFI_M_None) return; + // If there is no "real" instruction following this CFI instruction, skip + // emitting it; it would be beyond the end of the function's FDE range. + auto *MBB = MI.getParent(); + auto I = std::next(MI.getIterator()); + while (I != MBB->end() && I->isTransient()) + ++I; + if (I == MBB->instr_end() && + MBB->getReverseIterator() == MBB->getParent()->rbegin()) + return; + const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions(); unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; @@ -1046,15 +1040,23 @@ void AsmPrinter::EmitFunctionBody() { // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the // labels from collapsing together. Just emit a noop. - if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { + // Similarly, don't emit empty functions on Windows either. It can lead to + // duplicate entries (two functions with the same RVA) in the Guard CF Table + // after linking, causing the kernel not to load the binary: + // https://developercommunity.visualstudio.com/content/problem/45366/vc-linker-creates-invalid-dll-with-clang-cl.html + // FIXME: Hide this behind some API in e.g. MCAsmInfo or MCTargetStreamer. 
+ const Triple &TT = TM.getTargetTriple(); + if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() || + (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) { MCInst Noop; - MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop); - OutStreamer->AddComment("avoids zero-length function"); + MF->getSubtarget().getInstrInfo()->getNoop(Noop); // Targets can opt-out of emitting the noop here by leaving the opcode // unspecified. - if (Noop.getOpcode()) + if (Noop.getOpcode()) { + OutStreamer->AddComment("avoids zero-length function"); OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); + } } const Function *F = MF->getFunction(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 683e622e3d53..a0bf1632dff3 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -144,6 +144,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); + if (Dialect == InlineAsm::AD_Intel) + // We need this flag to be able to parse numbers like "0bH" + Parser->setParsingInlineAsm(true); if (MF) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 31c2b3b5e752..30bfd7c94e68 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -655,20 +655,12 @@ void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref_addr: { // Get the absolute offset for this DIE within the debug info/types section. unsigned Addr = Entry->getDebugSectionOffset(); - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { - const DwarfDebug *DD = AP->getDwarfDebug(); - if (DD) - assert(!DD->useSplitDwarf() && - "TODO: dwo files can't have relocations."); - const DIEUnit *Unit = Entry->getUnit(); - assert(Unit && "CUDie should belong to a CU."); - MCSection *Section = Unit->getSection(); - if (Section) { - const MCSymbol *SectionSym = Section->getBeginSymbol(); - AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); - return; - } + if (const MCSymbol *SectionSym = + Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { + AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + return; } + AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form)); return; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 9a64b4b76b06..20a415150b4d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -28,7 +28,7 @@ class DwarfFile; class MCSymbol; class LexicalScope; -class DwarfCompileUnit : public DwarfUnit { +class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index d72656bcc58d..6f442f5c3172 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -91,14 +91,6 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden, cl::init(Default)); static cl::opt<DefaultOnOff> -SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output DWARF5 split debug info."), - cl::values(clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - 
clEnumVal(Disable, "Disabled")), - cl::init(Default)); - -static cl::opt<DefaultOnOff> DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, cl::desc("Generate DWARF pubnames and pubtypes sections"), cl::values(clEnumVal(Default, "Default for platform"), @@ -253,11 +245,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) HasAppleExtensionAttributes = tuneForLLDB(); - // Handle split DWARF. Off by default for now. - if (SplitDwarf == Default) - HasSplitDwarf = false; - else - HasSplitDwarf = SplitDwarf == Enable; + // Handle split DWARF. + HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty(); // Pubnames/pubtypes on by default for GDB. if (DwarfPubSections == Default) @@ -412,7 +401,7 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { if (useSplitDwarf()) { NewCU.setSkeleton(constructSkeletonCU(NewCU)); NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit->getSplitDebugFilename()); + Asm->TM.Options.MCOptions.SplitDwarfFile); } // LTO with assembly output shares a single line table amongst multiple CUs. @@ -1885,7 +1874,7 @@ void DwarfDebug::emitDebugMacinfo() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfCompileUnit> NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode()->getSplitDebugFilename()); + Asm->TM.Options.MCOptions.SplitDwarfFile); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index f65dc151f301..ccd326917bfd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -117,8 +117,9 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, // Otherwise, attempt to find a covering set of sub-register numbers. // For example, Q0 on ARM is a composition of D0+D1. unsigned CurPos = 0; - // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; + // The size of the register in bits. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg); + unsigned RegSize = TRI.getRegSizeInBits(*RC); // Keep track of the bits in the register we already emitted, so we // can avoid emitting redundant aliasing subregs. SmallBitVector Coverage(RegSize, false); @@ -198,8 +199,10 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned FragmentOffsetInBits) { auto Fragment = ExprCursor.getFragmentInfo(); - if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) + if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) { + LocationKind = Unknown; return false; + } bool HasComplexExpression = false; auto Op = ExprCursor.peek(); @@ -212,6 +215,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // operation to multiple DW_OP_pieces. 
if (HasComplexExpression && DwarfRegs.size() > 1) { DwarfRegs.clear(); + LocationKind = Unknown; return false; } @@ -233,6 +237,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return Op.getOp() == dwarf::DW_OP_stack_value; })) { DwarfRegs.clear(); + LocationKind = Unknown; return false; } @@ -343,7 +348,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, emitUnsigned(Op->getArg(0)); break; case dwarf::DW_OP_stack_value: - assert(LocationKind == Unknown || LocationKind == Implicit); LocationKind = Implicit; break; case dwarf::DW_OP_swap: diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index bac0c204d04f..16fb20dd7e20 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1595,3 +1595,11 @@ void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) { getCU().addGlobalTypeUnitType(Ty, Context); } + +const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const { + if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return nullptr; + if (isDwoUnit()) + return nullptr; + return getSection()->getBeginSymbol(); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index d626ef920f95..e84df4650882 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -104,8 +104,6 @@ protected: bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); public: - virtual ~DwarfUnit(); - // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } @@ -289,6 +287,8 @@ public: void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); protected: + ~DwarfUnit(); + /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); @@ -335,9 +335,10 @@ private: void setIndexTyDie(DIE *D) { IndexTyDie = D; } virtual bool isDwoUnit() const = 0; + const MCSymbol *getCrossSectionRelativeBaseAddress() const override; }; -class DwarfTypeUnit : public DwarfUnit { +class DwarfTypeUnit final : public DwarfUnit { uint64_t TypeSignature; const DIE *Ty; DwarfCompileUnit &CU; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index e1eeddf0816c..b2d6652b075e 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -648,10 +648,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // as well. const SUnit *SU = MISUnitMap[Q->second->getParent()]; if (!SU) continue; - for (DbgValueVector::iterator DVI = DbgValues.begin(), - DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q->second->getParent()) - UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); + UpdateDbgValues(DbgValues, Q->second->getParent(), + AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 26454c1ef00f..cf97c635e79a 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -145,6 +145,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } } + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + // Now that selection is complete, there are no more generic vregs. Verify // that the size of the now-constrained vreg is unchanged and that it has a // register class. 
@@ -165,7 +167,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; if (VRegToType.second.isValid() && - VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) { + VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { reportGISelFailure(MF, TPC, MORE, "gisel-select", "VReg has explicit size different from class size", *MI); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 58778077bc0e..ef5818dabe23 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -76,6 +76,12 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { + case TargetOpcode::G_SDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SDIV_I32; + case TargetOpcode::G_UDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UDIV_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -87,31 +93,43 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } +static LegalizerHelper::LegalizeResult +simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType) { + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + const char *Name = TLI.getLibcallName(Libcall); + MIRBuilder.getMF().getFrameInfo().setHasCalls(true); + CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), + MachineOperand::CreateES(Name), + {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); + MI.eraseFromParent(); + return LegalizerHelper::Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = Ty.getSizeInBits(); + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: { + Type *Ty = Type::getInt32Ty(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, Ty); + } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); Type *Ty = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); - auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - auto Libcall = getRTLibDesc(MI.getOpcode(), Size); - const char *Name = TLI.getLibcallName(Libcall); - MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - CLI.lowerCall( - MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), {MI.getOperand(0).getReg(), Ty}, - {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}}); - MI.eraseFromParent(); - return Legalized; + return simpleLibcall(MI, MIRBuilder, Size, Ty); } } } diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 940957d02152..83b21e637097 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -48,7 +48,7 @@ bool RegisterBank::verify(const TargetRegisterInfo &TRI) const { // Verify that the Size of the register bank is big enough to cover // all the register classes it covers. - assert((getSize() >= SubRC.getSize() * 8) && + assert(getSize() >= TRI.getRegSizeInBits(SubRC) && "Size is not big enough for all the subclasses!"); assert(covers(SubRC) && "Not all subclasses are covered"); } diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index b2df2f159676..d5ae9a6776a4 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -421,7 +421,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, RC = MRI.getRegClass(Reg); } assert(RC && "Unable to deduce the register class"); - return RC->getSize() * 8; + return TRI.getRegSizeInBits(*RC); } //------------------------------------------------------------------------------ diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 6da174a53666..b6624b88fe23 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -925,9 +925,6 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, << CmpInst::getPredicateName(Pred) << ')'; break; } - case MachineOperand::MO_Placeholder: - OS << "<placeholder>"; - break; } } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 4bd5fbfe38e6..1faf6292a9c1 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -287,8 +287,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getIntrinsicID() == Other.getIntrinsicID(); case MachineOperand::MO_Predicate: return getPredicate() == Other.getPredicate(); - case MachineOperand::MO_Placeholder: - return true; } llvm_unreachable("Invalid machine operand type"); } @@ -337,8 +335,6 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); - case MachineOperand::MO_Placeholder: - return hash_combine(); } llvm_unreachable("Invalid machine operand type"); } @@ -515,9 +511,6 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << CmpInst::getPredicateName(Pred) << '>'; break; } - case MachineOperand::MO_Placeholder: - OS << "<placeholder>"; - break; } if (unsigned TF = getTargetFlags()) OS << "[TF=" << TF << ']'; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index b3d18435985e..7eb991744f01 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ 
b/lib/CodeGen/MachineLICM.cpp @@ -330,7 +330,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { /// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { // If we lost memory operands, conservatively assume that the instruction - // writes to all slots. + // writes to all slots. if (MI->memoperands_empty()) return true; for (const MachineMemOperand *MemOp : MI->memoperands()) { @@ -708,7 +708,7 @@ void MachineLICM::SinkIntoLoop() { for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. - // As such, it must not have side-effects, e.g. such as a call has. + // As such, it must not have side-effects, e.g. such as a call has. if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) Candidates.push_back(&*I); } @@ -837,9 +837,9 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, /// constant pool. static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); - + // If we lost memory operands, conservatively assume that the instruction - // reads from everything.. + // reads from everything.. if (MI.memoperands_empty()) return true; @@ -1337,7 +1337,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); // Since we are moving the instruction out of its basic block, we do not - // retain its debug location. Doing so would degrade the debugging + // retain its debug location. Doing so would degrade the debugging // experience and adversely affect the accuracy of profiling information. MI->setDebugLoc(DebugLoc()); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 1354009794cb..570a0cd0ba90 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -373,22 +373,22 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, FixedSlot->Reg != Reg) ++FixedSlot; + unsigned Size = RegInfo->getSpillSize(*RC); if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. - unsigned Align = RC->getAlignment(); + unsigned Align = RegInfo->getSpillAlignment(*RC); unsigned StackAlign = TFI->getStackAlignment(); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI.CreateStackObject(Size, Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = - MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset); } CS.setFrameIdx(FrameIdx); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 283d84629f8e..c606b7b83310 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -212,8 +212,9 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return SS; // Already has space allocated? // Allocate a new stack object for this spill location... 
- int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); + int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 6392136fa290..35db30f89976 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -395,8 +395,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. const MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned NeedSize = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + unsigned NeedSize = TRI->getSpillSize(*RC); + unsigned NeedAlign = TRI->getSpillAlignment(*RC); unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max(); int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4702d63cb617..1251ae6262b8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3878,27 +3878,29 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) return false; + SDValue N0 = N.getOperand(0); + unsigned Opc0 = N0.getOpcode(); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); if (!N1C) return false; - unsigned Num; + unsigned MaskByteOffset; switch (N1C->getZExtValue()) { default: return false; - case 0xFF: Num = 0; break; - case 0xFF00: Num = 1; break; - case 0xFF0000: Num = 2; break; - case 0xFF000000: Num = 3; break; + case 0xFF: MaskByteOffset = 0; break; + case 0xFF00: MaskByteOffset = 1; break; + case 0xFF0000: MaskByteOffset = 2; break; + case 0xFF000000: MaskByteOffset = 3; break; } // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 
- SDValue N0 = N.getOperand(0); if (Opc == ISD::AND) { - if (Num == 0 || Num == 2) { + if (MaskByteOffset == 0 || MaskByteOffset == 2) { // (x >> 8) & 0xff // (x >> 8) & 0xff0000 - if (N0.getOpcode() != ISD::SRL) + if (Opc0 != ISD::SRL) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3906,7 +3908,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { } else { // (x << 8) & 0xff00 // (x << 8) & 0xff000000 - if (N0.getOpcode() != ISD::SHL) + if (Opc0 != ISD::SHL) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3915,7 +3917,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { } else if (Opc == ISD::SHL) { // (x & 0xff) << 8 // (x & 0xff0000) << 8 - if (Num != 0 && Num != 2) + if (MaskByteOffset != 0 && MaskByteOffset != 2) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3923,17 +3925,17 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { } else { // Opc == ISD::SRL // (x & 0xff00) >> 8 // (x & 0xff000000) >> 8 - if (Num != 1 && Num != 3) + if (MaskByteOffset != 1 && MaskByteOffset != 3) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } - if (Parts[Num]) + if (Parts[MaskByteOffset]) return false; - Parts[Num] = N0.getOperand(0).getNode(); + Parts[MaskByteOffset] = N0.getOperand(0).getNode(); return true; } @@ -4198,20 +4200,22 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // reassociate or if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) != 0. - if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) { - ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); - if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, - N1C, C1)) - return DAG.getNode( - ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); - return SDValue(); + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) { + if (SDValue COR = + DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); + } } } + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) @@ -5611,24 +5615,24 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) if (N1C && N0.getOpcode() == ISD::TRUNCATE && - N0.getOperand(0).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); - // This is only valid if the OpSizeInBits + c1 = size of inner shift. 
- if (c1 + OpSizeInBits == InnerShiftSize) { - SDLoc DL(N0); - if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::TRUNCATE, DL, VT, - DAG.getNode(ISD::SRL, DL, InnerShiftVT, - N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, DL, - ShiftCountVT))); + N0.getOperand(0).getOpcode() == ISD::SRL) { + if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { + uint64_t c1 = N001C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. + if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, + N0.getOperand(0).getOperand(0), + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); + } } } @@ -11641,7 +11645,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // Check if this is a trunc(lshr). if (User->getOpcode() == ISD::SRL && User->hasOneUse() && isa<ConstantSDNode>(User->getOperand(1))) { - Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); + Shift = User->getConstantOperandVal(1); User = *User->use_begin(); } @@ -13120,8 +13124,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // do this only if indices are both constants and Idx1 < Idx0. if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() && isa<ConstantSDNode>(InVec.getOperand(2))) { - unsigned OtherElt = - cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); + unsigned OtherElt = InVec.getConstantOperandVal(2); if (Elt < OtherElt) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, @@ -14065,7 +14068,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { if (!isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); - int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + int ExtIdx = Op.getConstantOperandVal(1); // Ensure that we are extracting a subvector from a vector the same // size as the result. @@ -15049,7 +15052,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && N1.getValueType() == N0.getOperand(1).getValueType() && isa<ConstantSDNode>(N0.getOperand(2))) { - unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue(); + unsigned OtherIdx = N0.getConstantOperandVal(2); if (InsIdx < OtherIdx) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, @@ -16088,6 +16091,19 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { if (Op1->isInvariant() && Op0->writeMem()) return false; + unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + + // Check for BaseIndexOffset matching. + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); + if (BasePtr0.equalBaseIndex(BasePtr1)) + return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) || + (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset)); + + // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis + // modified to use BaseIndexOffset. + // Gather base node and offset information. 
SDValue Base0, Base1; int64_t Offset0, Offset1; @@ -16099,8 +16115,6 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { Base1, Offset1, GV1, CV1); // If they have the same base address, then check to see if they overlap. - unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; - unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1))) return !((Offset0 + NumBytes0) <= Offset1 || (Offset1 + NumBytes1) <= Offset0); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e85d1951e3ae..b235e19aaab2 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -161,7 +161,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (VRBase) { DstRC = MRI->getRegClass(VRBase); } else if (UseRC) { - assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + assert(TRI->isTypeLegalForClass(*UseRC, VT) && + "Incompatible phys register def and uses!"); DstRC = UseRC; } else { DstRC = TLI->getRegClassFor(VT); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3bae3bf9ab7c..fdebb8bd00db 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3497,11 +3497,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // part. unsigned LoSize = VT.getSizeInBits(); SDValue HiLHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getNode(ISD::SRA, dl, VT, LHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue HiRHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getNode(ISD::SRA, dl, VT, RHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 85068e890756..9ed70c9b4db9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3251,7 +3251,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } @@ -3294,7 +3294,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { @@ -3342,7 +3342,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) { @@ -3445,5 +3445,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps); + return DAG.getBuildVector(N->getValueType(0), dl, NewOps); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c02b8960b36c..aa69e0e2adfc 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -362,8 +362,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector<SDValue, 8> Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); 
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, - makeArrayRef(Ops.data(), NumElts)); + SDValue Vec = + DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -396,10 +396,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { NewElts.push_back(Hi); } - SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*DAG.getContext(), - NewVT, NewElts.size()), - NewElts); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()); + SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); @@ -458,7 +456,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); for (unsigned i = 1; i < NumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1a7d7b7af5fa..4a3160297d64 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -512,7 +512,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); + return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -523,16 +523,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); } -/// If the input is a vector that needs to be scalarized, it must be <1 x ty>, -/// so just return the element, ignoring the index. -SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - SDValue Res = GetScalarizedVector(N->getOperand(0)); - if (Res.getValueType() != N->getValueType(0)) - Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), - Res); - return Res; -} - +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue Res = GetScalarizedVector(N->getOperand(0));
+  if (Res.getValueType() != VT)
+    Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
+  return Res;
+}
+
/// If the input condition is a vector that needs to be scalarized, it must be /// <1 x i1>, so just convert to a normal ISD::SELECT @@ -2631,7 +2631,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { if (InVT.isVector()) NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); else - NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); + NewVec = DAG.getBuildVector(NewInVT, dl, Ops); return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e923e30e5037..69b76fbe57d2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1320,6 +1320,18 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (MCID.hasOptionalDef()) { + // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit. + // This operand can be either a def of CPSR, if the S bit is set; or a use + // of %noreg. When the OptionalDef is set to a valid register, we need to + // handle it in the same way as an ImplicitDef. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) + if (MCID.OpInfo[i].isOptionalDef()) { + const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); + unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + } if (!MCID.ImplicitDefs) continue; for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 523f409e6b2c..439f67f1e155 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" @@ -2868,7 +2869,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // A left-shift of a constant one will have exactly one bit set because // shifting the bit off the end is undefined. if (Val.getOpcode() == ISD::SHL) { - auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0)); + auto *C = isConstOrConstSplat(Val.getOperand(0)); if (C && C->getAPIntValue() == 1) return true; } @@ -2876,7 +2877,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // Similarly, a logical right-shift of a constant sign-bit will have exactly // one bit set. if (Val.getOpcode() == ISD::SRL) { - auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0)); + auto *C = isConstOrConstSplat(Val.getOperand(0)); if (C && C->getAPIntValue().isSignMask()) return true; } @@ -7539,10 +7540,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, + KnownBits Known(PtrWidth); + llvm::computeKnownBits(const_cast<GlobalValue *>(GV), Known, getDataLayout()); - unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned AlignBits = Known.Zero.countTrailingOnes(); unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; if (Align) return MinAlign(Align, GVOffset); @@ -7629,52 +7630,52 @@ Type *ConstantPoolSDNode::getType() const { return Val.ConstVal->getType(); } -bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, - APInt &SplatUndef, +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits, - bool isBigEndian) const { + bool IsBigEndian) const { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); - unsigned sz = VT.getSizeInBits(); - if (MinSplatBits > sz) + unsigned VecWidth = VT.getSizeInBits(); + if (MinSplatBits > VecWidth) return false; - SplatValue = APInt(sz, 0); - SplatUndef = APInt(sz, 0); + // FIXME: The widths are based on this node's type, but build vectors can + // truncate their operands. + SplatValue = APInt(VecWidth, 0); + SplatUndef = APInt(VecWidth, 0); - // Get the bits. Bits with undefined values (when the corresponding element + // Get the bits. Bits with undefined values (when the corresponding element // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared - // in SplatValue. If any of the values are not constant, give up and return + // in SplatValue. If any of the values are not constant, give up and return // false. - unsigned int nOps = getNumOperands(); - assert(nOps > 0 && "isConstantSplat has 0-size build vector"); - unsigned EltBitSize = VT.getScalarSizeInBits(); + unsigned int NumOps = getNumOperands(); + assert(NumOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltWidth = VT.getScalarSizeInBits(); - for (unsigned j = 0; j < nOps; ++j) { - unsigned i = isBigEndian ? nOps-1-j : j; + for (unsigned j = 0; j < NumOps; ++j) { + unsigned i = IsBigEndian ? NumOps - 1 - j : j; SDValue OpVal = getOperand(i); - unsigned BitPos = j * EltBitSize; + unsigned BitPos = j * EltWidth; if (OpVal.isUndef()) - SplatUndef.setBits(BitPos, BitPos + EltBitSize); - else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize), - BitPos); - else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatUndef.setBits(BitPos, BitPos + EltWidth); + else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); + else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal)) SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); else return false; } - // The build_vector is all constants or undefs. Find the smallest element + // The build_vector is all constants or undefs. Find the smallest element // size that splats the vector. - HasAnyUndefs = (SplatUndef != 0); - while (sz > 8) { - unsigned HalfSize = sz / 2; + // FIXME: This does not work for vectors with elements less than 8 bits. 
+ while (VecWidth > 8) { + unsigned HalfSize = VecWidth / 2; APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); APInt LowValue = SplatValue.trunc(HalfSize); APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); @@ -7688,10 +7689,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SplatValue = HighValue | LowValue; SplatUndef = HighUndef & LowUndef; - sz = HalfSize; + VecWidth = HalfSize; } - SplatBitSize = sz; + SplatBitSize = VecWidth; return true; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2c58953ee908..6a737ed84ea4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -362,11 +362,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, return DAG.getUNDEF(ValueVT); } - if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) - Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); + EVT ValueSVT = ValueVT.getVectorElementType(); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); + return DAG.getBuildVector(ValueVT, DL, Val); } static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, @@ -537,7 +537,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); + Val = DAG.getBuildVector(PartVT, DL, Ops); // FIXME: Use CONCAT for 2x -> 4x. @@ -1088,8 +1088,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ArrayType>(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1141,7 +1140,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. 
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -3147,7 +3146,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops)); + setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3969,9 +3968,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFenceOperandTy(DAG.getDataLayout())); Ops[2] = DAG.getConstant(I.getSynchScope(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -4896,11 +4895,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Entry.Node = Src; Args.push_back(Entry); - + Entry.Ty = I.getArgOperand(2)->getType(); Entry.Node = NumElements; Args.push_back(Entry); - + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); Entry.Node = ElementSize; Args.push_back(Entry); @@ -5183,7 +5182,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); + ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, @@ -5743,7 +5742,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fadd: Opcode = ISD::STRICT_FADD; break; case Intrinsic::experimental_constrained_fsub: @@ -6653,12 +6652,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), - OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; @@ -6666,12 +6665,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. - if (OpInfo.Type == InlineAsm::isInput && - PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && + !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). 
- MVT RegVT = *PhysReg.second->vt_begin(); + MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); @@ -6699,12 +6698,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = *RC->vt_begin(); + ValueVT = *TRI.legalclasstypes_begin(*RC); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); @@ -6730,7 +6729,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. if (const TargetRegisterClass *RC = PhysReg.second) { - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 93c6738f650d..136dec873cb8 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -342,11 +342,16 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// If the specified instruction has a constant integer operand and there are /// bits set in that constant that are not demanded, then clear those bits and /// return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( - SDValue Op, const APInt &Demanded) { +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + SelectionDAG &DAG = TLO.DAG; SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); + // Do target-specific constant optimization. + if (targetShrinkDemandedConstant(Op, Demanded, TLO)) + return TLO.New.getNode(); + // FIXME: ISD::SELECT, ISD::SELECT_CC switch (Opcode) { default: @@ -367,7 +372,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); - return CombineTo(Op, NewOp); + return TLO.CombineTo(Op, NewOp); } break; @@ -380,15 +385,17 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, - unsigned BitWidth, - const APInt &Demanded, - const SDLoc &dl) { +bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, + const APInt &Demanded, + TargetLoweringOpt &TLO) const { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + SelectionDAG &DAG = TLO.DAG; + SDLoc dl(Op); + // Early return, as this function cannot handle vector types. 
if (Op.getValueType().isVector()) return false; @@ -418,23 +425,22 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, bool NeedZext = DemandedSize > SmallVTBits; SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, dl, Op.getValueType(), X); - return CombineTo(Op, Z); + return TLO.CombineTo(Op, Z); } } return false; } bool -TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, - unsigned OpIdx, - const APInt &Demanded, - DAGCombinerInfo &DCI) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, + const APInt &Demanded, + DAGCombinerInfo &DCI, + TargetLoweringOpt &TLO) const { SDValue Op = User->getOperand(OpIdx); APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, - *this, 0, true)) + if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, + TLO, 0, true)) return false; @@ -446,9 +452,9 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, // with the value 'x', which will give us: // Old = i32 and x, 0xffffff // New = x - if (Old.hasOneUse()) { + if (TLO.Old.hasOneUse()) { // For the one use case, we just commit the change. - DCI.CommitTargetLoweringOpt(*this); + DCI.CommitTargetLoweringOpt(TLO); return true; } @@ -456,17 +462,17 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, // AssumeSingleUse flag is not propogated to recursive calls of // SimplifyDemanded bits, so the only node with multiple use that // it will attempt to combine will be opt. - assert(Old == Op); + assert(TLO.Old == Op); SmallVector <SDValue, 4> NewOps; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (i == OpIdx) { - NewOps.push_back(New); + NewOps.push_back(TLO.New); continue; } NewOps.push_back(User->getOperand(i)); } - DAG.UpdateNodeOperands(User, NewOps); + TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); @@ -585,7 +591,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -620,10 +626,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((NewMask & (KnownZero|KnownZero2)) == NewMask) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -654,10 +660,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. 
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -682,7 +688,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -727,7 +733,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // If it already has all the bits set, nothing to change // but don't shrink either! - } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + } else if (ShrinkDemandedConstant(Op, NewMask, TLO)) { return true; } } @@ -746,7 +752,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -764,7 +770,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -1284,7 +1290,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, KnownOne2, TLO, Depth+1) || // See if the operation should be performed at a smaller bit width. - TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) { + ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { const SDNodeFlags *Flags = Op.getNode()->getFlags(); if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we @@ -1358,31 +1364,38 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must +// work with truncating build vectors and vectors with elements of less than +// 8 bits. bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - if (!CN) { - const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if (!BV) - return false; - - // Only interested in constant splats, we don't care about undef - // elements in identifying boolean constants and getConstantSplatNode - // returns NULL if all ops are undef; - CN = BV->getConstantSplatNode(); + APInt CVal; + if (auto *CN = dyn_cast<ConstantSDNode>(N)) { + CVal = CN->getAPIntValue(); + } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) { + auto *CN = BV->getConstantSplatNode(); if (!CN) return false; + + // If this is a truncating build vector, truncate the splat value. + // Otherwise, we may fail to match the expected values below. 
+ unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits(); + CVal = CN->getAPIntValue(); + if (BVEltWidth < CVal.getBitWidth()) + CVal = CVal.trunc(BVEltWidth); + } else { + return false; } switch (getBooleanContents(N->getValueType(0))) { case UndefinedBooleanContent: - return CN->getAPIntValue()[0]; + return CVal[0]; case ZeroOrOneBooleanContent: - return CN->isOne(); + return CVal == 1; case ZeroOrNegativeOneBooleanContent: - return CN->isAllOnesValue(); + return CVal.isAllOnesValue(); } llvm_unreachable("Invalid boolean contents"); @@ -2535,7 +2548,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, for (const TargetRegisterClass *RC : RI->regclasses()) { // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. - if (!isLegalRC(RC)) + if (!isLegalRC(*RI, *RC)) continue; for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); @@ -2547,9 +2560,9 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, // If this register class has the requested value type, return it, // otherwise keep searching and return the first class found // if no other is found which explicitly has the requested type. - if (RC->hasType(VT)) + if (RI->isTypeLegalForClass(*RC, VT)) return S; - else if (!R.second) + if (!R.second) R = S; } } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 1a8ec5bff322..315b059c5ac9 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -161,7 +161,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); - Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset); + Locs.emplace_back(Location::Register, TRI->getSpillSize(*RC), + DwarfRegNum, Offset); return ++MOI; } @@ -245,7 +246,7 @@ void StackMaps::print(raw_ostream &OS) { StackMaps::LiveOutReg StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + unsigned Size = TRI->getSpillSize(*TRI->getMinimalPhysRegClass(Reg)); return LiveOutReg(Reg, DwarfRegNum, Size); } diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 711144a34743..14c5adc0d898 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -345,12 +345,12 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!SubIdx) { - Size = RC->getSize(); + Size = TRI->getSpillSize(*RC); Offset = 0; return true; } - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). 
@@ -364,10 +364,10 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, Size = BitSize /= 8; Offset = (unsigned)BitOffset / 8; - assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); + assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); if (!MF.getDataLayout().isLittleEndian()) { - Offset = RC->getSize() - (Offset + Size); + Offset = TRI->getSpillSize(*RC) - (Offset + Size); } return true; } @@ -428,8 +428,8 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, return nullptr; } -void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - llvm_unreachable("Not a MachO target"); +void TargetInstrInfo::getNoop(MCInst &NopInst) const { + llvm_unreachable("Not implemented"); } static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 27630a3055cb..e579922bb69e 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1184,12 +1184,11 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, /// isLegalRC - Return true if the value types that can be represented by the /// specified register class are all legal. -bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { +bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) const { + for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) if (isTypeLegal(*I)) return true; - } return false; } @@ -1299,9 +1298,9 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { const TargetRegisterClass *SuperRC = TRI->getRegClass(i); // We want the largest possible spill size. - if (SuperRC->getSize() <= BestRC->getSize()) + if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC)) continue; - if (!isLegalRC(SuperRC)) + if (!isLegalRC(*TRI, *SuperRC)) continue; BestRC = SuperRC; } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 66cdad278e8d..f6e4c17d514c 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -156,8 +156,8 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { // this physreg. 
const TargetRegisterClass* BestRC = nullptr; for (const TargetRegisterClass* RC : regclasses()) { - if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || BestRC->hasSubClass(RC))) + if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) && + RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } @@ -207,7 +207,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A, if (unsigned Common = *A++ & *B++) { const TargetRegisterClass *RC = TRI->getRegClass(I + countTrailingZeros(Common)); - if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT)) return RC; } return nullptr; @@ -265,7 +265,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, const TargetRegisterClass *BestRC = nullptr; unsigned *BestPreA = &PreA; unsigned *BestPreB = &PreB; - if (RCA->getSize() < RCB->getSize()) { + if (getRegSizeInBits(*RCA) < getRegSizeInBits(*RCB)) { std::swap(RCA, RCB); std::swap(SubA, SubB); std::swap(BestPreA, BestPreB); @@ -273,7 +273,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, // Also terminate the search one we have found a register class as small as // RCA. - unsigned MinSize = RCA->getSize(); + unsigned MinSize = getRegSizeInBits(*RCA); for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) { unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA); @@ -281,7 +281,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, // Check if a common super-register class exists for this index pair. const TargetRegisterClass *RC = firstCommonClass(IA.getMask(), IB.getMask(), this); - if (!RC || RC->getSize() < MinSize) + if (!RC || getRegSizeInBits(*RC) < MinSize) continue; // The indexes must compose identically: PreA+SubA == PreB+SubB. @@ -290,7 +290,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, continue; // Is RC a better candidate than BestRC? - if (BestRC && RC->getSize() >= BestRC->getSize()) + if (BestRC && getRegSizeInBits(*RC) >= getRegSizeInBits(*BestRC)) continue; // Yes, RC is the smallest super-register seen so far. @@ -299,7 +299,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, *BestPreB = IB.getSubReg(); // Bail early if we reached MinSize. We won't find a better candidate. 
- if (BestRC->getSize() == MinSize) + if (getRegSizeInBits(*BestRC) == MinSize) return BestRC; } } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index c8946010e9d1..d10ca1a7ff91 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -73,8 +73,9 @@ void VirtRegMap::grow() { } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { - int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); + int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Align); ++NumSpillSlots; return SS; } diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 85e1eaedfc61..a12f8adfafe5 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -9,6 +9,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -112,10 +113,8 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { continue; } while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { - unsigned StringOffset = AccelSection.getU32(&DataOffset); - RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4); - if (Reloc != Relocs.end()) - StringOffset += Reloc->second.second; + unsigned StringOffset = + getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs); if (!StringOffset) break; OS << format(" Name: %08x \"%s\"\n", StringOffset, diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index bbb19b5e998d..7e8d04672c03 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -56,6 +56,16 @@ typedef DWARFDebugLine::LineTable DWARFLineTable; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; +uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs) { + if (!Relocs) + return Data.getUnsigned(Off, Size); + RelocAddrMap::const_iterator AI = Relocs->find(*Off); + if (AI == Relocs->end()) + return Data.getUnsigned(Off, Size); + return Data.getUnsigned(Off, Size) + AI->second.second; +} + static void dumpAccelSection(raw_ostream &OS, StringRef Name, const DWARFSection& Section, StringRef StringSection, bool LittleEndian) { @@ -212,11 +222,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, // sizes, but for simplicity we just use the address byte size of the last // compile unit (there is no easy and fast way to associate address range // list and the compile unit it describes). - DataExtractor rangesData(getRangeSection(), isLittleEndian(), + DataExtractor rangesData(getRangeSection().Data, isLittleEndian(), savedAddressByteSize); offset = 0; DWARFDebugRangeList rangeList; - while (rangeList.extract(rangesData, &offset)) + while (rangeList.extract(rangesData, &offset, getRangeSection().Relocs)) rangeList.dump(OS); } @@ -722,7 +732,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, *SectionData = data; if (name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. 
- RangeDWOSection = data; + RangeDWOSection.Data = data; } } else if (name == "debug_types") { // Find debug_types data by section rather than name as there are @@ -763,6 +773,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, .Case("debug_loc", &LocSection.Relocs) .Case("debug_info.dwo", &InfoDWOSection.Relocs) .Case("debug_line", &LineSection.Relocs) + .Case("debug_ranges", &RangeSection.Relocs) .Case("apple_names", &AppleNamesSection.Relocs) .Case("apple_types", &AppleTypesSection.Relocs) .Case("apple_namespaces", &AppleNamespacesSection.Relocs) @@ -845,7 +856,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { .Case("debug_frame", &DebugFrameSection) .Case("eh_frame", &EHFrameSection) .Case("debug_str", &StringSection) - .Case("debug_ranges", &RangeSection) + .Case("debug_ranges", &RangeSection.Data) .Case("debug_macinfo", &MacinfoSection) .Case("debug_pubnames", &PubNamesSection) .Case("debug_pubtypes", &PubTypesSection) diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index e4670519b797..ff6ed9c6741d 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -302,16 +303,9 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. - { - // If this address is in our relocation map, apply the relocation. - RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr); - if (AI != RMap->end()) { - const std::pair<uint8_t, int64_t> &R = AI->second; - State.Row.Address = - debug_line_data.getAddress(offset_ptr) + R.second; - } else - State.Row.Address = debug_line_data.getAddress(offset_ptr); - } + State.Row.Address = + getRelocatedValue(debug_line_data, debug_line_data.getAddressSize(), + offset_ptr, RMap); break; case DW_LNE_define_file: diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index e2799ab2d243..d5c34216ed53 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -48,18 +49,10 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) { // 2.6.2 Location Lists // A location list entry consists of: while (true) { + // A beginning and ending address offsets. Entry E; - RelocAddrMap::const_iterator AI = RelocMap.find(Offset); - // 1. A beginning address offset. ... - E.Begin = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.Begin += AI->second.second; - - AI = RelocMap.find(Offset); - // 2. An ending address offset. ... 
- E.End = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.End += AI->second.second; + E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); + E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index f1d82fda8c06..9380fe8fe85d 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -22,7 +23,8 @@ void DWARFDebugRangeList::clear() { Entries.clear(); } -bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) { +bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr, + const RelocAddrMap &Relocs) { clear(); if (!data.isValidOffset(*offset_ptr)) return false; @@ -33,8 +35,11 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) { while (true) { RangeListEntry entry; uint32_t prev_offset = *offset_ptr; - entry.StartAddress = data.getAddress(offset_ptr); - entry.EndAddress = data.getAddress(offset_ptr); + entry.StartAddress = + getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); + entry.EndAddress = + getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); + // Check that both values were extracted correctly. if (*offset_ptr != prev_offset + 2 * AddressSize) { clear(); diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 6de57b999adc..28592e4dfb65 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -334,11 +334,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data, (Form == DW_FORM_addr) ? U->getAddressByteSize() : U->getRefAddrByteSize(); - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - if (AI != U->getRelocMap()->end()) { - Value.uval = data.getUnsigned(offset_ptr, AddrSize) + AI->second.second; - } else - Value.uval = data.getUnsigned(offset_ptr, AddrSize); + Value.uval = + getRelocatedValue(data, AddrSize, offset_ptr, U->getRelocMap()); break; } case DW_FORM_exprloc: @@ -376,12 +373,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data, case DW_FORM_ref_sup4: case DW_FORM_strx4: case DW_FORM_addrx4: { - Value.uval = data.getU32(offset_ptr); - if (!U) - break; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr-4); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + const RelocAddrMap* RelocMap = U ? 
U->getRelocMap() : nullptr; + Value.uval = getRelocatedValue(data, 4, offset_ptr, RelocMap); break; } case DW_FORM_data8: @@ -411,11 +404,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data, case DW_FORM_strp_sup: { if (!U) return false; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - uint8_t Size = U->getDwarfOffsetByteSize(); - Value.uval = data.getUnsigned(offset_ptr, Size); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + Value.uval = getRelocatedValue(data, U->getDwarfOffsetByteSize(), + offset_ptr, U->getRelocMap()); break; } case DW_FORM_flag_present: diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index c3f467745402..f50487fc3ba3 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -32,7 +32,7 @@ using namespace llvm; using namespace dwarf; void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { - parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(), + parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(), C.getStringSection(), StringRef(), C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(), false); } @@ -40,16 +40,17 @@ void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { void DWARFUnitSectionBase::parseDWO(DWARFContext &C, const DWARFSection &DWOSection, DWARFUnitIndex *Index) { - parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(), + parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(), C.getStringDWOSection(), C.getStringOffsetDWOSection(), C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(), true); } DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, - bool IsDWO, const DWARFUnitSectionBase &UnitSection, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, + bool LE, bool IsDWO, + const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry) : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS), LineSection(LS), StringSection(SS), StringOffsetSection([&]() { @@ -142,9 +143,10 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset, DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. 
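[Editorial aside: related to the previous hunks, the range section is now threaded through DWARFUnit as a DWARFSection pointer, i.e. the raw bytes together with the relocations that apply to them, instead of a bare StringRef. A toy equivalent of that pairing; the names are illustrative, not the real class layout:

  #include <cstdint>
  #include <map>
  #include <string>
  #include <utility>

  struct ToySection {
    std::string Data;                                        // raw section bytes
    std::map<uint32_t, std::pair<uint8_t, int64_t>> Relocs;  // offset -> (width, addend)
  };
]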
assert(!DieArray.empty()); - DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize); + DataExtractor RangesData(RangeSection->Data, isLittleEndian, AddrSize); uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; - return RangeList.extract(RangesData, &ActualRangeListOffset); + return RangeList.extract(RangesData, &ActualRangeListOffset, + RangeSection->Relocs); } void DWARFUnit::clear() { diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index 5e8c0bdc171d..4e2474c51cb1 100644 --- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h" #include "llvm/DebugInfo/PDB/DIA/DIASession.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" @@ -720,7 +721,7 @@ uint32_t DIARawSymbol::getVirtualTableShapeId() const { return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId); } -std::unique_ptr<PDBSymbolTypeVTable> +std::unique_ptr<PDBSymbolTypeBuiltin> DIARawSymbol::getVirtualBaseTableType() const { CComPtr<IDiaSymbol> TableType; if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType) @@ -729,7 +730,7 @@ DIARawSymbol::getVirtualBaseTableType() const { auto RawVT = llvm::make_unique<DIARawSymbol>(Session, TableType); auto Pointer = llvm::make_unique<PDBSymbolTypePointer>(Session, std::move(RawVT)); - return unique_dyn_cast<PDBSymbolTypeVTable>(Pointer->getPointeeType()); + return unique_dyn_cast<PDBSymbolTypeBuiltin>(Pointer->getPointeeType()); } PDB_DataKind DIARawSymbol::getDataKind() const { diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp index 6ecf335812b5..ef47b92b4f2f 100644 --- a/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -21,12 +21,22 @@ #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::pdb; -static Error ErrorFromHResult(HRESULT Result, StringRef Context) { +template <typename... Ts> +static Error ErrorFromHResult(HRESULT Result, const char *Str, Ts &&... 
Args) { + SmallString<64> MessageStorage; + StringRef Context; + if (sizeof...(Args) > 0) { + MessageStorage = formatv(Str, std::forward<Ts>(Args)...).str(); + Context = MessageStorage; + } else + Context = Str; + switch (Result) { case E_PDB_NOT_FOUND: return make_error<GenericError>(generic_error_code::invalid_path, Context); @@ -95,8 +105,9 @@ Error DIASession::createFromPdb(StringRef Path, const wchar_t *Path16Str = reinterpret_cast<const wchar_t*>(Path16.data()); HRESULT HR; - if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str))) - return ErrorFromHResult(HR, "Calling loadDataFromPdb"); + if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str))) { + return ErrorFromHResult(HR, "Calling loadDataFromPdb {0}", Path); + } if (FAILED(HR = DiaDataSource->openSession(&DiaSession))) return ErrorFromHResult(HR, "Calling openSession"); diff --git a/lib/DebugInfo/PDB/Native/ModStream.cpp b/lib/DebugInfo/PDB/Native/ModStream.cpp index 08798cf0ed28..e87e2c407593 100644 --- a/lib/DebugInfo/PDB/Native/ModStream.cpp +++ b/lib/DebugInfo/PDB/Native/ModStream.cpp @@ -82,4 +82,8 @@ ModStream::lines(bool *HadError) const { return make_range(LineInfo.begin(HadError), LineInfo.end()); } +bool ModStream::hasLineInfo() const { + return C13LinesSubstream.getLength() > 0 || LinesSubstream.getLength() > 0; +} + Error ModStream::commit() { return Error::success(); } diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index 3aba35adb53f..70968d4330b0 100644 --- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" #include "llvm/Support/ConvertUTF.h" @@ -320,7 +321,7 @@ uint32_t NativeRawSymbol::getVirtualTableShapeId() const { return 0; } -std::unique_ptr<PDBSymbolTypeVTable> +std::unique_ptr<PDBSymbolTypeBuiltin> NativeRawSymbol::getVirtualBaseTableType() const { return nullptr; } diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp index 61cef093d4ce..aacefae80c3a 100644 --- a/lib/DebugInfo/PDB/UDTLayout.cpp +++ b/lib/DebugInfo/PDB/UDTLayout.cpp @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" @@ -39,36 +40,47 @@ static uint32_t getTypeLength(const PDBSymbol &Symbol) { return RawType.getLength(); } -StorageItemBase::StorageItemBase(const UDTLayoutBase &Parent, - const PDBSymbol &Symbol, - const std::string &Name, - uint32_t OffsetInParent, uint32_t Size) - : Parent(Parent), Symbol(Symbol), Name(Name), - OffsetInParent(OffsetInParent), SizeOf(Size) { +LayoutItemBase::LayoutItemBase(const UDTLayoutBase *Parent, + const PDBSymbol *Symbol, const std::string &Name, + uint32_t OffsetInParent, uint32_t Size, + bool IsElided) + : Symbol(Symbol), Parent(Parent), Name(Name), + OffsetInParent(OffsetInParent), SizeOf(Size), LayoutSize(Size), + IsElided(IsElided) { UsedBytes.resize(SizeOf, true); } -uint32_t StorageItemBase::deepPaddingSize() const { - // 
sizeof(Field) - sizeof(typeof(Field)) is trailing padding. - return SizeOf - getTypeLength(Symbol); +uint32_t LayoutItemBase::deepPaddingSize() const { + return UsedBytes.size() - UsedBytes.count(); +} + +uint32_t LayoutItemBase::tailPadding() const { + int Last = UsedBytes.find_last(); + + return UsedBytes.size() - (Last + 1); } DataMemberLayoutItem::DataMemberLayoutItem( - const UDTLayoutBase &Parent, std::unique_ptr<PDBSymbolData> DataMember) - : StorageItemBase(Parent, *DataMember, DataMember->getName(), - DataMember->getOffset(), getTypeLength(*DataMember)), - DataMember(std::move(DataMember)) { - auto Type = this->DataMember->getType(); + const UDTLayoutBase &Parent, std::unique_ptr<PDBSymbolData> Member) + : LayoutItemBase(&Parent, Member.get(), Member->getName(), + Member->getOffset(), getTypeLength(*Member), false), + DataMember(std::move(Member)) { + auto Type = DataMember->getType(); if (auto UDT = unique_dyn_cast<PDBSymbolTypeUDT>(Type)) { - // UDT data members might have padding in between fields, but otherwise - // a member should occupy its entire storage. - UsedBytes.resize(SizeOf, false); UdtLayout = llvm::make_unique<ClassLayout>(std::move(UDT)); + UsedBytes = UdtLayout->usedBytes(); } } +VBPtrLayoutItem::VBPtrLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr<PDBSymbolTypeBuiltin> Sym, + uint32_t Offset, uint32_t Size) + : LayoutItemBase(&Parent, Sym.get(), "<vbptr>", Offset, Size, false), + Type(std::move(Sym)) { +} + const PDBSymbolData &DataMemberLayoutItem::getDataMember() { - return *dyn_cast<PDBSymbolData>(&Symbol); + return *dyn_cast<PDBSymbolData>(Symbol); } bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; } @@ -77,60 +89,73 @@ const ClassLayout &DataMemberLayoutItem::getUDTLayout() const { return *UdtLayout; } -uint32_t DataMemberLayoutItem::deepPaddingSize() const { - uint32_t Result = StorageItemBase::deepPaddingSize(); - if (UdtLayout) - Result += UdtLayout->deepPaddingSize(); - return Result; -} - VTableLayoutItem::VTableLayoutItem(const UDTLayoutBase &Parent, - std::unique_ptr<PDBSymbolTypeVTable> VTable) - : StorageItemBase(Parent, *VTable, "<vtbl>", 0, getTypeLength(*VTable)), - VTable(std::move(VTable)) { - auto VTableType = cast<PDBSymbolTypePointer>(this->VTable->getType()); + std::unique_ptr<PDBSymbolTypeVTable> VT) + : LayoutItemBase(&Parent, VT.get(), "<vtbl>", 0, getTypeLength(*VT), false), + VTable(std::move(VT)) { + auto VTableType = cast<PDBSymbolTypePointer>(VTable->getType()); ElementSize = VTableType->getLength(); +} - Shape = - unique_dyn_cast<PDBSymbolTypeVTableShape>(VTableType->getPointeeType()); - if (Shape) - VTableFuncs.resize(Shape->getCount()); +UDTLayoutBase::UDTLayoutBase(const UDTLayoutBase *Parent, const PDBSymbol &Sym, + const std::string &Name, uint32_t OffsetInParent, + uint32_t Size, bool IsElided) + : LayoutItemBase(Parent, &Sym, Name, OffsetInParent, Size, IsElided) { + // UDT storage comes from a union of all the children's storage, so start out + // uninitialized. 
+ UsedBytes.reset(0, Size); + + initializeChildren(Sym); + if (LayoutSize < Size) + UsedBytes.resize(LayoutSize); } -UDTLayoutBase::UDTLayoutBase(const PDBSymbol &Symbol, const std::string &Name, - uint32_t Size) - : SymbolBase(Symbol), Name(Name), SizeOf(Size) { - UsedBytes.resize(Size); - ChildrenPerByte.resize(Size); - initializeChildren(Symbol); +uint32_t UDTLayoutBase::tailPadding() const { + uint32_t Abs = LayoutItemBase::tailPadding(); + if (!LayoutItems.empty()) { + const LayoutItemBase *Back = LayoutItems.back(); + uint32_t ChildPadding = Back->LayoutItemBase::tailPadding(); + if (Abs < ChildPadding) + Abs = 0; + else + Abs -= ChildPadding; + } + return Abs; } ClassLayout::ClassLayout(const PDBSymbolTypeUDT &UDT) - : UDTLayoutBase(UDT, UDT.getName(), UDT.getLength()), UDT(UDT) {} + : UDTLayoutBase(nullptr, UDT, UDT.getName(), 0, UDT.getLength(), false), + UDT(UDT) { + ImmediateUsedBytes.resize(SizeOf, false); + for (auto &LI : LayoutItems) { + uint32_t Begin = LI->getOffsetInParent(); + uint32_t End = Begin + LI->getLayoutSize(); + End = std::min(SizeOf, End); + ImmediateUsedBytes.set(Begin, End); + } +} ClassLayout::ClassLayout(std::unique_ptr<PDBSymbolTypeUDT> UDT) : ClassLayout(*UDT) { OwnedStorage = std::move(UDT); } -BaseClassLayout::BaseClassLayout(const UDTLayoutBase &Parent, - std::unique_ptr<PDBSymbolTypeBaseClass> Base) - : UDTLayoutBase(*Base, Base->getName(), Base->getLength()), - StorageItemBase(Parent, *Base, Base->getName(), Base->getOffset(), - Base->getLength()), - Base(std::move(Base)) { - IsVirtualBase = this->Base->isVirtualBaseClass(); -} - -uint32_t UDTLayoutBase::shallowPaddingSize() const { - return UsedBytes.size() - UsedBytes.count(); +uint32_t ClassLayout::immediatePadding() const { + return SizeOf - ImmediateUsedBytes.count(); } -uint32_t UDTLayoutBase::deepPaddingSize() const { - uint32_t Result = shallowPaddingSize(); - for (auto &Child : ChildStorage) - Result += Child->deepPaddingSize(); - return Result; +BaseClassLayout::BaseClassLayout(const UDTLayoutBase &Parent, + uint32_t OffsetInParent, bool Elide, + std::unique_ptr<PDBSymbolTypeBaseClass> B) + : UDTLayoutBase(&Parent, *B, B->getName(), OffsetInParent, B->getLength(), + Elide), + Base(std::move(B)) { + if (isEmptyBase()) { + // Special case an empty base so that it doesn't get treated as padding. + UsedBytes.resize(1); + UsedBytes.set(0); + } + IsVirtualBase = Base->isVirtualBaseClass(); } void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { @@ -138,15 +163,16 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { // followed by functions, followed by other. This ordering is necessary // so that bases and vtables get initialized before any functions which // may override them. - UniquePtrVector<PDBSymbolTypeBaseClass> Bases; UniquePtrVector<PDBSymbolTypeVTable> VTables; UniquePtrVector<PDBSymbolData> Members; + UniquePtrVector<PDBSymbolTypeBaseClass> VirtualBaseSyms; + auto Children = Sym.findAllChildren(); while (auto Child = Children->getNext()) { if (auto Base = unique_dyn_cast<PDBSymbolTypeBaseClass>(Child)) { if (Base->isVirtualBaseClass()) - VirtualBases.push_back(std::move(Base)); + VirtualBaseSyms.push_back(std::move(Base)); else Bases.push_back(std::move(Base)); } @@ -164,20 +190,33 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { Other.push_back(std::move(Child)); } + // We don't want to have any re-allocations in the list of bases, so make + // sure to reserve enough space so that our ArrayRefs don't get invalidated. 
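[Editorial aside: the padding queries in this hunk reduce to simple bit-vector arithmetic: deep padding is every byte no child ever claimed, and tail padding is everything after the last used byte. A standalone sketch of both, using std::vector<bool> in place of llvm::BitVector:

  #include <cstddef>
  #include <vector>

  // Bytes never covered by any member or base.
  static size_t deepPadding(const std::vector<bool> &UsedBytes) {
    size_t Used = 0;
    for (bool B : UsedBytes)
      Used += B;
    return UsedBytes.size() - Used;
  }

  // Bytes after the last used byte (0 if the object is used to the end).
  static size_t tailPadding(const std::vector<bool> &UsedBytes) {
    size_t Last = UsedBytes.size();
    while (Last > 0 && !UsedBytes[Last - 1])
      --Last;
    return UsedBytes.size() - Last;
  }
]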
+ AllBases.reserve(Bases.size() + VirtualBaseSyms.size()); + + // Only add non-virtual bases to the class first. Only at the end of the + // class, after all non-virtual bases and data members have been added do we + // add virtual bases. This way the offsets are correctly aligned when we go + // to lay out virtual bases. for (auto &Base : Bases) { - auto BL = llvm::make_unique<BaseClassLayout>(*this, std::move(Base)); - BaseClasses.push_back(BL.get()); + uint32_t Offset = Base->getOffset(); + // Non-virtual bases never get elided. + auto BL = llvm::make_unique<BaseClassLayout>(*this, Offset, false, + std::move(Base)); + AllBases.push_back(BL.get()); addChildToLayout(std::move(BL)); } + NonVirtualBases = AllBases; - for (auto &VT : VTables) { - auto VTLayout = llvm::make_unique<VTableLayoutItem>(*this, std::move(VT)); + assert(VTables.size() <= 1); + if (!VTables.empty()) { + auto VTLayout = + llvm::make_unique<VTableLayoutItem>(*this, std::move(VTables[0])); VTable = VTLayout.get(); addChildToLayout(std::move(VTLayout)); - continue; } for (auto &Data : Members) { @@ -186,150 +225,74 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { addChildToLayout(std::move(DM)); } - for (auto &Func : Funcs) { - if (!Func->isVirtual()) - continue; + // Make sure add virtual bases before adding functions, since functions may be + // overrides of virtual functions declared in a virtual base, so the VTables + // and virtual intros need to be correctly initialized. + for (auto &VB : VirtualBaseSyms) { + int VBPO = VB->getVirtualBasePointerOffset(); + if (!hasVBPtrAtOffset(VBPO)) { + if (auto VBP = VB->getRawSymbol().getVirtualBaseTableType()) { + auto VBPL = llvm::make_unique<VBPtrLayoutItem>(*this, std::move(VBP), + VBPO, VBP->getLength()); + VBPtr = VBPL.get(); + addChildToLayout(std::move(VBPL)); + } + } - if (Func->isIntroVirtualFunction()) - addVirtualIntro(*Func); - else - addVirtualOverride(*Func); + // Virtual bases always go at the end. So just look for the last place we + // ended when writing something, and put our virtual base there. + // Note that virtual bases get elided unless this is a top-most derived + // class. + uint32_t Offset = UsedBytes.find_last() + 1; + bool Elide = (Parent != nullptr); + auto BL = + llvm::make_unique<BaseClassLayout>(*this, Offset, Elide, std::move(VB)); + AllBases.push_back(BL.get()); + + // Only lay this virtual base out directly inside of *this* class if this + // is a top-most derived class. Keep track of it regardless, but only + // physically lay it out if it's a topmost derived class. + addChildToLayout(std::move(BL)); } + VirtualBases = makeArrayRef(AllBases).drop_front(NonVirtualBases.size()); + + if (Parent != nullptr) + LayoutSize = UsedBytes.find_last() + 1; } -void UDTLayoutBase::addVirtualIntro(PDBSymbolFunc &Func) { - // Kind of a hack, but we prefer the more common destructor name that people - // are familiar with, e.g. ~ClassName. It seems there are always both and - // the vector deleting destructor overwrites the nice destructor, so just - // ignore the vector deleting destructor. - if (Func.getName() == "__vecDelDtor") - return; - - if (!VTable) { - // FIXME: Handle this. What's most likely happening is we have an intro - // virtual in a derived class where the base also has an intro virtual. - // In this case the vtable lives in the base. 
What we really need is - // for each UDTLayoutBase to contain a list of all its vtables, and - // then propagate this list up the hierarchy so that derived classes have - // direct access to their bases' vtables. - return; +bool UDTLayoutBase::hasVBPtrAtOffset(uint32_t Off) const { + if (VBPtr && VBPtr->getOffsetInParent() == Off) + return true; + for (BaseClassLayout *BL : AllBases) { + if (BL->hasVBPtrAtOffset(Off - BL->getOffsetInParent())) + return true; } - - uint32_t Stride = VTable->getElementSize(); - - uint32_t Index = Func.getVirtualBaseOffset(); - assert(Index % Stride == 0); - Index /= Stride; - - VTable->setFunction(Index, Func); + return false; } -VTableLayoutItem *UDTLayoutBase::findVTableAtOffset(uint32_t RelativeOffset) { - if (VTable && VTable->getOffsetInParent() == RelativeOffset) - return VTable; - for (auto Base : BaseClasses) { - uint32_t Begin = Base->getOffsetInParent(); - uint32_t End = Begin + Base->getSize(); - if (RelativeOffset < Begin || RelativeOffset >= End) - continue; - - return Base->findVTableAtOffset(RelativeOffset - Begin); - } +void UDTLayoutBase::addChildToLayout(std::unique_ptr<LayoutItemBase> Child) { + uint32_t Begin = Child->getOffsetInParent(); - return nullptr; -} + if (!Child->isElided()) { + BitVector ChildBytes = Child->usedBytes(); -void UDTLayoutBase::addVirtualOverride(PDBSymbolFunc &Func) { - auto Signature = Func.getSignature(); - auto ThisAdjust = Signature->getThisAdjust(); - // ThisAdjust tells us which VTable we're looking for. Specifically, it's - // the offset into the current class of the VTable we're looking for. So - // look through the base hierarchy until we find one such that - // AbsoluteOffset(VT) == ThisAdjust - VTableLayoutItem *VT = findVTableAtOffset(ThisAdjust); - if (!VT) { - // FIXME: There really should be a vtable here. If there's not it probably - // means that the vtable is in a virtual base, which we don't yet support. - assert(!VirtualBases.empty()); - return; - } - int32_t OverrideIndex = -1; - // Now we've found the VTable. Func will not have a virtual base offset set, - // so instead we need to compare names and signatures. We iterate each item - // in the VTable. All items should already have non null entries because they - // were initialized by the intro virtual, which was guaranteed to come before. - for (auto ItemAndIndex : enumerate(VT->funcs())) { - auto Item = ItemAndIndex.value(); - assert(Item); - // If the name doesn't match, this isn't an override. Note that it's ok - // for the return type to not match (e.g. co-variant return). - if (Item->getName() != Func.getName()) { - if (Item->isDestructor() && Func.isDestructor()) { - OverrideIndex = ItemAndIndex.index(); - break; - } - continue; - } - // Now make sure it's the right overload. Get the signature of the existing - // vtable method and make sure it has the same arglist and the same cv-ness. - auto ExistingSig = Item->getSignature(); - if (ExistingSig->isConstType() != Signature->isConstType()) - continue; - if (ExistingSig->isVolatileType() != Signature->isVolatileType()) - continue; - - // Now compare arguments. Using the raw bytes of the PDB this would be - // trivial - // because there is an ArgListId and they should be identical. But DIA - // doesn't - // expose this, so the best we can do is iterate each argument and confirm - // that - // each one is identical. 
- if (ExistingSig->getCount() != Signature->getCount()) - continue; - bool IsMatch = true; - auto ExistingEnumerator = ExistingSig->getArguments(); - auto NewEnumerator = Signature->getArguments(); - for (uint32_t I = 0; I < ExistingEnumerator->getChildCount(); ++I) { - auto ExistingArg = ExistingEnumerator->getNext(); - auto NewArg = NewEnumerator->getNext(); - if (ExistingArg->getSymIndexId() != NewArg->getSymIndexId()) { - IsMatch = false; - break; - } - } - if (!IsMatch) - continue; + // Suppose the child occupies 4 bytes starting at offset 12 in a 32 byte + // class. When we call ChildBytes.resize(32), the Child's storage will + // still begin at offset 0, so we need to shift it left by offset bytes + // to get it into the right position. + ChildBytes.resize(UsedBytes.size()); + ChildBytes <<= Child->getOffsetInParent(); + UsedBytes |= ChildBytes; - // It's a match! Stick the new function into the VTable. - OverrideIndex = ItemAndIndex.index(); - break; - } - if (OverrideIndex == -1) { - // FIXME: This is probably due to one of the other FIXMEs in this file. - return; - } - VT->setFunction(OverrideIndex, Func); -} + if (ChildBytes.count() > 0) { + auto Loc = std::upper_bound(LayoutItems.begin(), LayoutItems.end(), Begin, + [](uint32_t Off, const LayoutItemBase *Item) { + return (Off < Item->getOffsetInParent()); + }); -void UDTLayoutBase::addChildToLayout(std::unique_ptr<StorageItemBase> Child) { - uint32_t Begin = Child->getOffsetInParent(); - uint32_t End = Begin + Child->getSize(); - // Due to the empty base optimization, End might point outside the bounds of - // the parent class. If that happens, just clamp the value. - End = std::min(End, getClassSize()); - - UsedBytes.set(Begin, End); - while (Begin != End) { - ChildrenPerByte[Begin].push_back(Child.get()); - ++Begin; + LayoutItems.insert(Loc, Child.get()); + } } - auto Loc = std::upper_bound( - ChildStorage.begin(), ChildStorage.end(), Begin, - [](uint32_t Off, const std::unique_ptr<StorageItemBase> &Item) { - return Off < Item->getOffsetInParent(); - }); - - ChildStorage.insert(Loc, std::move(Child)); + ChildStorage.push_back(std::move(Child)); }
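[Editorial aside: the new addChildToLayout above merges a child's used-byte mask into the parent by resizing it to the parent's width, shifting it up to the child's offset, and OR-ing it in. The same idea with standard containers, as a sketch rather than the BitVector-based original:

  #include <vector>

  // Mark the bytes a child occupies, placed at ChildOffset inside the parent.
  static void mergeChildBytes(std::vector<bool> &Parent,
                              const std::vector<bool> &Child,
                              unsigned ChildOffset) {
    for (unsigned I = 0; I < Child.size(); ++I)
      if (Child[I] && ChildOffset + I < Parent.size())
        Parent[ChildOffset + I] = true;
  }
]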
\ No newline at end of file diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index 59cef04cdece..b886021aee3f 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -1,6 +1,18 @@ -set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") -# Disable the coverage and sanitizer instrumentation for the fuzzer itself. -set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") +include(CheckCXXSourceCompiles) + +if( APPLE ) + CHECK_CXX_SOURCE_COMPILES(" + static thread_local int blah; + int main() { + return 0; + } + " HAS_THREAD_LOCAL) + + if( NOT HAS_THREAD_LOCAL ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Dthread_local=__thread") + endif() +endif() + if( LLVM_USE_SANITIZE_COVERAGE ) if(NOT "${LLVM_USE_SANITIZER}" STREQUAL "Address") message(FATAL_ERROR @@ -8,41 +20,50 @@ if( LLVM_USE_SANITIZE_COVERAGE ) "LLVM_USE_SANITIZE_COVERAGE=YES to be set." ) endif() + set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") + + # Disable the coverage and sanitizer instrumentation for the fuzzer itself. + set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") +endif() + +# Compile libFuzzer if the compilation is specifically requested, OR +# if the platform is known to be working. +if ( LLVM_USE_SANITIZE_COVERAGE OR CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" ) add_library(LLVMFuzzerNoMainObjects OBJECT - FuzzerCrossOver.cpp - FuzzerDriver.cpp - FuzzerExtFunctionsDlsym.cpp - FuzzerExtFunctionsDlsymWin.cpp - FuzzerExtFunctionsWeak.cpp - FuzzerExtraCounters.cpp - FuzzerIO.cpp - FuzzerIOPosix.cpp - FuzzerIOWindows.cpp - FuzzerLoop.cpp - FuzzerMerge.cpp - FuzzerMutate.cpp - FuzzerSHA1.cpp - FuzzerShmemPosix.cpp - FuzzerShmemWindows.cpp - FuzzerTracePC.cpp - FuzzerTraceState.cpp - FuzzerUtil.cpp - FuzzerUtilDarwin.cpp - FuzzerUtilLinux.cpp - FuzzerUtilPosix.cpp - FuzzerUtilWindows.cpp - ) + FuzzerCrossOver.cpp + FuzzerDriver.cpp + FuzzerExtFunctionsDlsym.cpp + FuzzerExtFunctionsDlsymWin.cpp + FuzzerExtFunctionsWeak.cpp + FuzzerExtraCounters.cpp + FuzzerIO.cpp + FuzzerIOPosix.cpp + FuzzerIOWindows.cpp + FuzzerLoop.cpp + FuzzerMerge.cpp + FuzzerMutate.cpp + FuzzerSHA1.cpp + FuzzerShmemPosix.cpp + FuzzerShmemWindows.cpp + FuzzerTracePC.cpp + FuzzerTraceState.cpp + FuzzerUtil.cpp + FuzzerUtilDarwin.cpp + FuzzerUtilLinux.cpp + FuzzerUtilPosix.cpp + FuzzerUtilWindows.cpp + ) add_library(LLVMFuzzerNoMain STATIC - $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects> - ) + $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects> + ) target_link_libraries(LLVMFuzzerNoMain ${LLVM_PTHREAD_LIB}) add_library(LLVMFuzzer STATIC - FuzzerMain.cpp - $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects> - ) + FuzzerMain.cpp + $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects> + ) target_link_libraries(LLVMFuzzer ${LLVM_PTHREAD_LIB}) +endif() - if( LLVM_INCLUDE_TESTS ) - add_subdirectory(test) - endif() +if( LLVM_USE_SANITIZE_COVERAGE AND LLVM_INCLUDE_TESTS ) + add_subdirectory(test) endif() diff --git a/lib/Fuzzer/FuzzerDefs.h b/lib/Fuzzer/FuzzerDefs.h index bd1827508002..27f5719236dd 100644 --- a/lib/Fuzzer/FuzzerDefs.h +++ b/lib/Fuzzer/FuzzerDefs.h @@ -36,17 +36,29 @@ #error "Support for your platform has not been implemented" #endif +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif + #define LIBFUZZER_POSIX LIBFUZZER_APPLE || LIBFUZZER_LINUX #ifdef __x86_64 -#define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +# if __has_attribute(target) +# define ATTRIBUTE_TARGET_POPCNT 
__attribute__((target("popcnt"))) +# else +# define ATTRIBUTE_TARGET_POPCNT +# endif #else -#define ATTRIBUTE_TARGET_POPCNT +# define ATTRIBUTE_TARGET_POPCNT #endif #ifdef __clang__ // avoid gcc warning. -# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# if __has_attribute(no_sanitize) +# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# else +# define ATTRIBUTE_NO_SANITIZE_MEMORY +# endif # define ALWAYS_INLINE __attribute__((always_inline)) #else # define ATTRIBUTE_NO_SANITIZE_MEMORY diff --git a/lib/Fuzzer/FuzzerMerge.h b/lib/Fuzzer/FuzzerMerge.h index cf4a0863571d..dd4c37b6e39c 100644 --- a/lib/Fuzzer/FuzzerMerge.h +++ b/lib/Fuzzer/FuzzerMerge.h @@ -69,7 +69,7 @@ struct Merger { size_t Merge(const std::set<uint32_t> &InitialFeatures, std::vector<std::string> *NewFiles); size_t Merge(std::vector<std::string> *NewFiles) { - return Merge({}, NewFiles); + return Merge(std::set<uint32_t>{}, NewFiles); } size_t ApproximateMemoryConsumption() const; std::set<uint32_t> AllFeatures() const; diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index d0b77e7218b9..b7de07170de9 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1103,35 +1103,34 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle() || - &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) { + const APFloat &APF = CFP->getValueAPF(); + if (&APF.getSemantics() == &APFloat::IEEEsingle() || + &APF.getSemantics() == &APFloat::IEEEdouble()) { // We would like to output the FP constant value in exponential notation, // but we cannot do this if doing so will lose precision. Check here to // make sure that we only output it in exponential format if we can parse // the value back and get the same value. // bool ignored; - bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble(); - bool isInf = CFP->getValueAPF().isInfinity(); - bool isNaN = CFP->getValueAPF().isNaN(); + bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble(); + bool isInf = APF.isInfinity(); + bool isNaN = APF.isNaN(); if (!isInf && !isNaN) { - double Val = isDouble ? CFP->getValueAPF().convertToDouble() : - CFP->getValueAPF().convertToFloat(); + double Val = isDouble ? APF.convertToDouble() : APF.convertToFloat(); SmallString<128> StrVal; - raw_svector_ostream(StrVal) << Val; - + APF.toString(StrVal, 6, 0, false); // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check // that the string matches the "[-+]?[0-9]" regex. // - if ((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) { - // Reparse stringized version! - if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { - Out << StrVal; - return; - } + assert(((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + "[-+]?[0-9] regex does not match!"); + // Reparse stringized version! + if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { + Out << StrVal; + return; } } // Otherwise we could not reparse it to exactly the same value, so we must @@ -1140,7 +1139,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // x86, so we must not use these types. 
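[Editorial aside: the AsmWriter hunk above keeps the long-standing rule that the short decimal form is emitted only when parsing it back reproduces the exact same double; otherwise the printer falls back to the hex encoding. A self-contained illustration of that round-trip test, not the AsmWriter code itself:

  #include <cstdio>
  #include <cstdlib>

  // True if a finite Val survives a print-then-reparse round trip through Buf.
  static bool printsExactly(double Val, char *Buf, unsigned Len) {
    std::snprintf(Buf, Len, "%g", Val);        // short decimal form
    return std::strtod(Buf, nullptr) == Val;   // lossless only if it reparses equal
  }
]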
static_assert(sizeof(double) == sizeof(uint64_t), "assuming that double is 64 bits!"); - APFloat apf = CFP->getValueAPF(); + APFloat apf = APF; // Floats are represented in ASCII IR as double, convert. if (!isDouble) apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, @@ -1153,27 +1152,27 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // These appear as a magic letter identifying the type, then a // fixed number of hex digits. Out << "0x"; - APInt API = CFP->getValueAPF().bitcastToAPInt(); - if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended()) { + APInt API = APF.bitcastToAPInt(); + if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) { Out << 'K'; Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, /*Upper=*/true); Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); return; - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad()) { + } else if (&APF.getSemantics() == &APFloat::IEEEquad()) { Out << 'L'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble()) { + } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) { Out << 'M'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf()) { + } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) { Out << 'H'; Out << format_hex_no_prefix(API.getZExtValue(), 4, /*Upper=*/true); diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 09f037365793..cf2925254695 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -255,17 +255,10 @@ public: /// \brief Retrieve the attribute set node for the given "slot" in the /// AttrNode list. - AttributeSet getSlotNode(unsigned Slot) const { + AttributeSet getSlotAttributes(unsigned Slot) const { return getSlotPair(Slot)->second; } - /// \brief Retrieve the attributes for the given "slot" in the AttrNode list. - /// \p Slot is an index into the AttrNodes list, not the index of the return / - /// parameter/ function which the attributes apply to. - AttributeList getSlotAttributes(unsigned Slot) const { - return AttributeList::get(Context, *getSlotPair(Slot)); - } - /// \brief Return true if the AttributeSet or the FunctionIndex has an /// enum attribute of the given kind. bool hasFnAttribute(Attribute::AttrKind Kind) const { @@ -273,8 +266,10 @@ public: } typedef AttributeSet::iterator iterator; - iterator begin(unsigned Slot) const { return getSlotNode(Slot).begin(); } - iterator end(unsigned Slot) const { return getSlotNode(Slot).end(); } + iterator begin(unsigned Slot) const { + return getSlotAttributes(Slot).begin(); + } + iterator end(unsigned Slot) const { return getSlotAttributes(Slot).end(); } void Profile(FoldingSetNodeID &ID) const; static void Profile(FoldingSetNodeID &ID, diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index d690111ef210..e30414537a6c 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -955,13 +955,13 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, for (unsigned Index : Indices) { // Add all attribute slots before the current index. 
for (; I < E && getSlotIndex(I) < Index; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); // Add the attribute at this index. If we already have attributes at this // index, merge them into a new set. AttrBuilder B; if (I < E && getSlotIndex(I) == Index) { - B.merge(AttrBuilder(pImpl->getSlotNode(I))); + B.merge(AttrBuilder(pImpl->getSlotAttributes(I))); ++I; } B.addAttribute(A); @@ -970,7 +970,7 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, // Add remaining attributes. for (; I < E; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); return get(C, AttrVec); } @@ -1008,13 +1008,13 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, for (I = 0; I < NumAttrs; ++I) { if (getSlotIndex(I) >= Index) break; - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); } AttrBuilder NewAttrs; if (I < NumAttrs && getSlotIndex(I) == Index) { // We need to merge the attribute sets. - NewAttrs.merge(pImpl->getSlotNode(I)); + NewAttrs.merge(pImpl->getSlotAttributes(I)); ++I; } NewAttrs.merge(B); @@ -1024,7 +1024,7 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, // Add the remaining entries. for (; I < NumAttrs; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); return get(C, AttrVec); } @@ -1063,11 +1063,11 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, for (unsigned I = 0, E = NumAttrs; I != E; ++I) { if (getSlotIndex(I) >= Index) { if (getSlotIndex(I) == Index) - B = AttrBuilder(pImpl->getSlotNode(LastIndex++)); + B = AttrBuilder(getSlotAttributes(LastIndex++)); break; } LastIndex = I + 1; - AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); + AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); } // Remove the attributes from the existing set and add them. @@ -1077,7 +1077,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); + AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); return get(C, AttrSets); } @@ -1091,7 +1091,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { unsigned Index = getSlotIndex(I); if (Index != WithoutIndex) - AttrSet.push_back({Index, pImpl->getSlotNode(I)}); + AttrSet.push_back({Index, pImpl->getSlotAttributes(I)}); } return get(C, AttrSet); } @@ -1220,7 +1220,7 @@ AttributeSet AttributeList::getAttributes(unsigned Index) const { // Loop through to find the attribute node we want. 
for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) if (pImpl->getSlotIndex(I) == Index) - return pImpl->getSlotNode(I); + return pImpl->getSlotAttributes(I); return AttributeSet(); } @@ -1251,7 +1251,7 @@ unsigned AttributeList::getSlotIndex(unsigned Slot) const { return pImpl->getSlotIndex(Slot); } -AttributeList AttributeList::getSlotAttributes(unsigned Slot) const { +AttributeSet AttributeList::getSlotAttributes(unsigned Slot) const { assert(pImpl && Slot < pImpl->getNumSlots() && "Slot # out of range!"); return pImpl->getSlotAttributes(Slot); diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index ba92a91cc917..d4b455228225 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -589,8 +589,12 @@ FileInfo::openCoveragePath(StringRef CoveragePath) { /// print - Print source files with collected line count information. void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, StringRef GCNOFile, StringRef GCDAFile) { - for (const auto &LI : LineInfo) { - StringRef Filename = LI.first(); + SmallVector<StringRef, 4> Filenames; + for (const auto &LI : LineInfo) + Filenames.push_back(LI.first()); + std::sort(Filenames.begin(), Filenames.end()); + + for (StringRef Filename : Filenames) { auto AllLines = LineConsumer(Filename); std::string CoveragePath = getCoveragePath(Filename, MainFilename); @@ -603,7 +607,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, CovOS << " -: 0:Runs:" << RunCount << "\n"; CovOS << " -: 0:Programs:" << ProgramCount << "\n"; - const LineData &Line = LI.second; + const LineData &Line = LineInfo[Filename]; GCOVCoverage FileCoverage(Filename); for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty(); ++LineIndex) { diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index b07c57685a26..d83bdf2acd43 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -432,6 +432,7 @@ namespace { enum PointerStripKind { PSK_ZeroIndices, PSK_ZeroIndicesAndAliases, + PSK_ZeroIndicesAndAliasesAndBarriers, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -450,6 +451,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { if (auto *GEP = dyn_cast<GEPOperator>(V)) { switch (StripKind) { case PSK_ZeroIndicesAndAliases: + case PSK_ZeroIndicesAndAliasesAndBarriers: case PSK_ZeroIndices: if (!GEP->hasAllZeroIndices()) return V; @@ -472,12 +474,20 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { return V; V = GA->getAliasee(); } else { - if (auto CS = ImmutableCallSite(V)) + if (auto CS = ImmutableCallSite(V)) { if (const Value *RV = CS.getReturnedArgOperand()) { V = RV; continue; } - + // The result of invariant.group.barrier must alias it's argument, + // but it can't be marked with returned attribute, that's why it needs + // special case. 
+ if (StripKind == PSK_ZeroIndicesAndAliasesAndBarriers && + CS.getIntrinsicID() == Intrinsic::invariant_group_barrier) { + V = CS.getArgOperand(0); + continue; + } + } return V; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); @@ -499,6 +509,11 @@ const Value *Value::stripInBoundsConstantOffsets() const { return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this); } +const Value *Value::stripPointerCastsAndBarriers() const { + return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliasesAndBarriers>( + this); +} + const Value * Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const { diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 9782c898bf50..1bc0d7361d4c 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -415,7 +415,8 @@ void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, // Flag as visible outside of ThinLTO if visible from a regular object or // if this is a reference in the regular LTO partition. GlobalRes.VisibleOutsideThinLTO |= - (Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO)); + (Res.VisibleToRegularObj || Sym.isUsed() || + Partition == GlobalResolution::RegularLTO); } static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index 4bd251f727a4..30447c528af1 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -25,7 +25,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/LTO/LTO.h" -#include "llvm/LTO/legacy/UpdateCompilerUsed.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Passes/PassBuilder.h" @@ -353,19 +352,6 @@ finalizeOptimizationRemarks(std::unique_ptr<tool_output_file> DiagOutputFile) { DiagOutputFile->os().flush(); } -static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) { - // Collect the list of undefined symbols used in asm and update - // llvm.compiler.used to prevent optimization to drop these from the output. - StringSet<> AsmUndefinedRefs; - ModuleSymbolTable::CollectAsmSymbols( - Mod, - [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { - if (Flags & object::BasicSymbolRef::SF_Undefined) - AsmUndefinedRefs.insert(Name); - }); - updateCompilerUsed(Mod, TM, AsmUndefinedRefs); -} - Error lto::backend(Config &C, AddStreamFn AddStream, unsigned ParallelCodeGenParallelismLevel, std::unique_ptr<Module> Mod, @@ -377,8 +363,6 @@ Error lto::backend(Config &C, AddStreamFn AddStream, std::unique_ptr<TargetMachine> TM = createTargetMachine(C, Mod->getTargetTriple(), *TOrErr); - handleAsmUndefinedRefs(*Mod, *TM); - // Setup optimization remarks. 
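[Editorial aside: the practical effect of the Value.cpp hunk above is that a pointer and the result of passing it through llvm.invariant.group.barrier strip back to the same value. An assumed usage sketch, where V1 and V2 are any two pointer Values:

  // Both calls walk through pointer casts, zero-index GEPs, aliases and
  // invariant.group.barrier calls before the comparison.
  const Value *Base1 = V1->stripPointerCastsAndBarriers();
  const Value *Base2 = V2->stripPointerCastsAndBarriers();
  bool SameUnderlyingPointer = (Base1 == Base2);
]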
auto DiagFileOrErr = lto::setupOptimizationRemarks( Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness); @@ -416,8 +400,6 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr); - handleAsmUndefinedRefs(Mod, *TM); - if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); return Error::success(); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 42e8ad340281..2fa9c03b608e 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -134,7 +134,7 @@ struct ParseStatementInfo { SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; - ParseStatementInfo() = default; + ParseStatementInfo() = delete; ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites) : AsmRewrites(rewrites) {} }; @@ -737,6 +737,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { HadError = false; AsmCond StartingCondState = TheCondState; + SmallVector<AsmRewrite, 4> AsmStrRewrites; // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. @@ -756,7 +757,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof)) { - ParseStatementInfo Info; + ParseStatementInfo Info(&AsmStrRewrites); if (!parseStatement(Info, nullptr)) continue; @@ -1650,7 +1651,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, } // Emit the label. - if (!ParsingInlineAsm) + if (!getTargetParser().isParsingInlineAsm()) Out.EmitLabel(Sym, IDLoc); // If we are generating dwarf for assembly source files then gather the @@ -2057,9 +2058,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // If parsing succeeded, match the instruction. 
if (!ParseHadError) { uint64_t ErrorInfo; - if (getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode, - Info.ParsedOperands, Out, - ErrorInfo, ParsingInlineAsm)) + if (getTargetParser().MatchAndEmitInstruction( + IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo, + getTargetParser().isParsingInlineAsm())) return true; } return false; diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 159cc3b4def2..6444046a30d7 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -1105,7 +1105,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, encodeULEB128(wasm::WASM_SEC_CODE, getStream()); - encodeULEB128(CodeRelocations.size(), getStream()); + encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream()); WriteRelocations(CodeRelocations, getStream(), SymbolIndices); WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 23682e1fabfd..e89a4a315c46 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -1,4 +1,4 @@ -//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===// +//===- ELF.cpp - ELF object file implementation ---------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,15 +8,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" -namespace llvm { -namespace object { +using namespace llvm; +using namespace object; #define ELF_RELOC(name, value) \ case ELF::name: \ return #name; \ -StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { +StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + uint32_t Type) { switch (Machine) { case ELF::EM_X86_64: switch (Type) { @@ -139,6 +141,3 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { } #undef ELF_RELOC - -} // end namespace object -} // end namespace llvm diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 3f8c81c8e911..86f033bb6cbf 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===// +//===- ELFObjectFile.cpp - ELF object file implementation -----------------===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Triple.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/Error.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <system_error> +#include <utility> -namespace llvm { +using namespace llvm; using namespace object; ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) @@ -299,5 +314,3 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { TheTriple.setArchName(Triple); } - -} // end namespace llvm diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp index bb3d1b2cf695..5f0837882d60 100644 --- a/lib/Object/IRSymtab.cpp +++ b/lib/Object/IRSymtab.cpp @@ -1,4 +1,4 @@ -//===- 
IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===// +//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,34 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/IRSymtab.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/StringSaver.h" +#include <cassert> +#include <string> +#include <utility> +#include <vector> using namespace llvm; using namespace irsymtab; @@ -25,6 +45,7 @@ namespace { struct Builder { SmallVector<char, 0> &Symtab; SmallVector<char, 0> &Strtab; + Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab) : Symtab(Symtab), Strtab(Strtab) {} @@ -49,6 +70,7 @@ struct Builder { S.Offset = StrtabBuilder.add(Value); S.Size = Value.size(); } + template <typename T> void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) { R.Offset = Symtab.size(); @@ -141,6 +163,9 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, Sym.ComdatIndex = -1; auto *GV = Msym.dyn_cast<GlobalValue *>(); if (!GV) { + // Undefined module asm symbols act as GC roots and are implicitly used. 
+ if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_used; setStr(Sym.IRName, ""); return Error::success(); } @@ -228,7 +253,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) { return Error::success(); } -} // anonymous namespace +} // end anonymous namespace Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab) { diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 1753d2baaedd..3d3fa07db3f4 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1,4 +1,4 @@ -//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,32 +12,52 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/MachO.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include <cctype> +#include "llvm/Support/SwapByteOrder.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> #include <cstring> #include <limits> #include <list> +#include <memory> +#include <string> +#include <system_error> using namespace llvm; using namespace object; namespace { + struct section_base { char sectname[16]; char segname[16]; }; -} + +} // end anonymous namespace static Error malformedError(Twine Msg) { @@ -1144,11 +1164,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64bits, Error &Err, uint32_t UniversalCputype, uint32_t UniversalIndex) - : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), - SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), - DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), - DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), - HasPageZeroSegment(false) { + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) { ErrorAsOutParameter ErrAsOutParam(&Err); uint64_t SizeOfHeaders; uint32_t cputype; @@ -2343,11 +2359,11 @@ StringRef MachOObjectFile::getFileFormatName() const { unsigned CPUType = getCPUType(*this); if (!is64Bit()) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return "Mach-O 32-bit i386"; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return "Mach-O arm"; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return "Mach-O 32-bit ppc"; default: return "Mach-O 32-bit unknown"; @@ -2355,11 +2371,11 @@ StringRef MachOObjectFile::getFileFormatName() const { } switch (CPUType) { - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return "Mach-O 64-bit x86-64"; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return "Mach-O arm64"; - case 
llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return "Mach-O 64-bit ppc64"; default: return "Mach-O 64-bit unknown"; @@ -2368,17 +2384,17 @@ StringRef MachOObjectFile::getFileFormatName() const { Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return Triple::x86; - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return Triple::x86_64; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return Triple::arm; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return Triple::aarch64; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return Triple::ppc; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return Triple::ppc64; default: return Triple::UnknownArch; @@ -2571,8 +2587,7 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } -ExportEntry::ExportEntry(ArrayRef<uint8_t> T) - : Trie(T), Malformed(false), Done(false) {} +ExportEntry::ExportEntry(ArrayRef<uint8_t> T) : Trie(T) {} void ExportEntry::moveToFirst() { pushNode(0); @@ -2641,9 +2656,7 @@ uint32_t ExportEntry::nodeOffset() const { } ExportEntry::NodeState::NodeState(const uint8_t *Ptr) - : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), - ImportName(nullptr), ChildCount(0), NextChildIndex(0), - ParentStringLength(0), IsExportNode(false) {} + : Start(Ptr), Current(Ptr) {} void ExportEntry::pushNode(uint64_t offset) { const uint8_t *Ptr = Trie.begin() + offset; @@ -2733,7 +2746,7 @@ void ExportEntry::moveNext() { iterator_range<export_iterator> MachOObjectFile::exports(ArrayRef<uint8_t> Trie) { ExportEntry Start(Trie); - if (Trie.size() == 0) + if (Trie.empty()) Start.moveToEnd(); else Start.moveToFirst(); @@ -2750,9 +2763,8 @@ iterator_range<export_iterator> MachOObjectFile::exports() const { MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, ArrayRef<uint8_t> Bytes, bool is64Bit) - : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), - SegmentIndex(-1), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), - PointerSize(is64Bit ? 8 : 4), Done(false) {} + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 
8 : 4) {} void MachORebaseEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2794,7 +2806,7 @@ void MachORebaseEntry::moveNext() { More = false; Done = true; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n"); break; case MachO::REBASE_OPCODE_SET_TYPE_IMM: RebaseType = ImmValue; @@ -2807,8 +2819,8 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " - << "RebaseType=" << (int) RebaseType << "\n"); + dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); break; case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; @@ -2831,10 +2843,10 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: SegmentOffset += readULEB128(&error); @@ -2855,9 +2867,9 @@ void MachORebaseEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2881,9 +2893,9 @@ void MachORebaseEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2913,11 +2925,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2954,11 +2966,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2992,11 +3004,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - 
llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -3041,11 +3053,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: *E = malformedError("bad rebase info (bad opcode value 0x" + @@ -3131,10 +3143,8 @@ iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) { MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) - : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), - SegmentIndex(-1), LibraryOrdinalSet(false), Ordinal(0), Flags(0), - Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0), - PointerSize(is64Bit ? 8 : 4), TableKind(BK), Done(false) {} + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4), TableKind(BK) {} void MachOBindEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -3189,7 +3199,7 @@ void MachOBindEntry::moveNext() { } More = false; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: if (TableKind == Kind::Weak) { @@ -3211,8 +3221,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: if (TableKind == Kind::Weak) { @@ -3241,8 +3251,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: if (TableKind == Kind::Weak) { @@ -3267,8 +3277,8 @@ void MachOBindEntry::moveNext() { Ordinal = 0; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: Flags = ImmValue; @@ -3288,8 +3298,8 @@ void MachOBindEntry::moveNext() { ++Ptr; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " - << "SymbolName=" << SymbolName << "\n"); + dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); if (TableKind == Kind::Weak) { if (ImmValue & 
MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) return; @@ -3306,8 +3316,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " - << "BindType=" << (int)BindType << "\n"); + dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); break; case MachO::BIND_OPCODE_SET_ADDEND_SLEB: Addend = readSLEB128(&error); @@ -3320,8 +3330,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " - << "Addend=" << Addend << "\n"); + dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); break; case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; @@ -3343,10 +3353,10 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::BIND_OPCODE_ADD_ADDR_ULEB: SegmentOffset += readULEB128(&error); @@ -3366,9 +3376,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::BIND_OPCODE_DO_BIND: AdvanceAmount = PointerSize; @@ -3395,9 +3405,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: if (TableKind == Kind::Lazy) { @@ -3452,11 +3462,11 @@ void MachOBindEntry::moveNext() { RemainingLoopCount = 0; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: if (TableKind == Kind::Lazy) { @@ -3501,10 +3511,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: if (TableKind == Kind::Lazy) { @@ -3568,11 +3577,11 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << 
"\n"); return; default: *E = malformedError("bad bind info (bad opcode value 0x" + diff --git a/lib/Object/ModuleSummaryIndexObjectFile.cpp b/lib/Object/ModuleSummaryIndexObjectFile.cpp index de1ddab88fd4..91f93a41032e 100644 --- a/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ b/lib/Object/ModuleSummaryIndexObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation ==// +//==- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation -==// // // The LLVM Compiler Infrastructure // @@ -11,29 +11,38 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <memory> +#include <system_error> + using namespace llvm; using namespace object; -static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile( - "ignore-empty-index-file", llvm::cl::ZeroOrMore, - llvm::cl::desc( +static cl::opt<bool> IgnoreEmptyThinLTOIndexFile( + "ignore-empty-index-file", cl::ZeroOrMore, + cl::desc( "Ignore an empty index file and perform non-ThinLTO compilation"), - llvm::cl::init(false)); + cl::init(false)); ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I) : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { } -ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() {} +ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() = default; std::unique_ptr<ModuleSummaryIndex> ModuleSummaryIndexObjectFile::takeIndex() { return std::move(Index); diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp index 9a935d8e0869..a5b42725d817 100644 --- a/lib/Object/ModuleSymbolTable.cpp +++ b/lib/Object/ModuleSymbolTable.cpp @@ -1,4 +1,4 @@ -//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===// +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===// // // The LLVM Compiler Infrastructure // @@ -13,27 +13,45 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include 
"llvm/MC/MCSubtargetInfo.h" -#include "llvm/Object/ObjectFile.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <string> + using namespace llvm; using namespace object; diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp index c9c27451f809..e94e9cfed394 100644 --- a/lib/Object/RecordStreamer.cpp +++ b/lib/Object/RecordStreamer.cpp @@ -9,6 +9,7 @@ #include "RecordStreamer.h" #include "llvm/MC/MCSymbol.h" + using namespace llvm; void RecordStreamer::markDefined(const MCSymbol &Symbol) { @@ -69,14 +70,14 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) { void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } +RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + RecordStreamer::const_iterator RecordStreamer::begin() { return Symbols.begin(); } RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } -RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} - void RecordStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool) { MCStreamer::EmitInstruction(Inst, STI); diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h index a845ecd786a8..4d119091a3d2 100644 --- a/lib/Object/RecordStreamer.h +++ b/lib/Object/RecordStreamer.h @@ -1,4 +1,4 @@ -//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===// +//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,9 +10,16 @@ #ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H #define LLVM_LIB_OBJECT_RECORDSTREAMER_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/SMLoc.h" +#include <vector> namespace llvm { + class RecordStreamer : public MCStreamer { public: enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, @@ -24,16 +31,19 @@ private: // their symbol binding after parsing complete. This maps from each // aliasee to its list of aliases. DenseMap<const MCSymbol *, std::vector<MCSymbol *>> SymverAliasMap; + void markDefined(const MCSymbol &Symbol); void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); void markUsed(const MCSymbol &Symbol); void visitUsedSymbol(const MCSymbol &Sym) override; public: - typedef StringMap<State>::const_iterator const_iterator; + RecordStreamer(MCContext &Context); + + using const_iterator = StringMap<State>::const_iterator; + const_iterator begin(); const_iterator end(); - RecordStreamer(MCContext &Context); void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool) override; void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; @@ -50,6 +60,7 @@ public: DenseMap<const MCSymbol *, std::vector<MCSymbol *>> &symverAliases() { return SymverAliasMap; } + /// Get the state recorded for the given symbol. 
State getSymbolState(const MCSymbol *Sym) { auto SI = Symbols.find(Sym->getName()); @@ -58,5 +69,7 @@ public: return SI->second; } }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_OBJECT_RECORDSTREAMER_H diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index fc1dca35424e..9f3486e58a11 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" @@ -22,7 +23,9 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/Wasm.h" #include <algorithm> +#include <cassert> #include <cstdint> +#include <cstring> #include <system_error> using namespace llvm; @@ -141,7 +144,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { Expr.Value.Float64 = readFloat64(Ptr); break; case wasm::WASM_OPCODE_GET_GLOBAL: - Expr.Value.Global = readUint32(Ptr); + Expr.Value.Global = readULEB128(Ptr); break; default: return make_error<GenericBinaryError>("Invalid opcode in init_expr", @@ -180,7 +183,7 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr, } WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) - : ObjectFile(Binary::ID_Wasm, Buffer), StartFunction(-1) { + : ObjectFile(Binary::ID_Wasm, Buffer) { ErrorAsOutParameter ErrAsOutParam(&Err); Header.Magic = getData().substr(0, 4); if (Header.Magic != StringRef("\0asm", 4)) { @@ -252,7 +255,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { while (Count--) { /*uint32_t Index =*/readVaruint32(Ptr); StringRef Name = readString(Ptr); - if (Name.size()) + if (!Name.empty()) Symbols.emplace_back(Name, WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME); } @@ -313,11 +316,12 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: break; case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: - Reloc.Addend = readVaruint32(Ptr); + Reloc.Addend = readVarint32(Ptr); break; default: return make_error<GenericBinaryError>("Bad relocation type", diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 3e1bed19d61f..9b1ff7e5dc16 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -223,7 +223,7 @@ void MappingTraits<WasmYAML::Relocation>::mapping( IO.mapRequired("Type", Relocation.Type); IO.mapRequired("Index", Relocation.Index); IO.mapRequired("Offset", Relocation.Offset); - IO.mapRequired("Addend", Relocation.Addend); + IO.mapOptional("Addend", Relocation.Addend, 0); } void MappingTraits<WasmYAML::LocalDecl>::mapping( @@ -294,6 +294,9 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO, case wasm::WASM_OPCODE_F64_CONST: IO.mapRequired("Value", Expr.Value.Float64); break; + case wasm::WASM_OPCODE_GET_GLOBAL: + IO.mapRequired("Index", Expr.Value.Global); + break; } } diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 0421946a32a6..55ac2541948e 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -624,6 +624,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // And finally clean up LCSSA form before generating 
code. OptimizePM.addPass(InstSimplifierPass()); + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. + OptimizePM.addPass(SimplifyCFGPass()); + // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index c4c892f0352a..e1e2c22e1df1 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3393,7 +3393,7 @@ namespace { } void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, bool TruncateZero) const { switch (category) { case fcInfinity: if (isNegative()) @@ -3407,9 +3407,16 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, if (isNegative()) Str.push_back('-'); - if (!FormatMaxPadding) - append(Str, "0.0E+0"); - else + if (!FormatMaxPadding) { + if (TruncateZero) + append(Str, "0.0E+0"); + else { + append(Str, "0.0"); + if (FormatPrecision > 1) + Str.append(FormatPrecision - 1, '0'); + append(Str, "e+00"); + } + } else Str.push_back('0'); return; @@ -3543,12 +3550,16 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, Str.push_back(buffer[NDigits-1]); Str.push_back('.'); - if (NDigits == 1) + if (NDigits == 1 && TruncateZero) Str.push_back('0'); else for (unsigned I = 1; I != NDigits; ++I) Str.push_back(buffer[NDigits-1-I]); - Str.push_back('E'); + // Fill with zeros up to FormatPrecision. + if (!TruncateZero && FormatPrecision > NDigits - 1) + Str.append(FormatPrecision - NDigits + 1, '0'); + // For !TruncateZero we use lower 'e'. + Str.push_back(TruncateZero ? 'E' : 'e'); Str.push_back(exp >= 0 ? '+' : '-'); if (exp < 0) exp = -exp; @@ -3557,6 +3568,9 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, expbuf.push_back((char) ('0' + (exp % 10))); exp /= 10; } while (exp); + // Exponent always at least two digits if we do not truncate zeros. + if (!TruncateZero && expbuf.size() < 2) + expbuf.push_back('0'); for (unsigned I = 0, E = expbuf.size(); I != E; ++I) Str.push_back(expbuf[E-1-I]); return; @@ -4362,10 +4376,11 @@ bool DoubleAPFloat::isInteger() const { void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, + bool TruncateZero) const { assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) - .toString(Str, FormatPrecision, FormatMaxPadding); + .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); } bool DoubleAPFloat::getExactInverse(APFloat *inv) const { diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 2d049a1cff85..1227d7528c8f 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -81,7 +81,7 @@ void APInt::initSlowCase(uint64_t val, bool isSigned) { pVal[0] = val; if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) - pVal[i] = -1ULL; + pVal[i] = WORD_MAX; clearUnusedBits(); } @@ -364,44 +364,20 @@ bool APInt::EqualSlowCase(const APInt& RHS) const { return std::equal(pVal, pVal + getNumWords(), RHS.pVal); } -bool APInt::ult(const APInt& RHS) const { +int APInt::compare(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) - return VAL < RHS.VAL; + return VAL < RHS.VAL ? 
-1 : VAL > RHS.VAL; - // Get active bit length of both operands - unsigned n1 = getActiveBits(); - unsigned n2 = RHS.getActiveBits(); - - // If magnitude of LHS is less than RHS, return true. - if (n1 < n2) - return true; - - // If magnitude of RHS is greater than LHS, return false. - if (n2 < n1) - return false; - - // If they both fit in a word, just compare the low order word - if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) - return pVal[0] < RHS.pVal[0]; - - // Otherwise, compare all words - unsigned topWord = whichWord(std::max(n1,n2)-1); - for (int i = topWord; i >= 0; --i) { - if (pVal[i] > RHS.pVal[i]) - return false; - if (pVal[i] < RHS.pVal[i]) - return true; - } - return false; + return tcCompare(pVal, RHS.pVal, getNumWords()); } -bool APInt::slt(const APInt& RHS) const { +int APInt::compareSigned(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) { int64_t lhsSext = SignExtend64(VAL, BitWidth); int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); - return lhsSext < rhsSext; + return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; } bool lhsNeg = isNegative(); @@ -409,11 +385,11 @@ bool APInt::slt(const APInt& RHS) const { // If the sign bits don't match, then (LHS < RHS) if LHS is negative if (lhsNeg != rhsNeg) - return lhsNeg; + return lhsNeg ? -1 : 1; // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. - return ult(RHS); + return tcCompare(pVal, RHS.pVal, getNumWords()); } void APInt::setBit(unsigned bitPosition) { @@ -428,13 +404,13 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { unsigned hiWord = whichWord(hiBit); // Create an initial mask for the low word with zeros below loBit. - uint64_t loMask = UINT64_MAX << whichBit(loBit); + uint64_t loMask = WORD_MAX << whichBit(loBit); // If hiBit is not aligned, we need a high mask. unsigned hiShiftAmt = whichBit(hiBit); if (hiShiftAmt != 0) { // Create a high mask with zeros above hiBit. - uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); + uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); // If loWord and hiWord are equal, then we combine the masks. Otherwise, // set the bits in hiWord. if (hiWord == loWord) @@ -447,7 +423,7 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { // Fill any words between loWord and hiWord with all ones. for (unsigned word = loWord + 1; word < hiWord; ++word) - pVal[word] = UINT64_MAX; + pVal[word] = WORD_MAX; } /// Set the given bit to 0 whose position is given as "bitPosition". @@ -487,7 +463,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Single word result can be done as a direct bitmask. if (isSingleWord()) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); VAL &= ~(mask << bitPosition); VAL |= (subBits.VAL << bitPosition); return; @@ -499,7 +475,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Insertion within a single word can be done as a direct bitmask. if (loWord == hi1Word) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); pVal[loWord] &= ~(mask << loBit); pVal[loWord] |= (subBits.VAL << loBit); return; @@ -515,7 +491,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Mask+insert remaining bits. 
unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; if (remainingBits != 0) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits); pVal[hi1Word] &= ~mask; pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); } @@ -682,7 +658,7 @@ unsigned APInt::countLeadingOnes() const { unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { - if (pVal[i] == -1ULL) + if (pVal[i] == WORD_MAX) Count += APINT_BITS_PER_WORD; else { Count += llvm::countLeadingOnes(pVal[i]); @@ -708,11 +684,12 @@ unsigned APInt::countTrailingZeros() const { unsigned APInt::countTrailingOnesSlowCase() const { unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == -1ULL; ++i) + for (; i < getNumWords() && pVal[i] == WORD_MAX; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) Count += llvm::countTrailingOnes(pVal[i]); - return std::min(Count, BitWidth); + assert(Count <= BitWidth); + return Count; } unsigned APInt::countPopulationSlowCase() const { @@ -962,43 +939,26 @@ APInt APInt::trunc(unsigned width) const { } // Sign extend to a new width. -APInt APInt::sext(unsigned width) const { - assert(width > BitWidth && "Invalid APInt SignExtend request"); +APInt APInt::sext(unsigned Width) const { + assert(Width > BitWidth && "Invalid APInt SignExtend request"); - if (width <= APINT_BITS_PER_WORD) { - uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth); - val = (int64_t)val >> (width - BitWidth); - return APInt(width, val >> (APINT_BITS_PER_WORD - width)); - } - - APInt Result(getMemory(getNumWords(width)), width); - - // Copy full words. - unsigned i; - uint64_t word = 0; - for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) { - word = getRawData()[i]; - Result.pVal[i] = word; - } + if (Width <= APINT_BITS_PER_WORD) + return APInt(Width, SignExtend64(VAL, BitWidth)); - // Read and sign-extend any partial word. - unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD; - if (bits != 0) - word = (int64_t)getRawData()[i] << bits >> bits; - else - word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); + APInt Result(getMemory(getNumWords(Width)), Width); - // Write remaining full words. - for (; i != width / APINT_BITS_PER_WORD; i++) { - Result.pVal[i] = word; - word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); - } + // Copy words. + std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); - // Write any partial word. - bits = (0 - width) % APINT_BITS_PER_WORD; - if (bits != 0) - Result.pVal[i] = word << bits >> bits; + // Sign extend the last word since there may be unused bits in the input. + Result.pVal[getNumWords() - 1] = + SignExtend64(Result.pVal[getNumWords() - 1], + ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + // Fill with sign bits. + std::memset(Result.pVal + getNumWords(), isNegative() ? -1 : 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); + Result.clearUnusedBits(); return Result; } @@ -1012,12 +972,11 @@ APInt APInt::zext(unsigned width) const { APInt Result(getMemory(getNumWords(width)), width); // Copy words. - unsigned i; - for (i = 0; i != getNumWords(); i++) - Result.pVal[i] = getRawData()[i]; + std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); // Zero remaining words. 
- memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE); + std::memset(Result.pVal + getNumWords(), 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); return Result; } @@ -1052,89 +1011,51 @@ APInt APInt::sextOrSelf(unsigned width) const { /// Arithmetic right-shift this APInt by shiftAmt. /// @brief Arithmetic right-shift function. -APInt APInt::ashr(const APInt &shiftAmt) const { - return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +void APInt::ashrInPlace(const APInt &shiftAmt) { + ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); } /// Arithmetic right-shift this APInt by shiftAmt. /// @brief Arithmetic right-shift function. -APInt APInt::ashr(unsigned shiftAmt) const { - assert(shiftAmt <= BitWidth && "Invalid shift amount"); - // Handle a degenerate case - if (shiftAmt == 0) - return *this; - - // Handle single word shifts with built-in ashr - if (isSingleWord()) { - if (shiftAmt == BitWidth) - return APInt(BitWidth, 0); // undefined - return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt); - } +void APInt::ashrSlowCase(unsigned ShiftAmt) { + // Don't bother performing a no-op shift. + if (!ShiftAmt) + return; - // If all the bits were shifted out, the result is, technically, undefined. - // We return -1 if it was negative, 0 otherwise. We check this early to avoid - // issues in the algorithm below. - if (shiftAmt == BitWidth) { - if (isNegative()) - return APInt(BitWidth, -1ULL, true); - else - return APInt(BitWidth, 0); - } - - // Create some space for the result. - uint64_t * val = new uint64_t[getNumWords()]; - - // Compute some values needed by the following shift algorithms - unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word - unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift - unsigned breakWord = getNumWords() - 1 - offset; // last word affected - unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word? - if (bitsInWord == 0) - bitsInWord = APINT_BITS_PER_WORD; - - // If we are shifting whole words, just move whole words - if (wordShift == 0) { - // Move the words containing significant bits - for (unsigned i = 0; i <= breakWord; ++i) - val[i] = pVal[i+offset]; // move whole word - - // Adjust the top significant word for sign bit fill, if negative - if (isNegative()) - if (bitsInWord < APINT_BITS_PER_WORD) - val[breakWord] |= ~0ULL << bitsInWord; // set high bits - } else { - // Shift the low order words - for (unsigned i = 0; i < breakWord; ++i) { - // This combines the shifted corresponding word with the low bits from - // the next word (shifted into this word's high bits). - val[i] = (pVal[i+offset] >> wordShift) | - (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); - } + // Save the original sign bit for later. + bool Negative = isNegative(); - // Shift the break word. In this case there are no bits from the next word - // to include in this word. - val[breakWord] = pVal[breakWord+offset] >> wordShift; - - // Deal with sign extension in the break word, and possibly the word before - // it. - if (isNegative()) { - if (wordShift > bitsInWord) { - if (breakWord > 0) - val[breakWord-1] |= - ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); - val[breakWord] |= ~0ULL; - } else - val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); + // WordShift is the inter-part shift; BitShift is is intra-part shift. 
+ unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; + unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; + + unsigned WordsToMove = getNumWords() - WordShift; + if (WordsToMove != 0) { + // Sign extend the last word to fill in the unused bits. + pVal[getNumWords() - 1] = SignExtend64( + pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(pVal, pVal + WordShift, WordsToMove * APINT_WORD_SIZE); + } else { + // Move the words containing significant bits. + for (unsigned i = 0; i != WordsToMove - 1; ++i) + pVal[i] = (pVal[i + WordShift] >> BitShift) | + (pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); + + // Handle the last word which has no high bits to copy. + pVal[WordsToMove - 1] = pVal[WordShift + WordsToMove - 1] >> BitShift; + // Sign extend one more time. + pVal[WordsToMove - 1] = + SignExtend64(pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); } } - // Remaining words are 0 or -1, just assign them. - uint64_t fillValue = (isNegative() ? -1ULL : 0); - for (unsigned i = breakWord+1; i < getNumWords(); ++i) - val[i] = fillValue; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; + // Fill in the remainder based on the original sign. + std::memset(pVal + WordsToMove, Negative ? -1 : 0, + WordShift * APINT_WORD_SIZE); + clearUnusedBits(); } /// Logical right-shift this APInt by shiftAmt. @@ -2608,7 +2529,7 @@ void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { if (!Count) return; - /* WordShift is the inter-part shift; BitShift is is intra-part shift. */ + // WordShift is the inter-part shift; BitShift is the intra-part shift. unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); unsigned BitShift = Count % APINT_BITS_PER_WORD; @@ -2635,7 +2556,7 @@ void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { if (!Count) return; - // WordShift is the inter-part shift; BitShift is is intra-part shift. + // WordShift is the inter-part shift; BitShift is the intra-part shift. unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); unsigned BitShift = Count % APINT_BITS_PER_WORD; @@ -2684,10 +2605,8 @@ int APInt::tcCompare(const WordType *lhs, const WordType *rhs, unsigned parts) { while (parts) { parts--; - if (lhs[parts] == rhs[parts]) - continue; - - return (lhs[parts] > rhs[parts]) ? 1 : -1; + if (lhs[parts] != rhs[parts]) + return (lhs[parts] > rhs[parts]) ? 
1 : -1; } return 0; diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 92ce6185306a..22fb3f2cb9c9 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -193,4 +193,3 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName) { void LLVMAddSymbol(const char *symbolName, void *symbolValue) { return llvm::sys::DynamicLibrary::AddSymbol(symbolName, symbolValue); } - diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 64d5977e2ebd..f3a654d7d2bd 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -161,6 +161,7 @@ StringRef Triple::getVendorTypeName(VendorType Kind) { case Myriad: return "myriad"; case AMD: return "amd"; case Mesa: return "mesa"; + case SUSE: return "suse"; } llvm_unreachable("Invalid VendorType!"); @@ -443,6 +444,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("myriad", Triple::Myriad) .Case("amd", Triple::AMD) .Case("mesa", Triple::Mesa) + .Case("suse", Triple::SUSE) .Default(Triple::UnknownVendor); } diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index d0c0956b87ca..629ad5c61b78 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -942,6 +942,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, AArch64::XZR, NextMBBI); case AArch64::CMP_SWAP_128: return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); + } return false; } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 550174b22a89..dc916c034661 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1125,7 +1125,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) BasePointerReg = RegInfo->getBaseRegister(); - bool ExtraCSSpill = false; + unsigned ExtraCSSpill = 0; const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { @@ -1153,7 +1153,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(PairedReg); if (AArch64::GPR64RegClass.contains(PairedReg) && !RegInfo->isReservedReg(MF, PairedReg)) - ExtraCSSpill = true; + ExtraCSSpill = PairedReg; } } @@ -1186,8 +1186,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // register scavenging. If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. - if (BigStack && !ExtraCSSpill) { - if (UnspilledCSGPR != AArch64::NoRegister) { + if (BigStack) { + if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); @@ -1196,15 +1196,18 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // store the pair. if (produceCompactUnwindFrame(MF)) SavedRegs.set(UnspilledCSGPRPaired); - ExtraCSSpill = true; + ExtraCSSpill = UnspilledCSGPRPaired; NumRegsSpilled = SavedRegs.count(); } // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. 
- if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - int FI = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false); + if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass &RC = AArch64::GPR64RegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + int FI = MFI.CreateStackObject(Size, Align, false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI << " as the emergency spill slot.\n"); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ddc95199d4c..a7c98fbb425f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -91,6 +91,7 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); +STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); static cl::opt<bool> EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, @@ -105,6 +106,12 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +static cl::opt<bool> +EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, + cl::desc("Enable AArch64 logical imm instruction " + "optimization"), + cl::init(true)); + /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -787,6 +794,140 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } +static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, + const APInt &Demanded, + TargetLowering::TargetLoweringOpt &TLO, + unsigned NewOpc) { + uint64_t OldImm = Imm, NewImm, Enc; + uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; + + // Return if the immediate is already all zeros, all ones, a bimm32 or a + // bimm64. + if (Imm == 0 || Imm == Mask || + AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) + return false; + + unsigned EltSize = Size; + uint64_t DemandedBits = Demanded.getZExtValue(); + + // Clear bits that are not demanded. + Imm &= DemandedBits; + + while (true) { + // The goal here is to set the non-demanded bits in a way that minimizes + // the number of switching between 0 and 1. In order to achieve this goal, + // we set the non-demanded bits to the value of the preceding demanded bits. + // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a + // non-demanded bit), we copy bit0 (1) to the least significant 'x', + // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. + // The final result is 0b11000011. + uint64_t NonDemandedBits = ~DemandedBits; + uint64_t InvertedImm = ~Imm & DemandedBits; + uint64_t RotatedImm = + ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & + NonDemandedBits; + uint64_t Sum = RotatedImm + NonDemandedBits; + bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); + uint64_t Ones = (Sum + Carry) & NonDemandedBits; + NewImm = (Imm | Ones) & Mask; + + // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate + // or all-ones or all-zeros, in which case we can stop searching. Otherwise, + // we halve the element size and continue the search. 
+ if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) + break; + + // We cannot shrink the element size any further if it is 2-bits. + if (EltSize == 2) + return false; + + EltSize /= 2; + Mask >>= EltSize; + uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; + + // Return if there is mismatch in any of the demanded bits of Imm and Hi. + if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) + return false; + + // Merge the upper and lower halves of Imm and DemandedBits. + Imm |= Hi; + DemandedBits |= DemandedBitsHi; + } + + ++NumOptimizedImms; + + // Replicate the element across the register width. + while (EltSize < Size) { + NewImm |= NewImm << EltSize; + EltSize *= 2; + } + + (void)OldImm; + assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && + "demanded bits should never be altered"); + assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); + + // Create the new constant immediate node. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + // If the new constant immediate is all-zeros or all-ones, let the target + // independent DAG combine optimize this node. + if (NewImm == 0 || NewImm == OrigMask) + return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT)); + + // Otherwise, create a machine node so that target independent DAG combine + // doesn't undo this optimization. + Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); + SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); + SDValue New( + TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + + return TLO.CombineTo(Op, New); +} + +bool AArch64TargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { + // Delay this optimization to as late as possible. + if (!TLO.LegalOps) + return false; + + if (!EnableOptimizeLogicalImm) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector()) + return false; + + unsigned Size = VT.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "i32 or i64 is expected after legalization."); + + // Exit early if we demand all bits. + if (Demanded.countPopulation() == Size) + return false; + + unsigned NewOpc; + switch (Op.getOpcode()) { + default: + return false; + case ISD::AND: + NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; + break; + case ISD::OR: + NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; + break; + case ISD::XOR: + NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; + break; + } + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) + return false; + uint64_t Imm = C->getZExtValue(); + return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc); +} + /// computeKnownBitsForTargetNode - Determine which of the bits specified in /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
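As a rough, self-contained illustration of the idea the optimizeLogicalImm comment above describes (fill each bit that is not demanded with the value of the preceding demanded bit, wrapping around the top, so the immediate has as few 0/1 transitions as possible and is more likely to encode as a logical immediate) — names and the bit-by-bit loop below are illustrative assumptions, not the in-tree rotate-and-add code, and the element-size halving step is omitted:

#include <cstdint>

// Sketch: copy the nearest lower demanded bit (with wrap-around) into every
// non-demanded bit position of an immediate of width Size.
static uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t DemandedBits,
                                    unsigned Size) {
  uint64_t Result = Imm & DemandedBits; // keep demanded bits, clear the rest
  // Seed with the highest demanded bit so the lowest gap wraps around.
  int LastDemanded = 0;
  for (int i = Size - 1; i >= 0; --i)
    if ((DemandedBits >> i) & 1) {
      LastDemanded = (Imm >> i) & 1;
      break;
    }
  for (unsigned i = 0; i != Size; ++i) {
    if ((DemandedBits >> i) & 1)
      LastDemanded = (Imm >> i) & 1;     // remember the demanded value
    else if (LastDemanded)
      Result |= (uint64_t)1 << i;        // copy it into the gap
  }
  return Result;
}

// For the 0bx10xx0x1 example in the comment above (Imm = 0b01000001,
// DemandedBits = 0b01100101, Size = 8) this returns 0b11000011.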
@@ -3418,11 +3559,75 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Other Lowering Code //===----------------------------------------------------------------------===// +SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +// (loadGOT sym) +template <class NodeTy> +SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT); + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes instead of using a wrapper node. + return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); +} + +// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym)) +template <class NodeTy> +SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG) + const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const unsigned char MO_NC = AArch64II::MO_NC; + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, Ty, + getTargetNode(N, Ty, DAG, AArch64II::MO_G3), + getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC)); +} + +// (addlow (adrp %hi(sym)) %lo(sym)) +template <class NodeTy> +SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE); + SDValue Lo = getTargetNode(N, Ty, DAG, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); +} + SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); - const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GN->getGlobal(); unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); @@ -3430,32 +3635,15 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 && "unexpected offset in global node"); - // This also catched the large code model case for Darwin. + // This also catches the large code model case for Darwin. 
if ((OpFlags & AArch64II::MO_GOT) != 0) { - SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes instead of using a wrapper node. - return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(GN, DAG); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(GN, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and - // the only correct model on Darwin. - SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | AArch64II::MO_PAGE); - unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; - SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(GN, DAG); } } @@ -4232,90 +4420,37 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(JT, DAG); } - - SDValue Hi = - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(JT, DAG); } SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { // Use the GOT for the large code model on iOS. 
if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_GOT); - return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(CP, DAG); } - - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(CP, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. - SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(CP, DAG); } } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); + BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(BA, DAG); } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(BA, DAG); } } diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index a023b4373835..6081b07479b9 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -255,6 +255,9 @@ public: const SelectionDAG &DAG, unsigned Depth = 0) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; /// Returns true if the target allows unaligned memory accesses of the @@ -508,6 +511,18 @@ private: const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) 
const; + SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + template <class NodeTy> SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const; + template <class NodeTy> + SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const; + template <class NodeTy> SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td index 867074c3c374..71826bec6b11 100644 --- a/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -14,6 +14,9 @@ //===---------------------------------- // Atomic fences //===---------------------------------- +let AddedComplexity = 15, Size = 0 in +def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), + [(atomic_fence imm:$ordering, 0)]>, Sched<[]>; def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 16be4432b160..c44daf306ea9 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -693,11 +693,11 @@ def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>; def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>; def gi_addsub_shifted_imm32 : - GIComplexOperandMatcher<s32, (ops i32imm, i32imm), "selectArithImmed">, + GIComplexOperandMatcher<s32, "selectArithImmed">, GIComplexPatternEquiv<addsub_shifted_imm32>; def gi_addsub_shifted_imm64 : - GIComplexOperandMatcher<s64, (ops i32imm, i32imm), "selectArithImmed">, + GIComplexOperandMatcher<s64, "selectArithImmed">, GIComplexPatternEquiv<addsub_shifted_imm64>; class neg_addsub_shifted_imm<ValueType Ty> diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 41fc8eceab5c..cb268828455e 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2320,7 +2320,7 @@ void AArch64InstrInfo::storeRegToStackSlot( PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::STRBui; @@ -2424,7 +2424,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRBui; @@ -2649,7 +2649,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { - assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == + TRI.getRegSizeInBits(*getRegClass(SrcReg)) && "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, @@ -2735,7 +2736,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( } if (FillRC) { - assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == + TRI.getRegSizeInBits(*FillRC) && "Mismatched regclass size on folded subreg COPY"); loadRegFromStackSlot(MBB, 
InsertPt, DstReg, FrameIndex, FillRC, &TRI); MachineInstr &LoadMI = *--InsertPt; @@ -3025,7 +3027,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return false; } -void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +void AArch64InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(AArch64::HINT); NopInst.addOperand(MCOperand::createImm(0)); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index bacce441f6c5..4cd14db633b9 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -205,7 +205,7 @@ public: const DebugLoc &DL, unsigned DstReg, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 5e01b6cd2b46..b0e0e3eb4ba7 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -41,12 +41,17 @@ using namespace llvm; namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -62,14 +67,19 @@ private: bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool selectArithImmed(MachineOperand &Root, MachineOperand &Result1, - MachineOperand &Result2) const; + ComplexRendererFn selectArithImmed(MachineOperand &Root) const; const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; + bool ForCodeSize; + + PredicateBitset AvailableFeatures; + PredicateBitset + computeAvailableFeatures(const MachineFunction *MF, + const AArch64Subtarget *Subtarget) const; // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. 
@@ -88,7 +98,7 @@ AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + TRI(*STI.getRegisterInfo()), RBI(RBI), ForCodeSize(), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -567,6 +577,12 @@ bool AArch64InstructionSelector::selectVaStartDarwin( return true; } +void AArch64InstructionSelector::beginFunction( + const MachineFunction &MF) { + ForCodeSize = MF.getFunction()->optForSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool AArch64InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1312,9 +1328,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. -bool AArch64InstructionSelector::selectArithImmed( - MachineOperand &Root, MachineOperand &Result1, - MachineOperand &Result2) const { +InstructionSelector::ComplexRendererFn +AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { MachineInstr &MI = *Root.getParent(); MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -1333,13 +1348,13 @@ bool AArch64InstructionSelector::selectArithImmed( else if (Root.isReg()) { MachineInstr *Def = MRI.getVRegDef(Root.getReg()); if (Def->getOpcode() != TargetOpcode::G_CONSTANT) - return false; + return nullptr; MachineOperand &Op1 = Def->getOperand(1); if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64) - return false; + return nullptr; Immed = Op1.getCImm()->getZExtValue(); } else - return false; + return nullptr; unsigned ShiftAmt; @@ -1349,14 +1364,10 @@ bool AArch64InstructionSelector::selectArithImmed( ShiftAmt = 12; Immed = Immed >> 12; } else - return false; + return nullptr; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); - Result1.ChangeToImmediate(Immed); - Result1.clearParent(); - Result2.ChangeToImmediate(ShVal); - Result2.clearParent(); - return true; + return [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed).addImm(ShVal); }; } namespace llvm { diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td index eec089087fe0..cf1c0b66db58 100644 --- a/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/lib/Target/AArch64/AArch64SchedFalkor.td @@ -79,14 +79,14 @@ def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; } def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; } def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; } def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; } -def : WriteRes<WriteST, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]> - { let Latency = 3; let NumMicroOps = 3; } +def : WriteRes<WriteST, [FalkorUnitST, FalkorUnitSD]> + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]> { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 5; } +def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 1; } def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let 
Latency = 5; } -def : WriteRes<WriteSTIdx, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]> - { let Latency = 4; let NumMicroOps = 3; } +def : WriteRes<WriteSTIdx, [FalkorUnitST, FalkorUnitSD]> + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; let NumMicroOps = 2; } def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; } diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td index 4bd77d344488..8f8eeef8a6cf 100644 --- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -326,6 +326,10 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; // SIMD Store Instructions // ----------------------------------------------------------------------------- +def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>; + def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; @@ -421,6 +425,7 @@ def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>; def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>; def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>; + // FP Miscellaneous Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>; @@ -433,7 +438,6 @@ def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>; - // Load Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; @@ -461,6 +465,7 @@ def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>; + // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; @@ -502,28 +507,30 @@ def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>; -def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; +def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; -def : InstRW<[WriteST], (instregex "^LDC.*$")>; -def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>; +def : 
InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>; +def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>; def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>; -def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>; +def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>; def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>; def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>; +def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>; +def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>; diff --git a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td index 9cdb4be4246b..e64b2c441a19 100644 --- a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td +++ b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td @@ -28,7 +28,6 @@ //===----------------------------------------------------------------------===// // Define 1 micro-op types - def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } @@ -175,18 +174,33 @@ def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { //===----------------------------------------------------------------------===// // Define 3 micro-op types +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; let NumMicroOps = 3; } + def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; let NumMicroOps = 3; @@ -196,10 +210,12 @@ def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; @@ -259,6 +275,12 @@ def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let NumMicroOps = 4; } +def FalkorWr_2LD_1ST_1SD_3cyc: 
SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + //===----------------------------------------------------------------------===// // Define 5 micro-op types @@ -289,6 +311,13 @@ def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, let NumMicroOps = 6; } +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + //===----------------------------------------------------------------------===// // Define 8 micro-op types diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index d7bbc2bcd22c..4dbcc9581a84 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2473,16 +2473,14 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } - auto DB = AArch64DB::lookupDBByName(Tok.getString()); - if (!DB) { - TokError("invalid barrier option name"); - return MatchOperand_ParseFail; - } - // The only valid named option for ISB is 'sy' - if (Mnemonic == "isb" && DB->Encoding != AArch64DB::sy) { + auto DB = AArch64DB::lookupDBByName(Tok.getString()); + if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) { TokError("'sy' or #imm operand expected"); return MatchOperand_ParseFail; + } else if (!DB) { + TokError("invalid barrier option name"); + return MatchOperand_ParseFail; } Operands.push_back(AArch64Operand::CreateBarrier( diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 41ae70f85e58..fc89657bffd3 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" @@ -275,6 +276,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } + if (Opcode == AArch64::CompilerBarrier) { + O << '\t' << MAI.getCommentString() << " COMPILER BARRIER"; + printAnnotation(O, Annot); + return; + } + if (!printAliasInstr(MI, STI, O)) printInstruction(MI, STI, O); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 62dfa59483eb..33698d2b8c38 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -565,6 +565,9 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup)); return; + } else if (MI.getOpcode() == AArch64::CompilerBarrier) { + // This just prevents the compiler from reordering accesses, no actual code. 
+ return; } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 2c7a2d8962d0..0f331486d0f8 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -406,7 +406,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, - FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode + FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, + FeatureFastFMAF32 ] >; diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 318de7f2e3d2..f5110857da84 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -116,8 +116,11 @@ private: bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; - bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, - SDValue &SOffset, SDValue &ImmOffset) const; + bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr, + SDValue &SOffset, SDValue &ImmOffset) const; + bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, + SDValue &Offset) const; + bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, SDValue &TFE) const; @@ -150,14 +153,12 @@ private: bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; - bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, - SDValue &Omod) const; bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; @@ -953,8 +954,12 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, return true; } +static bool isLegalMUBUFImmOffset(unsigned Imm) { + return isUInt<12>(Imm); +} + static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { - return isUInt<12>(Imm->getZExtValue()); + return isLegalMUBUFImmOffset(Imm->getZExtValue()); } bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, @@ -1076,9 +1081,9 @@ SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { return N; } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, - SDValue &VAddr, SDValue &SOffset, - SDValue &ImmOffset) const { +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc, + SDValue &VAddr, SDValue &SOffset, + SDValue &ImmOffset) const { SDLoc DL(Addr); MachineFunction &MF = CurDAG->getMachineFunction(); @@ -1087,8 +1092,22 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); - // (add n0, c1) + if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { + unsigned Imm = CAddr->getZExtValue(); + assert(!isLegalMUBUFImmOffset(Imm) && + 
"should have been selected by other pattern"); + + SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); + MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, + DL, MVT::i32, HighBits); + VAddr = SDValue(MovHighBits, 0); + ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); + return true; + } + if (CurDAG->isBaseWithConstantOffset(Addr)) { + // (add n0, c1) + SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); @@ -1107,6 +1126,24 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, return true; } +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr, + SDValue &SRsrc, + SDValue &SOffset, + SDValue &Offset) const { + ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr); + if (!CAddr || !isLegalMUBUFImmOffset(CAddr)) + return false; + + SDLoc DL(Addr); + MachineFunction &MF = CurDAG->getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + + SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); + SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); + Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); + return true; +} + bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, @@ -1628,38 +1665,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, return isNoNanSrc(Src); } -bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - bool Res = SelectVOP3Mods(In, Src, SrcMods); - return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { + if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) + return false; + + Src = In; + return true; } bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const { SDLoc DL(In); - // FIXME: Handle Clamp and Omod - Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); - Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); - - return SelectVOP3Mods(In, Src, SrcMods); -} - -bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, - SDValue &SrcMods, SDValue &Clamp, - SDValue &Omod) const { - bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); - - return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && - cast<ConstantSDNode>(Clamp)->isNullValue() && - cast<ConstantSDNode>(Omod)->isNullValue(); -} - -bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, - SDValue &SrcMods, - SDValue &Omod) const { - // FIXME: Handle Omod - Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); return SelectVOP3Mods(In, Src, SrcMods); } @@ -1677,9 +1696,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, Src = In; SDLoc DL(In); - // FIXME: Handle Clamp and Omod - Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); - Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); return true; } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index c0f336e082bd..e21775e61dd4 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2315,12 +2315,13 @@ 
static bool simplifyI24(SDNode *Node24, unsigned OpIdx, SelectionDAG &DAG = DCI.DAG; SDValue Op = Node24->getOperand(OpIdx); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = Op.getValueType(); APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, true, true); - if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI)) + if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO)) return true; return false; @@ -3361,7 +3362,7 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); @@ -3436,6 +3437,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ELSE) NODE_NAME_CASE(LOOP) NODE_NAME_CASE(CALL) + NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_FLAG) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index d6aa0ba92bf7..13cbfe267932 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -231,6 +231,10 @@ public: AMDGPUAS getAMDGPUAS() const { return AMDGPUASI; } + + MVT getFenceOperandTy(const DataLayout &DL) const override { + return MVT::i32; + } }; namespace AMDGPUISD { @@ -244,6 +248,7 @@ enum NodeType : unsigned { // Function call. CALL, + TRAP, // Masked control flow nodes. IF, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 56f060984f08..c1706d12a2ea 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -78,6 +78,11 @@ def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>; def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>; def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>; +def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP", + SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>, + [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue] +>; + def AMDGPUconstdata_ptr : SDNode< "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<0, iPTR>]> diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index b8d681298dee..4e688ab0b105 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -50,6 +50,16 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; +def u16ImmTarget : AsmOperandClass { + let Name = "U16Imm"; + let RenderMethod = "addImmOperands"; +} + +def s16ImmTarget : AsmOperandClass { + let Name = "S16Imm"; + let RenderMethod = "addImmOperands"; +} + let OperandType = "OPERAND_IMMEDIATE" in { def u32imm : Operand<i32> { @@ -58,6 +68,12 @@ def u32imm : Operand<i32> { def u16imm : Operand<i16> { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = u16ImmTarget; +} + +def s16imm : Operand<i16> { + let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = s16ImmTarget; } def u8imm : Operand<i8> { diff --git 
a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 14ee1c81f8fa..da247fea7de6 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -225,6 +225,12 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) { + if (isVerbose()) + OutStreamer->emitRawComment(" divergent unreachable"); + return; + } + MCInst TmpInst; MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 961f7186f373..70c848f3c7bd 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -479,6 +479,8 @@ public: bool isSMRDLiteralOffset() const; bool isDPPCtrl() const; bool isGPRIdxMode() const; + bool isS16Imm() const; + bool isU16Imm() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -2836,6 +2838,28 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { // s_waitcnt //===----------------------------------------------------------------------===// +static bool +encodeCnt( + const AMDGPU::IsaInfo::IsaVersion ISA, + int64_t &IntVal, + int64_t CntVal, + bool Saturate, + unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) +{ + bool Failed = false; + + IntVal = encode(ISA, IntVal, CntVal); + if (CntVal != decode(ISA, IntVal)) { + if (Saturate) { + IntVal = encode(ISA, IntVal, -1); + } else { + Failed = true; + } + } + return Failed; +} + bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { StringRef CntName = Parser.getTok().getString(); int64_t CntVal; @@ -2851,25 +2875,35 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getParser().parseAbsoluteExpression(CntVal)) return true; - if (getLexer().isNot(AsmToken::RParen)) - return true; - - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) - Parser.Lex(); - AMDGPU::IsaInfo::IsaVersion ISA = AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); - if (CntName == "vmcnt") - IntVal = encodeVmcnt(ISA, IntVal, CntVal); - else if (CntName == "expcnt") - IntVal = encodeExpcnt(ISA, IntVal, CntVal); - else if (CntName == "lgkmcnt") - IntVal = encodeLgkmcnt(ISA, IntVal, CntVal); - else - return true; - return false; + bool Failed = true; + bool Sat = CntName.endswith("_sat"); + + if (CntName == "vmcnt" || CntName == "vmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); + } else if (CntName == "expcnt" || CntName == "expcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); + } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); + } + + // To improve diagnostics, do not skip delimiters on errors + if (!Failed) { + if (getLexer().isNot(AsmToken::RParen)) { + return true; + } + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); + } + } + } + + return Failed; } OperandMatchResultTy @@ -3858,6 +3892,14 @@ bool AMDGPUOperand::isGPRIdxMode() const { return isImm() && isUInt<4>(getImm()); } +bool AMDGPUOperand::isS16Imm() const { + return isImm() && (isInt<16>(getImm()) || 
isUInt<16>(getImm())); +} + +bool AMDGPUOperand::isU16Imm() const { + return isImm() && isUInt<16>(getImm()); +} + OperandMatchResultTy AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index a6609f0725ab..89eddb9ce961 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -11,7 +11,9 @@ def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">; def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">; def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">; -def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">; +def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen">; +def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [], 20>; + def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">; def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">; def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; @@ -958,21 +960,30 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>; } // End Predicates = [Has16BitInsts] -class MUBUFScratchLoadPat <MUBUF_Pseudo Instr, ValueType vt, PatFrag ld> : Pat < - (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr, - i32:$soffset, u16imm:$offset))), - (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) ->; +multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen, + MUBUF_Pseudo InstrOffset, + ValueType vt, PatFrag ld> { + def : Pat < + (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset))), + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + >; + + def : Pat < + (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) + >; +} -def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i16, sextloadi8_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i16, extloadi8_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>; -def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>; // 
BUFFER_LOAD_DWORD*, addr64=0 multiclass MUBUF_Load_Dword <ValueType vt, @@ -1054,19 +1065,29 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>; defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, global_store>; -class MUBUFScratchStorePat <MUBUF_Pseudo Instr, ValueType vt, PatFrag st> : Pat < - (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset, - u16imm:$offset)), - (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) ->; +multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen, + MUBUF_Pseudo InstrOffset, + ValueType vt, PatFrag st> { + def : Pat < + (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset)), + (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + >; + + def : Pat < + (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, + u16imm:$offset)), + (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0) + >; +} -def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>; -def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>; -def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i16, truncstorei8_private>; -def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i16, store_private>; -def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>; -def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>; -def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i32, truncstorei8_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>; //===----------------------------------------------------------------------===// // MTBUF Patterns diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 4ecfa118fb27..bf16a8216001 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -83,8 +83,8 @@ unsigned GCNRegPressure::getRegKind(unsigned Reg, const auto RC = MRI.getRegClass(Reg); auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); return STI->isSGPRClass(RC) ? - (RC->getSize() == 4 ? SGPR32 : SGPR_TUPLE) : - (RC->getSize() == 4 ? VGPR32 : VGPR_TUPLE); + (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) : + (STI->getRegSizeInBits(*RC) == 32 ? 
VGPR32 : VGPR_TUPLE); } void GCNRegPressure::inc(unsigned Reg, diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 29a6ab9fbe93..647017d5061d 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -286,20 +286,20 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, return ValueKind::Pipe; return StringSwitch<ValueKind>(BaseTypeName) + .Case("image1d_t", ValueKind::Image) + .Case("image1d_array_t", ValueKind::Image) + .Case("image1d_buffer_t", ValueKind::Image) + .Case("image2d_t", ValueKind::Image) + .Case("image2d_array_t", ValueKind::Image) + .Case("image2d_array_depth_t", ValueKind::Image) + .Case("image2d_array_msaa_t", ValueKind::Image) + .Case("image2d_array_msaa_depth_t", ValueKind::Image) + .Case("image2d_depth_t", ValueKind::Image) + .Case("image2d_msaa_t", ValueKind::Image) + .Case("image2d_msaa_depth_t", ValueKind::Image) + .Case("image3d_t", ValueKind::Image) .Case("sampler_t", ValueKind::Sampler) .Case("queue_t", ValueKind::Queue) - .Cases("image1d_t", - "image1d_array_t", - "image1d_buffer_t", - "image2d_t" , - "image2d_array_t", - "image2d_array_depth_t", - "image2d_array_msaa_t" - "image2d_array_msaa_depth_t" - "image2d_depth_t", - "image2d_msaa_t", - "image2d_msaa_depth_t", - "image3d_t", ValueKind::Image) .Default(isa<PointerType>(Ty) ? (Ty->getPointerAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ? diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 6c61fb1f2d6b..2364e7b7b5fb 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -15,6 +15,7 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 
8 : 4; + StackGrowsUp = true; HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MinInstAlignment = 4; diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index f9d258f44a62..b0f0bf04a891 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -81,6 +81,11 @@ using namespace llvm; #define DEBUG_TYPE "si-fix-sgpr-copies" +static cl::opt<bool> EnableM0Merge( + "amdgpu-enable-merge-m0", + cl::desc("Merge and hoist M0 initializations"), + cl::init(false)); + namespace { class SIFixSGPRCopies : public MachineFunctionPass { @@ -108,7 +113,7 @@ public: INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) @@ -332,27 +337,186 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, return true; } -static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI) { - DenseSet<MachineBasicBlock*> Visited; +template <class UnaryPredicate> +bool searchPredecessors(const MachineBasicBlock *MBB, + const MachineBasicBlock *CutOff, + UnaryPredicate Predicate) { + + if (MBB == CutOff) + return false; + + DenseSet<const MachineBasicBlock*> Visited; SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(), MBB->pred_end()); while (!Worklist.empty()) { - MachineBasicBlock *mbb = Worklist.back(); - Worklist.pop_back(); + MachineBasicBlock *MBB = Worklist.pop_back_val(); - if (!Visited.insert(mbb).second) + if (!Visited.insert(MBB).second) continue; - if (hasTerminatorThatModifiesExec(*mbb, *TRI)) + if (MBB == CutOff) + continue; + if (Predicate(MBB)) return true; - Worklist.insert(Worklist.end(), mbb->pred_begin(), mbb->pred_end()); + Worklist.append(MBB->pred_begin(), MBB->pred_end()); } return false; } +static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, + const TargetRegisterInfo *TRI) { + return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) { + return hasTerminatorThatModifiesExec(*MBB, *TRI); }); +} + +// Checks if there is potential path From instruction To instruction. +// If CutOff is specified and it sits in between of that path we ignore +// a higher portion of the path and report it is not reachable. +static bool isReachable(const MachineInstr *From, + const MachineInstr *To, + const MachineBasicBlock *CutOff, + MachineDominatorTree &MDT) { + // If either From block dominates To block or instructions are in the same + // block and From is higher. + if (MDT.dominates(From, To)) + return true; + + const MachineBasicBlock *MBBFrom = From->getParent(); + const MachineBasicBlock *MBBTo = To->getParent(); + if (MBBFrom == MBBTo) + return false; + + // Instructions are in different blocks, do predecessor search. + // We should almost never get here since we do not usually produce M0 stores + // other than -1. + return searchPredecessors(MBBTo, CutOff, [MBBFrom] + (const MachineBasicBlock *MBB) { return MBB == MBBFrom; }); +} + +// Hoist and merge identical SGPR initializations into a common predecessor. +// This is intended to combine M0 initializations, but can work with any +// SGPR. A VGPR cannot be processed since we cannot guarantee vector +// execution.
+static bool hoistAndMergeSGPRInits(unsigned Reg, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT) { + // List of inits by immediate value. + typedef std::map<unsigned, std::list<MachineInstr*>> InitListMap; + InitListMap Inits; + // List of clobbering instructions. + SmallVector<MachineInstr*, 8> Clobbers; + bool Changed = false; + + for (auto &MI : MRI.def_instructions(Reg)) { + MachineOperand *Imm = nullptr; + for (auto &MO: MI.operands()) { + if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) || + (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) { + Imm = nullptr; + break; + } else if (MO.isImm()) + Imm = &MO; + } + if (Imm) + Inits[Imm->getImm()].push_front(&MI); + else + Clobbers.push_back(&MI); + } + + for (auto &Init : Inits) { + auto &Defs = Init.second; + + for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) { + MachineInstr *MI1 = *I1; + + for (auto I2 = std::next(I1); I2 != E; ) { + MachineInstr *MI2 = *I2; + + // Check any possible interference + auto intereferes = [&](MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To) -> bool { + + assert(MDT.dominates(&*To, &*From)); + + auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool { + const MachineBasicBlock *MBBFrom = From->getParent(); + const MachineBasicBlock *MBBTo = To->getParent(); + bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT); + bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT); + if (!MayClobberFrom && !MayClobberTo) + return false; + if ((MayClobberFrom && !MayClobberTo) || + (!MayClobberFrom && MayClobberTo)) + return true; + // Both can clobber, this is not an interference only if both are + // dominated by Clobber and belong to the same block or if Clobber + // properly dominates To, given that To >> From, so it dominates + // both and located in a common dominator. 
+ return !((MBBFrom == MBBTo && + MDT.dominates(Clobber, &*From) && + MDT.dominates(Clobber, &*To)) || + MDT.properlyDominates(Clobber->getParent(), MBBTo)); + }; + + return (any_of(Clobbers, interferes)) || + (any_of(Inits, [&](InitListMap::value_type &C) { + return C.first != Init.first && any_of(C.second, interferes); + })); + }; + + if (MDT.dominates(MI1, MI2)) { + if (!intereferes(MI2, MI1)) { + DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber() + << " " << *MI2); + MI2->eraseFromParent(); + Defs.erase(I2++); + Changed = true; + continue; + } + } else if (MDT.dominates(MI2, MI1)) { + if (!intereferes(MI1, MI2)) { + DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() + << " " << *MI1); + MI1->eraseFromParent(); + Defs.erase(I1++); + Changed = true; + break; + } + } else { + auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(), + MI2->getParent()); + if (!MBB) { + ++I2; + continue; + } + + MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); + if (!intereferes(MI1, I) && !intereferes(MI2, I)) { + DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() + << " " << *MI1 << "and moving from BB#" + << MI2->getParent()->getNumber() << " to BB#" + << I->getParent()->getNumber() << " " << *MI2); + I->getParent()->splice(I, MI2->getParent(), MI2); + MI1->eraseFromParent(); + Defs.erase(I1++); + Changed = true; + break; + } + } + ++I2; + } + ++I1; + } + } + + if (Changed) + MRI.clearKillFlags(Reg); + + return Changed; +} + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -485,5 +649,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } } + if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) + hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT); + return true; } diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index abe6af9a6d3f..86e3b37b09e9 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -101,10 +101,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); // We need to insert initialization of the scratch resource descriptor. unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); - if (ScratchRsrcReg == AMDGPU::NoRegister) + if (ScratchRsrcReg == AMDGPU::NoRegister || + !MRI.isPhysRegUsed(ScratchRsrcReg)) return AMDGPU::NoRegister; if (ST.hasSGPRInitBug() || @@ -122,8 +124,6 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( // We find the resource first because it has an alignment requirement. - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF); AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded)); @@ -143,24 +143,34 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( return ScratchRsrcReg; } -unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +// Shift down registers reserved for the scratch wave offset and stack pointer +// SGPRs. 
+std::pair<unsigned, unsigned> +SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( const SISubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); - if (ST.hasSGPRInitBug() || - ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) - return ScratchWaveOffsetReg; - unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); - MachineRegisterInfo &MRI = MF.getRegInfo(); + // No replacement necessary. + if (ScratchWaveOffsetReg == AMDGPU::NoRegister || + !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) { + assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister); + return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister); + } + + unsigned SPReg = MFI->getStackPtrOffsetReg(); + if (ST.hasSGPRInitBug()) + return std::make_pair(ScratchWaveOffsetReg, SPReg); + unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, SPReg); AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -175,26 +185,42 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // register from the list to consider, it means that when this // register is being used for the scratch wave offset and there // are no other free SGPRs, then the value will stay in this register. + // + 1 if stack pointer is used. // ---- - // 13 - if (AllSGPRs.size() < 13) - return ScratchWaveOffsetReg; + // 13 (+1) + unsigned ReservedRegCount = 13; + if (SPReg != AMDGPU::NoRegister) + ++ReservedRegCount; - for (MCPhysReg Reg : AllSGPRs.drop_back(13)) { + if (AllSGPRs.size() < ReservedRegCount) + return std::make_pair(ScratchWaveOffsetReg, SPReg); + + bool HandledScratchWaveOffsetReg = + ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + + for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { // Pick the first unallocated SGPR. Be careful not to pick an alias of the // scratch descriptor, since we haven’t added its uses yet. 
- if (!MRI.isPhysRegUsed(Reg)) { - if (!MRI.isAllocatable(Reg) || - TRI->isSubRegisterEq(ScratchRsrcReg, Reg)) - continue; + if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) { + if (!HandledScratchWaveOffsetReg) { + HandledScratchWaveOffsetReg = true; - MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); - MFI->setScratchWaveOffsetReg(Reg); - return Reg; + MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); + MFI->setScratchWaveOffsetReg(Reg); + ScratchWaveOffsetReg = Reg; + } else { + if (SPReg == AMDGPU::NoRegister) + break; + + MRI.replaceRegWith(SPReg, Reg); + MFI->setStackPtrOffsetReg(Reg); + SPReg = Reg; + break; + } } } - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, SPReg); } void SIFrameLowering::emitPrologue(MachineFunction &MF, @@ -220,18 +246,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned ScratchRsrcReg - = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); - unsigned ScratchWaveOffsetReg - = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); - - if (ScratchRsrcReg == AMDGPU::NoRegister) { - assert(ScratchWaveOffsetReg == AMDGPU::NoRegister); - return; - } - - assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg)); - // We need to do the replacement of the private segment buffer and wave offset // register even if there are no stack objects. There could be stores to undef // or a constant without an associated object. @@ -244,19 +258,49 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit()) emitFlatScratchInit(ST, MF, MBB); + unsigned SPReg = MFI->getStackPtrOffsetReg(); + if (SPReg != AMDGPU::NoRegister) { + DebugLoc DL; + int64_t StackSize = MF.getFrameInfo().getStackSize(); + + if (StackSize == 0) { + BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg) + .addReg(MFI->getScratchWaveOffsetReg()); + } else { + BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg) + .addReg(MFI->getScratchWaveOffsetReg()) + .addImm(StackSize * ST.getWavefrontSize()); + } + } + + unsigned ScratchRsrcReg + = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); + + unsigned ScratchWaveOffsetReg; + std::tie(ScratchWaveOffsetReg, SPReg) + = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); + + // It's possible to have uses of only ScratchWaveOffsetReg without + // ScratchRsrcReg if it's only used for the initialization of flat_scratch, + // but the inverse is not true. + if (ScratchWaveOffsetReg == AMDGPU::NoRegister) { + assert(ScratchRsrcReg == AMDGPU::NoRegister); + return; + } + // We need to insert initialization of the scratch resource descriptor. unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); - unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } - bool OffsetRegUsed = !MRI.use_empty(ScratchWaveOffsetReg); - bool ResourceRegUsed = !MRI.use_empty(ScratchRsrcReg); + bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg); + bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister && + MRI.isPhysRegUsed(ScratchRsrcReg); // We added live-ins during argument lowering, but since they were not used // they were deleted. We're adding the uses now, so add them back. 
@@ -469,7 +513,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // this also ensures we shouldn't need a register for the offset when // emergency scavenging. int ScavengeFI = MFI.CreateFixedObject( - AMDGPU::SGPR_32RegClass.getSize(), 0, false); + TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); RS->addScavengingFrameIndex(ScavengeFI); } } diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index 1bfc08093da2..7ccd02b3c86a 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -49,7 +49,7 @@ private: SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - unsigned getReservedPrivateSegmentWaveByteOffsetReg( + std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg( const SISubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index dd867b15b4c7..ce74a7cd8b04 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -287,8 +287,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // On SI this is s_memtime and s_memrealtime on VI. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Custom); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -1644,7 +1644,7 @@ computeIndirectRegAndOffset(const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, unsigned VecReg, int Offset) { - int NumElts = SuperRC->getSize() / 4; + int NumElts = TRI.getRegSizeInBits(*SuperRC) / 32; // Skip out of bounds offsets, or else we would end up using an undefined // register. 
@@ -1793,17 +1793,18 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, return LoopBB; } -static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) { - switch (VecRC->getSize()) { - case 4: +static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI, + const TargetRegisterClass *VecRC) { + switch (TRI.getRegSizeInBits(*VecRC)) { + case 32: // 4 bytes return AMDGPU::V_MOVRELD_B32_V1; - case 8: + case 64: // 8 bytes return AMDGPU::V_MOVRELD_B32_V2; - case 16: + case 128: // 16 bytes return AMDGPU::V_MOVRELD_B32_V4; - case 32: + case 256: // 32 bytes return AMDGPU::V_MOVRELD_B32_V8; - case 64: + case 512: // 64 bytes return AMDGPU::V_MOVRELD_B32_V16; default: llvm_unreachable("unsupported size for MOVRELD pseudos"); @@ -1863,7 +1864,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF)); } else { - const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC)); + const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC)); BuildMI(MBB, I, DL, MovRelDesc) .addReg(Dst, RegState::Define) @@ -1907,7 +1908,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, .addReg(PhiReg, RegState::Implicit) .addReg(AMDGPU::M0, RegState::Implicit); } else { - const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC)); + const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC)); BuildMI(*LoopBB, InsPt, DL, MovRelDesc) .addReg(Dst, RegState::Define) @@ -1948,50 +1949,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( } switch (MI.getOpcode()) { - case AMDGPU::S_TRAP_PSEUDO: { - const DebugLoc &DL = MI.getDebugLoc(); - const int TrapType = MI.getOperand(0).getImm(); - - if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && - Subtarget->isTrapHandlerEnabled()) { - - MachineFunction *MF = BB->getParent(); - SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); - unsigned UserSGPR = Info->getQueuePtrUserSGPR(); - assert(UserSGPR != AMDGPU::NoRegister); - - if (!BB->isLiveIn(UserSGPR)) - BB->addLiveIn(UserSGPR); - - BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1) - .addReg(UserSGPR); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)) - .addImm(TrapType) - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit); - } else { - switch (TrapType) { - case SISubtarget::TrapIDLLVMTrap: - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM)); - break; - case SISubtarget::TrapIDLLVMDebugTrap: { - DiagnosticInfoUnsupported NoTrap(*MF->getFunction(), - "debugtrap handler not supported", - DL, - DS_Warning); - LLVMContext &C = MF->getFunction()->getContext(); - C.diagnose(NoTrap); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP)) - .addImm(0); - break; - } - default: - llvm_unreachable("unsupported trap handler type!"); - } - } - - MI.eraseFromParent(); - return BB; - } case AMDGPU::SI_INIT_M0: BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) @@ -2163,6 +2120,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); + + case ISD::TRAP: + case ISD::DEBUGTRAP: + return lowerTRAP(Op, DAG); } return SDValue(); } @@ -2431,6 +2392,57 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);; } +SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + MachineFunction &MF = 
DAG.getMachineFunction(); + SDValue Chain = Op.getOperand(0); + + unsigned TrapID = Op.getOpcode() == ISD::DEBUGTRAP ? + SISubtarget::TrapIDLLVMDebugTrap : SISubtarget::TrapIDLLVMTrap; + + if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && + Subtarget->isTrapHandlerEnabled()) { + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + unsigned UserSGPR = Info->getQueuePtrUserSGPR(); + assert(UserSGPR != AMDGPU::NoRegister); + + SDValue QueuePtr = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); + + SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); + + SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, + QueuePtr, SDValue()); + + SDValue Ops[] = { + ToReg, + DAG.getTargetConstant(TrapID, SL, MVT::i16), + SGPR01, + ToReg.getValue(1) + }; + + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); + } + + switch (TrapID) { + case SISubtarget::TrapIDLLVMTrap: + return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); + case SISubtarget::TrapIDLLVMDebugTrap: { + DiagnosticInfoUnsupported NoTrap(*MF.getFunction(), + "debugtrap handler not supported", + Op.getDebugLoc(), + DS_Warning); + LLVMContext &Ctx = MF.getFunction()->getContext(); + Ctx.diagnose(NoTrap); + return Chain; + } + default: + llvm_unreachable("unsupported trap handler type!"); + } + + return Chain; +} + SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, SelectionDAG &DAG) const { // FIXME: Use inline constants (src_{shared, private}_base) instead. @@ -3410,9 +3422,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, EVT VT = Op.getValueType(); bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; + if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) + return SDValue(); + if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) { - if (Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals()) || - VT == MVT::f16) { + if (Unsafe || VT == MVT::f32 || VT == MVT::f16) { if (CLHS->isExactlyValue(1.0)) { // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to // the CI documentation has a worst case error of 1 ulp. @@ -4696,7 +4710,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Src, Demanded) || + if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) || TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c2a3e62aa827..9122cd72d323 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -428,8 +428,8 @@ RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI, const MachineInstr &MIA = *MI; const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo); - unsigned Size = RC->getSize(); - Result.second = Result.first + (Size / 4); + unsigned Size = TRI->getRegSizeInBits(*RC); + Result.second = Result.first + (Size / 32); return Result; } diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 47257ce16ceb..9f32ecfa52ff 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -216,8 +216,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // XXX - What if this is a write into a super register? 
const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0); - unsigned Size = RC->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + unsigned Size = TRI->getRegSizeInBits(*RC); + Result.Named.LGKM = Size > 32 ? 2 : 1; } else { // s_dcache_inv etc. do not have a a destination register. Assume we // want a wait on these. @@ -289,12 +289,12 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC, const MachineOperand &Reg) const { - unsigned Size = RC->getSize(); - assert(Size >= 4); + unsigned Size = TRI->getRegSizeInBits(*RC); + assert(Size >= 32); RegInterval Result; Result.first = TRI->getEncodingValue(Reg.getReg()); - Result.second = Result.first + Size / 4; + Result.second = Result.first + Size / 32; return Result; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 05ac67d26620..92e452a3d6a0 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -138,6 +138,11 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, } if (isSMRD(Opc0) && isSMRD(Opc1)) { + // Skip time and cache invalidation instructions. + if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || + AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) + return false; + assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); // Check base reg. @@ -245,11 +250,11 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, unsigned EltSize; if (LdSt.mayLoad()) - EltSize = getOpRegClass(LdSt, 0)->getSize() / 2; + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; else { assert(LdSt.mayStore()); int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); - EltSize = getOpRegClass(LdSt, Data0Idx)->getSize(); + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; } if (isStride64(Opc)) @@ -345,7 +350,7 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, FirstLdSt.getParent()->getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); - return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; + return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold; } static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, @@ -433,7 +438,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RC)) { - if (RC->getSize() > 4) { + if (RI.getRegSizeInBits(*RC) > 32) { Opcode = AMDGPU::S_MOV_B64; EltSize = 8; } else { @@ -493,11 +498,11 @@ int SIInstrInfo::commuteOpcode(unsigned Opcode) const { unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { - if (DstRC->getSize() == 4) { + if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { + } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { return AMDGPU::S_MOV_B64; - } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { + } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { return AMDGPU::V_MOV_B64_PSEUDO; } return AMDGPU::COPY; @@ -557,17 +562,18 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, Size, Align); + unsigned SpillSize = TRI->getSpillSize(*RC); if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. - const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize())); + const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } @@ -602,7 +608,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); - unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); + unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(isKill)) // data @@ -660,6 +666,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, DebugLoc DL = MBB.findDebugLoc(MI); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); unsigned Size = FrameInfo.getObjectSize(FrameIndex); + unsigned SpillSize = TRI->getSpillSize(*RC); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); @@ -670,8 +677,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (RI.isSGPRClass(RC)) { // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. - const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize())); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { + const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); + if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } @@ -701,7 +708,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); - unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); + unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize); BuildMI(MBB, MI, DL, get(Opcode), DestReg) .addFrameIndex(FrameIndex) // vaddr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc @@ -1440,9 +1447,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); - unsigned DstSize = DstRC->getSize(); + unsigned DstSize = RI.getRegSizeInBits(*DstRC); - if (DstSize == 4) { + if (DstSize == 32) { unsigned SelOp = Pred == SCC_TRUE ? 
AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32; @@ -1456,7 +1463,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, return; } - if (DstSize == 8 && Pred == SCC_TRUE) { + if (DstSize == 64 && Pred == SCC_TRUE) { MachineInstr *Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) .addReg(FalseReg) @@ -1483,7 +1490,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; const int16_t *SubIndices = Sub0_15; - int NElts = DstSize / 4; + int NElts = DstSize / 32; // 64-bit select is only avaialble for SALU. if (Pred == SCC_TRUE) { @@ -2635,6 +2642,19 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) return; + // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for + // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane + // select is uniform. + if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && + RI.isVGPR(MRI, Src1.getReg())) { + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .add(Src1); + Src1.ChangeToRegister(Reg, false); + return; + } + // We do not use commuteInstruction here because it is too aggressive and will // commute if it is possible. We only want to commute here if it improves // legality. This can be called a fairly large number of times so don't waste @@ -2729,7 +2749,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); unsigned DstReg = MRI.createVirtualRegister(SRC); - unsigned SubRegs = VRC->getSize() / 4; + unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; SmallVector<unsigned, 8> SRegs; for (unsigned i = 0; i < SubRegs; ++i) { @@ -3595,7 +3615,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist, .addImm(16) .add(Src0); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) - .addImm(0xffff); + .addImm(0xffff0000); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg) .add(Src1) .addReg(ImmReg, RegState::Kill) diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 659473ca6a47..03a5ef74b179 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -626,13 +626,13 @@ public: return 4; } - return RI.getRegClass(OpInfo.RegClass)->getSize(); + return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; } /// \brief This form should usually be preferred since it handles operands /// with unknown register classes. 
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { - return getOpRegClass(MI, OpNo)->getSize(); + return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; } /// \returns true if it is legal for the operand at index \p OpNo diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index c6daf743f3ac..7b052844f177 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -646,11 +646,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">; def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">; def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">; -def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">; def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">; def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">; def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; -def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">; +def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">; // VOP3Mods, but the input source is known to never be NaN. def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 2f89503e129a..3f6ddec70479 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -94,6 +94,12 @@ defm V_INTERP_MOV_F32 : VINTRP_m < //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// +def ATOMIC_FENCE : SPseudoInstSI< + (outs), (ins i32imm:$ordering, i32imm:$scope), + [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))], + "ATOMIC_FENCE $ordering, $scope"> { + let hasSideEffects = 1; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { @@ -111,12 +117,6 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)>; } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] -def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> { - let hasSideEffects = 1; - let SALU = 1; - let usesCustomInserter = 1; -} - let usesCustomInserter = 1, SALU = 1 in { def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins), [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>; @@ -400,13 +400,8 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < let Predicates = [isGCN] in { def : Pat< - (trap), - (S_TRAP_PSEUDO TRAPID.LLVM_TRAP) ->; - -def : Pat< - (debugtrap), - (S_TRAP_PSEUDO TRAPID.LLVM_DEBUG_TRAP) + (AMDGPUtrap timm:$trapid), + (S_TRAP $trapid) >; def : Pat< @@ -477,8 +472,8 @@ def : Pat < // fp_to_fp16 patterns def : Pat < - (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))), - (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod) + (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), + (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; def : Pat < @@ -507,11 +502,11 @@ def : Pat < multiclass FMADPat <ValueType vt, Instruction inst> { def : Pat < - (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3NoMods vt:$src1, i32:$src1_modifiers), - (VOP3NoMods vt:$src2, i32:$src2_modifiers))), - (inst $src0_modifiers, $src0, $src1_modifiers, $src1, - $src2_modifiers, $src2, $clamp, $omod) + (vt (fmad (VOP3NoMods vt:$src0), + (VOP3NoMods vt:$src1), + 
(VOP3NoMods vt:$src2))), + (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, + SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; } @@ -681,10 +676,9 @@ def : BitConvert <v16f32, v16i32, VReg_512>; // If denormals are not enabled, it only impacts the compare of the // inputs. The output result is not flushed. class ClampPat<Instruction inst, ValueType vt> : Pat < - (vt (AMDGPUclamp - (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))), + (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))), (inst i32:$src0_modifiers, vt:$src0, - i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod) + i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE) >; def : ClampPat<V_MAX_F32_e64, f32>; diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 8e612d2ddfda..b6a982aee6be 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -25,6 +25,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) TIDReg(AMDGPU::NoRegister), ScratchRSrcReg(AMDGPU::NoRegister), ScratchWaveOffsetReg(AMDGPU::NoRegister), + FrameOffsetReg(AMDGPU::NoRegister), + StackPtrOffsetReg(AMDGPU::NoRegister), PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), DispatchPtrUserSGPR(AMDGPU::NoRegister), QueuePtrUserSGPR(AMDGPU::NoRegister), diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 810fb05984c4..dc9f509e60ae 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -88,6 +88,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned ScratchRSrcReg; unsigned ScratchWaveOffsetReg; + // This is the current function's incremented size from the kernel's scratch + // wave offset register. For an entry function, this is exactly the same as + // the ScratchWaveOffsetReg. + unsigned FrameOffsetReg; + + // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. + unsigned StackPtrOffsetReg; + // Input registers for non-HSA ABI unsigned PrivateMemoryPtrUserSGPR; @@ -364,9 +372,25 @@ public: return ScratchWaveOffsetReg; } + unsigned getFrameOffsetReg() const { + return FrameOffsetReg; + } + + void setStackPtrOffsetReg(unsigned Reg) { + assert(Reg != AMDGPU::NoRegister && "Should never be unset"); + StackPtrOffsetReg = Reg; + } + + unsigned getStackPtrOffsetReg() const { + return StackPtrOffsetReg; + } + void setScratchWaveOffsetReg(unsigned Reg) { assert(Reg != AMDGPU::NoRegister && "Should never be unset"); ScratchWaveOffsetReg = Reg; + + // FIXME: Only for entry functions. + FrameOffsetReg = ScratchWaveOffsetReg; } unsigned getQueuePtrUserSGPR() const { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 098c67252dd8..8820e294562b 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -146,6 +146,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::EXEC); reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); + // M0 has to be reserved so that llvm accepts it as a live-in into a block. + reserveRegisterTuples(Reserved, AMDGPU::M0); + // Reserve the memory aperture registers. 
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE); reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT); @@ -615,7 +618,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, if (SpillToSMEM && isSGPRClass(RC)) { // XXX - if private_element_size is larger than 4 it might be useful to be // able to spill wider vmem spills. - std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true); + std::tie(EltSize, ScalarStoreOp) = + getSpillEltSize(getRegSizeInBits(*RC) / 8, true); } ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize); @@ -775,7 +779,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, if (SpillToSMEM && isSGPRClass(RC)) { // XXX - if private_element_size is larger than 4 it might be useful to be // able to spill wider vmem spills. - std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false); + std::tie(EltSize, ScalarLoadOp) = + getSpillEltSize(getRegSizeInBits(*RC) / 8, false); } ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize); @@ -1038,20 +1043,21 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { // TODO: It might be helpful to have some target specific flags in // TargetRegisterClass to mark which classes are VGPRs to make this trivial. bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { - switch (RC->getSize()) { - case 0: return false; - case 1: return false; - case 4: + unsigned Size = getRegSizeInBits(*RC); + if (Size < 32) + return false; + switch (Size) { + case 32: return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; - case 8: + case 64: return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; - case 12: + case 96: return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; - case 16: + case 128: return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; - case 32: + case 256: return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; - case 64: + case 512: return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; default: llvm_unreachable("Invalid register class size"); @@ -1060,18 +1066,18 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SRC) const { - switch (SRC->getSize()) { - case 4: + switch (getRegSizeInBits(*SRC)) { + case 32: return &AMDGPU::VGPR_32RegClass; - case 8: + case 64: return &AMDGPU::VReg_64RegClass; - case 12: + case 96: return &AMDGPU::VReg_96RegClass; - case 16: + case 128: return &AMDGPU::VReg_128RegClass; - case 32: + case 256: return &AMDGPU::VReg_256RegClass; - case 64: + case 512: return &AMDGPU::VReg_512RegClass; default: llvm_unreachable("Invalid register class size"); @@ -1080,16 +1086,16 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( const TargetRegisterClass *VRC) const { - switch (VRC->getSize()) { - case 4: + switch (getRegSizeInBits(*VRC)) { + case 32: return &AMDGPU::SGPR_32RegClass; - case 8: + case 64: return &AMDGPU::SReg_64RegClass; - case 16: + case 128: return &AMDGPU::SReg_128RegClass; - case 32: + case 256: return &AMDGPU::SReg_256RegClass; - case 64: + case 512: return &AMDGPU::SReg_512RegClass; default: llvm_unreachable("Invalid register class size"); @@ -1354,15 +1360,15 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC) const { - 
unsigned SrcSize = SrcRC->getSize(); - unsigned DstSize = DstRC->getSize(); - unsigned NewSize = NewRC->getSize(); + unsigned SrcSize = getRegSizeInBits(*SrcRC); + unsigned DstSize = getRegSizeInBits(*DstRC); + unsigned NewSize = getRegSizeInBits(*NewRC); // Do not increase size of registers beyond dword, we would need to allocate // adjacent registers and constraint regalloc more than needed. // Always allow dword coalescing. - if (SrcSize <= 4 || DstSize <= 4) + if (SrcSize <= 32 || DstSize <= 32) return true; return NewSize <= DstSize || NewSize <= SrcSize; diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index b4adbdd1df07..593439c2a3cd 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -530,14 +530,16 @@ class SOPKInstTable <bit is_sopk, string cmpOp = ""> { class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo < opName, (outs SReg_32:$sdst), - (ins u16imm:$simm16), + (ins s16imm:$simm16), "$sdst, $simm16", pattern>; -class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo < +class SOPK_SCC <string opName, string base_op, bit isSignExt> : SOPK_Pseudo < opName, (outs), - (ins SReg_32:$sdst, u16imm:$simm16), + !if(isSignExt, + (ins SReg_32:$sdst, s16imm:$simm16), + (ins SReg_32:$sdst, u16imm:$simm16)), "$sdst, $simm16", []>, SOPKInstTable<1, base_op>{ let Defs = [SCC]; @@ -546,7 +548,7 @@ class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo < class SOPK_32TIE <string opName, list<dag> pattern=[]> : SOPK_Pseudo < opName, (outs SReg_32:$sdst), - (ins SReg_32:$src0, u16imm:$simm16), + (ins SReg_32:$src0, s16imm:$simm16), "$sdst, $simm16", pattern >; @@ -575,20 +577,20 @@ let isCompare = 1 in { // [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))] // >; -def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">; -def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">; -def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">; -def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">; -def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">; -def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">; +def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32", 1>; +def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32", 1>; +def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32", 1>; +def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32", 1>; +def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32", 1>; +def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32", 1>; let SOPKZext = 1 in { -def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">; -def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">; -def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">; -def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">; -def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">; -def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">; +def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32", 0>; +def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32", 0>; +def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32", 0>; +def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32", 0>; +def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32", 0>; +def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32", 0>; } // End SOPKZext = 1 } // End isCompare = 1 @@ -600,7 +602,7 @@ let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", def 
S_CBRANCH_I_FORK : SOPK_Pseudo < "s_cbranch_i_fork", - (outs), (ins SReg_64:$sdst, u16imm:$simm16), + (outs), (ins SReg_64:$sdst, s16imm:$simm16), "$sdst, $simm16" >; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 86095a8e1142..5a3242bed1d0 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -93,6 +93,12 @@ unsigned getVmcntBitWidthHi() { return 2; } } // end namespace anonymous namespace llvm { + +static cl::opt<bool> EnablePackedInlinableLiterals( + "enable-packed-inlinable-literals", + cl::desc("Enable packed inlinable literals (v2f16, v2i16)"), + cl::init(false)); + namespace AMDGPU { namespace IsaInfo { @@ -703,6 +709,9 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { assert(HasInv2Pi); + if (!EnablePackedInlinableLiterals) + return false; + int16_t Lo16 = static_cast<int16_t>(Literal); int16_t Hi16 = static_cast<int16_t>(Literal >> 16); return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 4f5711ca9a79..5c9d589e2625 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -905,7 +905,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(ARM::STRi12)) @@ -1103,7 +1103,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index faf1c631a3a7..28c407f74125 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -105,10 +105,6 @@ public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; - virtual void getNoopForElfTarget(MCInst &NopInst) const { - getNoopForMachoTarget(NopInst); - } - // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 70a44eaaceb8..a20887564f44 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -806,7 +806,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, if (!DstSubReg) return true; // Small registers don't frequently cause a problem, so we can coalesce them. 
- if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + if (getRegSizeInBits(*NewRC) < 256 && getRegSizeInBits(*DstRC) < 256 && + getRegSizeInBits(*SrcRC) < 256) return true; auto NewRCWeight = diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 94b317a8f986..13fb30767c9f 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -35,7 +35,8 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { EVT VT = TLI.getValueType(DL, T, true); - if (!VT.isSimple() || VT.isVector()) + if (!VT.isSimple() || VT.isVector() || + !(VT.isInteger() || VT.isFloatingPoint())) return false; unsigned VTSize = VT.getSimpleVT().getSizeInBits(); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e0aecff2633b..78a9144bd321 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -661,7 +661,6 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { return false; case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: - case MachineOperand::MO_Placeholder: llvm_unreachable("should not exist post-isel"); } llvm_unreachable("unhandled machine operand type"); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 70dbe1bc5b95..4f7a0ab4e220 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1960,10 +1960,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. assert(RS && "Register scavenging not provided"); - const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + const TargetRegisterClass &RC = ARM::GPRRegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 165e9b7378c7..382f881f7741 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3358,8 +3358,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { - // FIXME: handle "fence singlethread" more efficiently. SDLoc dl(Op); + ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2)); + auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue()); + if (Scope == SynchronizationScope::SingleThread) + return Op; + if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get @@ -9476,8 +9480,11 @@ AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } - // Don't generate vpaddl+vmovn; we'll match it to vpadd later. - if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) + // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure + // we're using the entire input vector, otherwise there's a size/legality + // mismatch somewhere. 
+ if (nextIndex != Vec.getValueType().getVectorNumElements() || + Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) return SDValue(); // Create VPADDL node. diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 3b3606ef462a..a0e2ac4cbc6f 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -32,8 +32,8 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void ARMInstrInfo::getNoop(MCInst &NopInst) const { if (hasNOP()) { NopInst.setOpcode(ARM::HINT); NopInst.addOperand(MCOperand::createImm(0)); diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 4b1b7097b18d..c87fb97448c9 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -25,8 +25,8 @@ class ARMInstrInfo : public ARMBaseInstrInfo { public: explicit ARMInstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 703e8071b177..9d8ee5c3f9dc 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -5975,3 +5975,10 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } + +def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, + [(atomic_fence imm:$ordering, 0)]> { + let hasSideEffects = 1; + let Size = 0; + let AsmString = "@ COMPILER BARRIER"; +} diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index f2f426e86701..8048c758e998 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -953,7 +953,7 @@ let isAdd = 1 in { /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { - let isCommutable = 1 in + let isCommutable = 1, Uses = [CPSR] in def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm, @@ -1292,6 +1292,7 @@ def tSUBrr : // A8.6.212 /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. 
let hasPostISelHook = 1, Defs = [CPSR] in { + let Uses = [CPSR] in def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm, diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 816596b85721..1c13d51a468e 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -47,12 +47,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, unsigned SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); (void)SrcSize; - assert((DstSize == SrcSize || - // Copies are a means to setup initial types, the number of - // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - DstSize <= SrcSize)) && - "Copy with different width?!"); + // We use copies for trunc, so it's ok for the size of the destination to be + // smaller (the higher bits will just be undefined). + assert(DstSize <= SrcSize && "Copy with different width?!"); assert((RegBank->getID() == ARM::GPRRegBankID || RegBank->getID() == ARM::FPRRegBankID) && @@ -294,6 +291,28 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } break; } + case G_TRUNC: { + // The high bits are undefined, so there's nothing special to do, just + // treat it as a copy. + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() << "G_TRUNC operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_TRUNC on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } case G_ADD: case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); @@ -313,6 +332,16 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; + case G_SDIV: + assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation"); + I.setDesc(TII.get(ARM::SDIV)); + MIB.add(predOps(ARMCC::AL)); + break; + case G_UDIV: + assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation"); + I.setDesc(TII.get(ARM::UDIV)); + MIB.add(predOps(ARMCC::AL)); + break; case G_FADD: if (!selectFAdd(MIB, TII, MRI)) return false; @@ -332,6 +361,18 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { "Expected constant to live in a GPR"); I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + + auto &Val = I.getOperand(1); + if (Val.isCImm()) { + if (Val.getCImm()->getBitWidth() > 32) + return false; + Val.ChangeToImmediate(Val.getCImm()->getZExtValue()); + } + + if (!Val.isImm()) { + return false; + } + break; } case G_STORE: diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index fe9681439e6b..9b86030fdd29 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -13,6 +13,8 @@ #include "ARMLegalizerInfo.h" #include "ARMSubtarget.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" @@ -47,6 +49,18 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { for (auto Ty : {s1, s8, 
s16, s32}) setAction({Op, Ty}, Legal); + for (unsigned Op : {G_SDIV, G_UDIV}) { + for (auto Ty : {s8, s16}) + // FIXME: We need WidenScalar here, but in the case of targets with + // software division we'll also need Libcall afterwards. Treat as Custom + // until we have better support for chaining legalization actions. + setAction({Op, Ty}, Custom); + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Legal); + else + setAction({Op, s32}, Libcall); + } + for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); for (auto Ty : {s1, s8, s16}) @@ -75,3 +89,48 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { computeTables(); } + +bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + using namespace TargetOpcode; + + switch (MI.getOpcode()) { + default: + return false; + case G_SDIV: + case G_UDIV: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8)) + return false; + + // We need to widen to 32 bits and then maybe, if the target requires, + // transform into a libcall. + LegalizerHelper Helper(MIRBuilder.getMF()); + + MachineInstr *NewMI = nullptr; + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + // Store the new, 32-bit div instruction. + if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV) + NewMI = MI; + }); + + auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32)); + Helper.MIRBuilder.stopRecordingInsertions(); + if (Result == LegalizerHelper::UnableToLegalize) { + return false; + } + assert(NewMI && "Couldn't find widened instruction"); + assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) && + "Unexpected widened instruction"); + assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 && + "Unexpected type for the widened instruction"); + + Result = Helper.legalizeInstrStep(*NewMI); + if (Result == LegalizerHelper::UnableToLegalize) { + return false; + } + return true; + } + } +} diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h index 0b8a608a6bde..a9bdd367737e 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.h +++ b/lib/Target/ARM/ARMLegalizerInfo.h @@ -24,6 +24,9 @@ class ARMSubtarget; class ARMLegalizerInfo : public LegalizerInfo { public: ARMLegalizerInfo(const ARMSubtarget &ST); + + bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; }; } // End llvm namespace. 
#endif diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 0fd98268723a..9e9c1ba6c114 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -211,11 +211,9 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) .addImm(ARMCC::AL).addReg(0)); MCInst Noop; - Subtarget->getInstrInfo()->getNoopForElfTarget(Noop); + Subtarget->getInstrInfo()->getNoop(Noop); for (int8_t I = 0; I < NoopsInSledCount; I++) - { OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); - } OutStreamer->EmitLabel(Target); recordSled(CurSled, MI, Kind); diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index e47bd3a8963e..7325817d446b 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -221,8 +221,11 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_ADD: case G_SUB: case G_MUL: + case G_SDIV: + case G_UDIV: case G_SEXT: case G_ZEXT: + case G_TRUNC: case G_GEP: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 27bff4d75acf..0ebf55924647 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -24,8 +24,8 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void Thumb1InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tMOVr); NopInst.addOperand(MCOperand::createReg(ARM::R8)); NopInst.addOperand(MCOperand::createReg(ARM::R8)); diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 931914ad2799..e8d9a9c4ff14 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -25,8 +25,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb1InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 818ba85c7d40..2e2dfe035e26 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -32,8 +32,8 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. 
+void Thumb2InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tHINT); NopInst.addOperand(MCOperand::createImm(0)); NopInst.addOperand(MCOperand::createImm(ARMCC::AL)); diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 15d63300b6a2..c834ba73bfea 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -26,8 +26,8 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index 50bb50b44f27..d6491ce5c3bf 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -112,7 +112,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - unsigned BytesPerReg = TRI.getMinimalPhysRegClass(Reg)->getSize(); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + unsigned BytesPerReg = TRI.getRegSizeInBits(*RC) / 8; assert(BytesPerReg <= 2 && "Only 8 and 16 bit regs are supported."); unsigned RegIdx = ByteNumber / BytesPerReg; diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 13080a5d72f0..540e05a92997 100644 --- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -88,6 +88,9 @@ private: unsigned ArithOpcode, Block &MBB, BlockIt MBBI); + + /// Scavenges a free GPR8 register for use. + unsigned scavengeGPR8(MachineInstr &MI); }; char AVRExpandPseudo::ID = 0; @@ -577,24 +580,43 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned OpLo, OpHi, DstLoReg, DstHiReg; unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::LDRdPtr; OpHi = AVR::LDDRdPtrQ; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same"); + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); + + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(CurDstLoReg, RegState::Define) .addReg(SrcReg); + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); + + // Load high byte. auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(CurDstHiReg, RegState::Define) .addReg(SrcReg, getKillRegState(SrcIsKill)) .addImm(1); + if (TmpReg) { + // Move the high byte into the final destination. + buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); + + // Move the low byte from the scratch space into the final destination. 
+ buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); + } + MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -669,9 +691,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned OpLo, OpHi, DstLoReg, DstHiReg; unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); - bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::LDDRdPtrQ; OpHi = AVR::LDDRdPtrQ; @@ -679,60 +701,35 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) { assert(Imm <= 63 && "Offset is out of range"); - MachineInstr *MIBLO, *MIBHI; - - // HACK: We shouldn't have instances of this instruction - // where src==dest because the instruction itself is - // marked earlyclobber. We do however get this instruction when - // loading from stack slots where the earlyclobber isn't useful. - // - // In this case, just use a temporary register. - if (DstReg == SrcReg) { - RegScavenger RS; - - RS.enterBasicBlock(MBB); - RS.forward(MBBI); - - BitVector Candidates = - TRI->getAllocatableSet - (*MBB.getParent(), &AVR::GPR8RegClass); - - // Exclude all the registers being used by the instruction. - for (MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - Candidates.reset(MO.getReg()); - } - - BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass); - Available &= Candidates; + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); - signed TmpReg = Available.find_first(); - assert(TmpReg != -1 && "ran out of registers"); + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; - MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(TmpReg, RegState::Define) - .addReg(SrcReg) - .addImm(Imm); + // Load low byte. + auto MIBLO = buildMI(MBB, MBBI, OpLo) + .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg) + .addImm(Imm); - buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstLoReg).addReg(TmpReg); + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); - MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(TmpReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1); + // Load high byte. + auto MIBHI = buildMI(MBB, MBBI, OpHi) + .addReg(CurDstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(Imm + 1); + if (TmpReg) { + // Move the high byte into the final destination. buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); - } else { - MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(SrcReg) - .addImm(Imm); - MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1); + // Move the low byte from the scratch space into the final destination. 
+ buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); } MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -819,6 +816,32 @@ bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width, }); } +unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + RegScavenger RS; + + RS.enterBasicBlock(MBB); + RS.forward(MI); + + BitVector Candidates = + TRI->getAllocatableSet + (*MBB.getParent(), &AVR::GPR8RegClass); + + // Exclude all the registers being used by the instruction. + for (MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + Candidates.reset(MO.getReg()); + } + + BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass); + Available &= Candidates; + + signed Reg = Available.find_first(); + assert(Reg != -1 && "ran out of registers"); + return Reg; +} + template<> bool AVRExpandPseudo::expand<AVR::AtomicLoad8>(Block &MBB, BlockIt MBBI) { return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI); @@ -948,7 +971,6 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) { unsigned OpLo, OpHi, SrcLoReg, SrcHiReg; unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); - bool DstIsKill = MI.getOperand(0).isKill(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::STPtrRr; OpHi = AVR::STDPtrQRr; @@ -960,7 +982,7 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) { .addReg(SrcLoReg, getKillRegState(SrcIsKill)); auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg) .addImm(1) .addReg(SrcHiReg, getKillRegState(SrcIsKill)); diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp index b8cb2215ddb4..ab42a7aa9901 100644 --- a/lib/Target/AVR/AVRFrameLowering.cpp +++ b/lib/Target/AVR/AVRFrameLowering.cpp @@ -239,7 +239,7 @@ bool AVRFrameLowering::spillCalleeSavedRegisters( unsigned Reg = CSI[i - 1].getReg(); bool IsNotLiveIn = !MBB.isLiveIn(Reg); - assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 && + assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 && "Invalid register size"); // Add the callee-saved register as live-in only if it is not already a @@ -277,7 +277,7 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters( for (const CalleeSavedInfo &CCSI : CSI) { unsigned Reg = CCSI.getReg(); - assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 && + assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 && "Invalid register size"); BuildMI(MBB, MI, DL, TII.get(AVR::POPRd), Reg); diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp index 88f889260cce..afba66b2e69b 100644 --- a/lib/Target/AVR/AVRInstrInfo.cpp +++ b/lib/Target/AVR/AVRInstrInfo.cpp @@ -142,9 +142,9 @@ void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MFI.getObjectAlignment(FrameIndex)); unsigned Opcode = 0; - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { Opcode = AVR::STDPtrQRr; - } else if (RC->hasType(MVT::i16)) { + } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { Opcode = AVR::STDWPtrQRr; } else { llvm_unreachable("Cannot store this register into a stack slot!"); @@ -176,9 +176,9 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MFI.getObjectAlignment(FrameIndex)); unsigned Opcode = 0; - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { Opcode = AVR::LDDRdPtrQ; - } 
else if (RC->hasType(MVT::i16)) { + } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { // Opcode = AVR::LDDWRdPtrQ; //:FIXME: remove this once PR13375 gets fixed Opcode = AVR::LDDWRdYQ; diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 48798bd4a1da..5cc7eaf8add3 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -78,11 +78,12 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetRegisterClass * AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const { - if (RC->hasType(MVT::i16)) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { return &AVR::DREGSRegClass; } - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { return &AVR::GPR8RegClass; } diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index cb3049bf1500..07767d1037a9 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -347,7 +347,7 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub); const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS); - uint16_t BW = RC->getSize()*8; + uint16_t BW = TRI.getRegSizeInBits(*RC); return BW; } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index fda23f8f6b05..c8483f7e6e76 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -286,9 +286,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo(); const MachineFunction &MF = *MI.getParent()->getParent(); const auto &HST = MF.getSubtarget<HexagonSubtarget>(); - unsigned VectorSize = HST.useHVXSglOps() - ? Hexagon::VectorRegsRegClass.getSize() - : Hexagon::VectorRegs128BRegClass.getSize(); + const auto &VecRC = HST.useHVXSglOps() ? 
Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned VectorSize = HST.getRegisterInfo()->getSpillSize(VecRC); switch (Inst.getOpcode()) { default: return; diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 61f290ca98d7..8502bf24c02f 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -407,7 +407,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); if (RR.Sub == 0) { Begin = 0; - Width = RC->getSize()*8; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC); return true; } @@ -417,7 +417,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, case Hexagon::DoubleRegsRegClassID: case Hexagon::VecDblRegsRegClassID: case Hexagon::VecDblRegs128BRegClassID: - Width = RC->getSize()*8 / 2; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2; if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) Begin = Width; break; @@ -1054,8 +1054,8 @@ namespace { class RedundantInstrElimination : public Transformation { public: RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, - MachineRegisterInfo &mri) - : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) + : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; @@ -1070,6 +1070,7 @@ namespace { bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); const HexagonInstrInfo &HII; + const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; BitTracker &BT; }; @@ -1262,7 +1263,7 @@ bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, assert(MI.getOperand(OpN).isReg()); BitTracker::RegisterRef RR = MI.getOperand(OpN); const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); - uint16_t Width = RC->getSize()*8; + uint16_t Width = HRI.getRegSizeInBits(*RC); if (!GotBits) T.set(Begin, Begin+Width); @@ -2173,8 +2174,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI, const RegisterSet &AVs) { if (!GenBitSplit) return false; - if (CountBitSplit >= MaxBitSplit) - return false; + if (MaxBitSplit.getNumOccurrences()) { + if (CountBitSplit >= MaxBitSplit) + return false; + } unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2253,7 +2256,8 @@ bool BitSimplification::genBitSplit(MachineInstr *MI, continue; // Generate bitsplit where S is defined. - CountBitSplit++; + if (MaxBitSplit.getNumOccurrences()) + CountBitSplit++; MachineInstr *DefS = MRI.getVRegDef(S); assert(DefS != nullptr); DebugLoc DL = DefS->getDebugLoc(); @@ -2379,9 +2383,11 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI, const RegisterSet &AVs) { if (!GenExtract) return false; - if (CountExtract >= MaxExtract) - return false; - CountExtract++; + if (MaxExtract.getNumOccurrences()) { + if (CountExtract >= MaxExtract) + return false; + CountExtract++; + } unsigned W = RC.width(); unsigned RW = W; @@ -2651,7 +2657,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { Changed |= visitBlock(Entry, ImmG, AIG); RegisterSet ARE; // Available registers for RIE. 
- RedundantInstrElimination RIE(BT, HII, MRI); + RedundantInstrElimination RIE(BT, HII, HRI, MRI); bool Ried = visitBlock(Entry, RIE, ARE); if (Ried) { Changed = true; diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index d8ba5dcd35ad..9f8c9ded8127 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -559,10 +559,10 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, } unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); - switch (RC->getSize()) { - case 4: + switch (TRI->getRegSizeInBits(*RC)) { + case 32: return IfTrue ? A2_tfrt : A2_tfrf; - case 8: + case 64: return IfTrue ? A2_tfrpt : A2_tfrpf; } llvm_unreachable("Invalid register operand"); diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 0e2380f4316a..a04aca4afa0f 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1425,7 +1425,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, if (!SRegs[S->Reg]) continue; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); - int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), S->Offset); + int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset); MinOffset = std::min(MinOffset, S->Offset); CSI.push_back(CalleeSavedInfo(S->Reg, FI)); SRegs[S->Reg] = false; @@ -1437,11 +1437,12 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { unsigned R = x; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); - int Off = MinOffset - RC->getSize(); - unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + int Off = MinOffset - Size; + unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment()); assert(isPowerOf2_32(Align)); Off &= -Align; - int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), Off); + int FI = MFI.CreateFixedSpillStackObject(Size, Off); MinOffset = std::min(MinOffset, Off); CSI.push_back(CalleeSavedInfo(R, FI)); SRegs[R] = false; @@ -1677,10 +1678,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - unsigned Size = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned Size = HRI.getSpillSize(RC); + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned StoreOpc; @@ -1734,10 +1735,10 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - unsigned Size = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? 
Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned Size = HRI.getSpillSize(RC); + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned LoadOpc; @@ -1777,16 +1778,16 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, if (!MI->getOperand(0).isFI()) return false; + auto &HRI = *HST.getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned StoreOpc; @@ -1815,15 +1816,15 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, if (!MI->getOperand(1).isFI()) return false; + auto &HRI = *HST.getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned LoadOpc; @@ -1932,7 +1933,7 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, if (!needToReserveScavengingSpillSlots(MF, HRI, RC)) continue; unsigned Num = RC == &Hexagon::IntRegsRegClass ? 
NumberScavengerSlots : 1; - unsigned S = RC->getSize(), A = RC->getAlignment(); + unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC); for (unsigned i = 0; i < Num; i++) { int NewFI = MFI.CreateSpillStackObject(S, A); RS->addScavengingFrameIndex(NewFI); diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index b5948475e1f7..1829c5da02a6 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -26,6 +26,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -1206,10 +1207,9 @@ bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, if (!T) return false; - unsigned BW = T->getBitWidth(); - APInt K0(BW, 0), K1(BW, 0); - computeKnownBits(V, K0, K1, DL); - return K0.countLeadingOnes() >= IterCount; + KnownBits Known(T->getBitWidth()); + computeKnownBits(V, Known, DL); + return Known.Zero.countLeadingOnes() >= IterCount; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index c0c29b992238..22fc2474fae6 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -122,6 +122,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), } let usesCustomInserter = 1 in { + let Uses = [SR] in { def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc), "# Select8 PSEUDO", [(set GR8:$dst, @@ -130,6 +131,7 @@ let usesCustomInserter = 1 in { "# Select16 PSEUDO", [(set GR16:$dst, (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>; + } let Defs = [SR] in { def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), "# Shl8 PSEUDO", diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 2a9d96205eb9..134f7ac3aea3 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -273,9 +273,9 @@ void MipsAsmPrinter::printSavedRegsBitmask() { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); // size of stack area to which FP callee-saved regs are saved. - unsigned CPURegSize = Mips::GPR32RegClass.getSize(); - unsigned FGR32RegSize = Mips::FGR32RegClass.getSize(); - unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize(); + unsigned CPURegSize = TRI->getRegSizeInBits(Mips::GPR32RegClass) / 8; + unsigned FGR32RegSize = TRI->getRegSizeInBits(Mips::FGR32RegClass) / 8; + unsigned AFGR64RegSize = TRI->getRegSizeInBits(Mips::AFGR64RegClass) / 8; bool HasAFGR64Reg = false; unsigned CSFPRegsSize = 0; diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index 7af988c1f64d..cb9f676c237a 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -38,7 +38,7 @@ static bool isF128SoftLibCall(const char *CallSym) { /// This function returns true if Ty is fp128, {f128} or i128 which was /// originally a fp128. 
-static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { +static bool originalTypeIsF128(const Type *Ty, const char *Func) { if (Ty->isFP128Ty()) return true; @@ -46,12 +46,9 @@ static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { Ty->getStructElementType(0)->isFP128Ty()) return true; - const ExternalSymbolSDNode *ES = - dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode); - // If the Ty is i128 and the function being called is a long double emulation // routine, then the original type is f128. - return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol())); + return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); } MipsCCState::SpecialCallingConvType @@ -73,11 +70,11 @@ MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee, void MipsCCState::PreAnalyzeCallResultForF128( const SmallVectorImpl<ISD::InputArg> &Ins, - const TargetLowering::CallLoweringInfo &CLI) { + const Type *RetTy, const char *Call) { for (unsigned i = 0; i < Ins.size(); ++i) { OriginalArgWasF128.push_back( - originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode())); - OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy()); + originalTypeIsF128(RetTy, Call)); + OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy()); } } @@ -99,10 +96,10 @@ void MipsCCState::PreAnalyzeReturnForF128( void MipsCCState::PreAnalyzeCallOperands( const SmallVectorImpl<ISD::OutputArg> &Outs, std::vector<TargetLowering::ArgListEntry> &FuncArgs, - const SDNode *CallNode) { + const char *Func) { for (unsigned i = 0; i < Outs.size(); ++i) { OriginalArgWasF128.push_back( - originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode)); + originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func)); OriginalArgWasFloat.push_back( FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy()); CallOperandIsFixed.push_back(Outs[i].IsFixed); diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h index 081c393a09be..77ecc65b2eee 100644 --- a/lib/Target/Mips/MipsCCState.h +++ b/lib/Target/Mips/MipsCCState.h @@ -31,7 +31,7 @@ private: /// Identify lowered values that originated from f128 arguments and record /// this for use by RetCC_MipsN. void PreAnalyzeCallResultForF128(const SmallVectorImpl<ISD::InputArg> &Ins, - const TargetLowering::CallLoweringInfo &CLI); + const Type *RetTy, const char * Func); /// Identify lowered values that originated from f128 arguments and record /// this for use by RetCC_MipsN. @@ -42,7 +42,7 @@ private: void PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, std::vector<TargetLowering::ArgListEntry> &FuncArgs, - const SDNode *CallNode); + const char *Func); /// Identify lowered values that originated from f128 arguments and record /// this. 
@@ -73,8 +73,8 @@ public: AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn, std::vector<TargetLowering::ArgListEntry> &FuncArgs, - const SDNode *CallNode) { - PreAnalyzeCallOperands(Outs, FuncArgs, CallNode); + const char *Func) { + PreAnalyzeCallOperands(Outs, FuncArgs, Func); CCState::AnalyzeCallOperands(Outs, Fn); OriginalArgWasF128.clear(); OriginalArgWasFloat.clear(); @@ -99,9 +99,9 @@ public: } void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, - CCAssignFn Fn, - const TargetLowering::CallLoweringInfo &CLI) { - PreAnalyzeCallResultForF128(Ins, CLI); + CCAssignFn Fn, const Type *RetTy, + const char *Func) { + PreAnalyzeCallResultForF128(Ins, RetTy, Func); CCState::AnalyzeCallResult(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index c060cf06099d..a5c7bf7699ea 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -1260,8 +1260,10 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, emitInst(Mips::ADJCALLSTACKUP).addImm(16).addImm(0); if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, RetCC_Mips); + MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); + + CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy, + CLI.Symbol->getName().data()); // Only handle a single return value. if (RVLocs.size() != 1) diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index b2cf03976f81..ef05166503b2 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -119,7 +119,7 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { // Conservatively assume all callee-saved registers will be saved. for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { - unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize(); + unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); Offset = alignTo(Offset + Size, Size); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 93c5f496ce97..8f39ebd42a5c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2750,7 +2750,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // caller side but removing it breaks the frame size calculation. CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode()); + const ExternalSymbolSDNode *ES = + dyn_cast_or_null<const ExternalSymbolSDNode>(Callee.getNode()); + CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), + ES ? ES->getSymbol() : nullptr); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); @@ -2985,7 +2988,11 @@ SDValue MipsTargetLowering::LowerCallResult( SmallVector<CCValAssign, 16> RVLocs; MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI); + + const ExternalSymbolSDNode *ES = + dyn_cast_or_null<const ExternalSymbolSDNode>(CLI.Callee.getNode()); + CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI.RetTy, + ES ? ES->getSymbol() : nullptr); // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 5bf4c958c7b9..63034ecab93b 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -40,11 +40,7 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { const TargetRegisterClass *RC = STI.inMips16Mode() ? &Mips::CPU16RegsRegClass - : STI.inMicroMipsMode() - ? STI.hasMips64() - ? &Mips::GPRMM16_64RegClass - : &Mips::GPRMM16RegClass - : static_cast<const MipsTargetMachine &>(MF.getTarget()) + : static_cast<const MipsTargetMachine &>(MF.getTarget()) .getABI() .IsN64() ? &Mips::GPR64RegClass @@ -53,14 +49,15 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { } void MipsFunctionInfo::createEhDataRegsFI() { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); for (int I = 0; I < 4; ++I) { - const TargetRegisterClass *RC = + const TargetRegisterClass &RC = static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64() - ? &Mips::GPR64RegClass - : &Mips::GPR32RegClass; + ? Mips::GPR64RegClass + : Mips::GPR32RegClass; - EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), false); } } @@ -69,11 +66,12 @@ void MipsFunctionInfo::createISRRegFI() { // The current implementation only supports Mips32r2+ not Mips64rX. Status // is always 32 bits, ErrorPC is 32 or 64 bits dependent on architecture, // however Mips32r2+ is the supported architecture. - const TargetRegisterClass *RC = &Mips::GPR32RegClass; + const TargetRegisterClass &RC = Mips::GPR32RegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); for (int I = 0; I < 2; ++I) ISRDataRegFI[I] = MF.getFrameInfo().CreateStackObject( - RC->getSize(), RC->getAlignment(), false); + TRI.getSpillSize(RC), TRI.getSpillAlignment(RC), false); } bool MipsFunctionInfo::isEhDataRegFI(int FI) const { @@ -93,9 +91,10 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) { } int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); if (MoveF64ViaSpillFI == -1) { MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject( - RC->getSize(), RC->getAlignment(), false); + TRI.getSpillSize(*RC), TRI.getSpillAlignment(*RC), false); } return MoveF64ViaSpillFI; } diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp index f33857fe628f..68dcbdfb4211 100644 --- a/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -116,9 +116,10 @@ static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) { /// Return type of register Reg. 
static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); - assert(RC->vt_end() - RC->vt_begin() == 1); - return *RC->vt_begin(); + assert(TRI.legalclasstypes_end(*RC) - TRI.legalclasstypes_begin(*RC) == 1); + return *TRI.legalclasstypes_begin(*RC); } /// Do the following transformation: diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index ef8d18c6deb1..e765b4625206 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -260,7 +260,8 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, // copy dst_hi, $vr1 unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); - unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2; + const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst); + unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16; const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); @@ -858,6 +859,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); @@ -883,10 +885,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (ExpandPseudo(MF).expand()) { // The spill slot should be half the size of the accumulator. If target is // mips64, it should be 64-bit, otherwise it should be 32-bt. - const TargetRegisterClass *RC = STI.hasMips64() ? - &Mips::GPR64RegClass : &Mips::GPR32RegClass; - int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + const TargetRegisterClass &RC = STI.hasMips64() ? + Mips::GPR64RegClass : Mips::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlignment(RC), + false); RS->addScavengingFrameIndex(FI); } @@ -897,10 +900,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (isInt<16>(MaxSPOffset)) return; - const TargetRegisterClass *RC = - ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + const TargetRegisterClass &RC = + ABI.ArePtrs64bit() ? 
Mips::GPR64RegClass : Mips::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlignment(RC), + false); RS->addScavengingFrameIndex(FI); } diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 91e712a7a54e..ee074798563d 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -207,13 +207,16 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC164; - else if (RC->hasType(MVT::v16i8)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::ST_B; - else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || + TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::ST_H; - else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || + TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::ST_W; - else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || + TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::ST_D; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; @@ -280,13 +283,16 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; - else if (RC->hasType(MVT::v16i8)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::LD_B; - else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || + TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::LD_H; - else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || + TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::LD_W; - else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || + TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::LD_D; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; @@ -567,8 +573,8 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MCInstrDesc &Desc = get(Opc); assert(Desc.NumOperands == 2 && "Unary instruction expected."); const MipsRegisterInfo *RI = &getRegisterInfo(); - unsigned DstRegSize = getRegClass(Desc, 0, RI, MF)->getSize(); - unsigned SrcRegSize = getRegClass(Desc, 1, RI, MF)->getSize(); + unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); + unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); } diff --git a/lib/Target/Mips/Relocation.txt b/lib/Target/Mips/Relocation.txt new file mode 100644 index 000000000000..f1a6fd8645f6 --- /dev/null +++ b/lib/Target/Mips/Relocation.txt @@ -0,0 +1,125 @@ +MIPS Relocation Principles + +In LLVM, there are several elements of the llvm::ISD::NodeType enum +that deal with addresses and/or relocations. These are defined in +include/llvm/Target/TargetSelectionDAG.td, namely: + GlobalAddress, GlobalTLSAddress, JumpTable, ConstantPool, + ExternalSymbol, BlockAddress +The MIPS backend uses several principles to handle these. + +1. 
Code for lowering address references to machine dependent code is +factored into common code for generating different address forms and +is called by the relocation model specific lowering function, using +templated functions. For example: + + // lib/Target/Mips/MipsISelLowering.cpp + SDValue MipsTargetLowering:: + lowerJumpTable(SDValue Op, SelectionDAG &DAG) const + +calls + + template <class NodeTy> // lib/Target/Mips/MipsISelLowering.h + SDValue getAddrLocal(NodeTy *N, const SDLoc &DL, EVT Ty, + SelectionDAG &DAG, bool IsN32OrN64) const + +which calls the overloaded function: + + // lib/Target/Mips/MipsISelLowering.h + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + +2. Generic address nodes are lowered to some combination of target +independent and machine specific SDNodes (for example: +MipsISD::{Highest, Higher, Hi, Lo}) depending upon relocation model, +ABI, and compilation options. + +The choice of specific instructions that are to be used is delegated +to ISel which in turn relies on TableGen patterns to choose subtarget +specific instructions. For example, in getAddrLocal, the pseudo-code +generated is: + + (add (load (wrapper $gp, %got(sym))), %lo(sym)) + +where "%lo" represents an instance of an SDNode with opcode +"MipsISD::Lo", "wrapper" indicates one with opcode "MipsISD::Wrapper", +and "%got" the global table pointer "getGlobalReg(...)". The "add" is +"ISD::ADD", not a target dependent one. + +3. A TableGen multiclass pattern "MipsHiLoRelocs" is used to define a +template pattern parameterized over the load upper immediate +instruction, add operation, the zero register, and register class. +Here the instantiation of MipsHiLoRelocs in MipsInstrInfo.td is used +for MIPS32 to compute addresses for the static relocation model. + + // lib/Target/Mips/MipsInstrInfo.td + multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu, + Register ZeroReg, RegisterOperand GPROpnd> { + def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)), + (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; + ... + } + defm : MipsHiLoRelocs<LUi, ADDiu, ZERO, GPR32Opnd>; + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHiLoRelocs<LUi64, DADDiu, ZERO_64, GPR64Opnd>, SYM_32; + +The instantiation in Mips64InstrInfo.td is used for MIPS64 in ILP32 +mode, as guarded by the predicate "SYM_32" and also for a submode of +LP64 where symbols are assumed to be 32 bits wide. A similar +multiclass for MIPS64 in LP64 mode is also defined: + + // lib/Target/Mips/Mips64InstrInfo.td + multiclass MipsHighestHigherHiLoRelocs<Instruction Lui, + Instruction Daddiu> { + ... + def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), + (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), + (Daddiu ZERO_64, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ...
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + } + +and it is instantiated twice: + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHighestHigherHiLoRelocs<LUi64, DADDiu>, SYM_64; + // lib/Target/Mips/MicroMips64r6InstrInfo.td + defm : MipsHighestHigherHiLoRelocs<LUi64, DADDIU_MM64R6>, SYM_64, + ISA_MICROMIPS64R6; + +These patterns are used during instruction selection to match +MipsISD::{Highest, Higher, Hi, Lo} to a specific machine instruction +and operands. + +More details on how multiclasses in TableGen work can be found in the +section "Multiclass definitions and instances" in the document +"TableGen Language Introduction". + +4. Instruction definitions are multiply defined to cover the different +register classes. In some cases, such as LW/LW64, this also accounts +for the difference in the results of instruction execution. On MIPS32, +"lw" loads a 32 bit value from memory. On MIPS64, "lw" loads a 32 bit +value from memory and sign extends the value to 64 bits. + + // lib/Target/Mips/MipsInstrInfo.td + def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16_relaxed>, LUI_FM; + // lib/Target/Mips/Mips64InstrInfo.td + def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM; + +defines two names "LUi" and "LUi64" with two different register +classes, but with the same encoding---"LUI_FM". These instructions load a +16-bit immediate into bits 31-16 and clear the lower 16 bits. On MIPS64, +the result is sign-extended to 64 bits. diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 3026f0be242d..0f6c2e53e60a 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -38,7 +38,7 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); - if (DestRC->getSize() != SrcRC->getSize()) + if (RegInfo.getRegSizeInBits(*DestRC) != RegInfo.getRegSizeInBits(*SrcRC)) report_fatal_error("Copy one register into another with a different width"); unsigned Op; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 4c9430a2eca0..2a402deccbca 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1898,12 +1898,13 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + const TargetRegisterClass &GPRC = PPC::GPRCRegClass; + const TargetRegisterClass &G8RC = PPC::G8RCRegClass; + const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; + const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); + unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); // Might we have over-aligned allocas? bool HasAlVars = MFI.hasVarSizedObjects() && @@ -1911,9 +1912,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // These kinds of spills might need two registers.
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f7663d8e5185..4659a2ea8032 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9057,6 +9057,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -9070,7 +9071,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); @@ -9153,7 +9154,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); - const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 8e159f47ea2e..790a8902b3d2 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -440,8 +440,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(Opcode)); } -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. 
+void PPCInstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(PPC::NOP); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index f11aed8fa268..b30d09e03ec4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -269,7 +269,7 @@ public: /// unsigned getInstSizeInBytes(const MachineInstr &MI) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; std::pair<unsigned, unsigned> decomposeMachineOperandsTargetFlags(unsigned TF) const override; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 455d1ee1564a..f120a98e9457 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -3234,6 +3234,7 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -3245,7 +3246,8 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index c8ff9558cc88..fee008b9572a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -104,8 +104,9 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineOperand &LowOffsetOp = MI->getOperand(2); LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); - // Clear the kill flags for the base and index registers in the first - // instruction. + // Clear the kill flags on the registers in the first instruction. 
+ if (EarlierMI->getOperand(0).isReg() && EarlierMI->getOperand(0).isUse()) + EarlierMI->getOperand(0).setIsKill(false); EarlierMI->getOperand(1).setIsKill(false); EarlierMI->getOperand(3).setIsKill(false); @@ -1114,10 +1115,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( return nullptr; unsigned OpNum = Ops[0]; - assert(Size == - MF.getRegInfo() - .getRegClass(MI.getOperand(OpNum).getReg()) - ->getSize() && + assert(Size * 8 == + TRI->getRegSizeInBits(*MF.getRegInfo() + .getRegClass(MI.getOperand(OpNum).getReg())) && "Invalid size combination"); if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 && diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index d9c2dba5bace..4178ec0b28f0 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -45,10 +45,11 @@ using namespace llvm; //===----------------------------------------------------------------------===// MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) - if (TRC->hasType(T)) + if (TRI->isTypeLegalForClass(*TRC, T)) return T; DEBUG(errs() << "Unknown type for register number: " << RegNo); llvm_unreachable("Unknown register type"); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 324da650e74e..c1cfc82b4a81 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3094,6 +3094,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { else if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); else if (IDVal.startswith(".att_syntax")) { + getParser().setParsingInlineAsm(false); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "prefix") Parser.Lex(); @@ -3106,6 +3107,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return false; } else if (IDVal.startswith(".intel_syntax")) { getParser().setAssemblerDialect(1); + getParser().setParsingInlineAsm(true); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "noprefix") Parser.Lex(); diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index fdcc7e1ab7b0..19c93cfff0fe 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -95,7 +95,8 @@ void initializeFixupBWInstPassPass(PassRegistry &); /// encoding when possible in order to reduce code size. FunctionPass *createX86EvexToVexInsts(); -InstructionSelector *createX86InstructionSelector(X86Subtarget &, +InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &, X86RegisterBankInfo &); void initializeEvexToVexInstPassPass(PassRegistry &); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 8fcc8e31d5d4..d2f650cf8f47 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -273,6 +273,16 @@ def FeatureFastSHLDRotate "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; +// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka +// "string operations"). See "REP String Enhancement" in the Intel Software +// Development Manual. 
This feature essentially means that REP MOVSB will copy +// using the largest available size instead of copying bytes one by one, making +// it at least as fast as REPMOVS{W,D,Q}. +def FeatureERMSB + : SubtargetFeature< + "ermsb", "HasERMSB", "true", + "REP MOVS/STOS are fast">; + //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// @@ -498,6 +508,7 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [ FeatureAVX2, FeatureBMI, FeatureBMI2, + FeatureERMSB, FeatureFMA, FeatureLZCNT, FeatureMOVBE, diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index 137ef166aaeb..161bfa7b5474 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -53,7 +53,6 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, return; } - SmallVector<uint64_t, 4> BitOffsets; SmallVector<unsigned, 8> SplitRegs; EVT PartVT = TLI.getRegisterType(Context, VT); @@ -64,8 +63,10 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)), PartTy, OrigArg.Flags}; SplitArgs.push_back(Info); - PerformArgSplit(Info.Reg, PartVT.getSizeInBits() * i); + SplitRegs.push_back(Info.Reg); } + + PerformArgSplit(SplitRegs); } namespace { @@ -112,10 +113,9 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); SmallVector<ArgInfo, 8> SplitArgs; - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](unsigned Reg, uint64_t Offset) { - MIRBuilder.buildExtract(Reg, VReg, Offset); - }); + splitToValueTypes( + OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef<unsigned> Regs) { MIRBuilder.buildUnmerge(Regs, VReg); }); FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86); if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) @@ -183,22 +183,10 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, for (auto &Arg : F.args()) { ArgInfo OrigArg(VRegs[Idx], Arg.getType()); setArgFlags(OrigArg, Idx + 1, DL, F); - LLT Ty = MRI.getType(VRegs[Idx]); - unsigned Dst = VRegs[Idx]; - bool Split = false; splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](unsigned Reg, uint64_t Offset) { - if (!Split) { - Split = true; - Dst = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildUndef(Dst); - } - unsigned Tmp = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset); - Dst = Tmp; + [&](ArrayRef<unsigned> Regs) { + MIRBuilder.buildMerge(VRegs[Idx], Regs); }); - if (Dst != VRegs[Idx]) - MIRBuilder.buildCopy(VRegs[Idx], Dst); Idx++; } diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index 204e6974c702..8a8afb568298 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -34,14 +34,15 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<unsigned> VRegs) const override; + private: /// A function of this type is used to perform value split action. 
- typedef std::function<void(unsigned, uint64_t)> SplitArgTy; + typedef std::function<void(ArrayRef<unsigned>)> SplitArgTy; void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy SplitArg) const; }; -} // End of namespace llvm; +} // namespace llvm #endif diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 036f5d2610e4..b8477810b4c9 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2149,7 +2149,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { if (!LHSReg || !RHSReg) return false; - unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); + const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); + unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill); updateValueMap(I, ResultReg); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 8678a13b95d0..a94045cd536d 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1783,6 +1783,14 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset + FPDelta; } +int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &FrameReg, + int Adjustment) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + FrameReg = TRI->getStackRegister(); + return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment; +} + int X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, @@ -1839,9 +1847,6 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 && "we don't handle this case!"); - // Fill in FrameReg output argument. - FrameReg = TRI->getStackRegister(); - // This is how the math works out: // // %rsp grows (i.e. gets lower) left to right. 
Each box below is @@ -1866,12 +1871,8 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, // (C - E) == (C - A) - (B - A) + (B - E) // { Using [1], [2] and [3] above } // == getObjectOffset - LocalAreaOffset + StackSize - // - - // Get the Offset from the StackPointer - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); - return Offset + StackSize; + return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize); } bool X86FrameLowering::assignCalleeSavedSpillSlots( @@ -1923,14 +1924,15 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( continue; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); // ensure alignment - SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment(); + SpillSlotOffset -= std::abs(SpillSlotOffset) % Align; // spill into slot - SpillSlotOffset -= RC->getSize(); - int SlotIndex = - MFI.CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); + SpillSlotOffset -= Size; + int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); CSI[i - 1].setFrameIdx(SlotIndex); - MFI.ensureMaxAlignment(RC->getAlignment()); + MFI.ensureMaxAlignment(Align); } return true; diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 863dc8b22968..7d214cabad53 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -100,6 +100,8 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const override; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b5f29fb400ef..ada46643a5fe 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5060,8 +5060,8 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); + return DAG.getBuildVector( + ResultVT, dl, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -14424,8 +14424,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, // If the input is a buildvector just emit a smaller one. unsigned ElemsPerChunk = ResVT.getVectorNumElements(); if (In.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, - makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk)); + return DAG.getBuildVector( + ResVT, dl, makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk)); // Everything else is legal. 
return Op; @@ -25944,6 +25944,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); @@ -25960,7 +25961,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, DstReg = MI.getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); @@ -30207,7 +30209,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); - if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO)) { // If we changed the computation somewhere in the DAG, this change will @@ -33777,7 +33779,7 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); } @@ -35937,7 +35939,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to // turn into {ax},{dx}. // MVT::Other is used to specify clobber names. - if (Res.second->hasType(VT) || VT == MVT::Other) + if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) return Res; // Correct type already, nothing to do. // Get a matching integer of the correct size. i.e. 
"ax" with MVT::32 should @@ -35975,11 +35977,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64RegClass; - else if (X86::VR128RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT)) Res.second = &X86::VR128RegClass; - else if (X86::VR256RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT)) Res.second = &X86::VR256RegClass; - else if (X86::VR512RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT)) Res.second = &X86::VR512RegClass; else { // Type mismatch and not a clobber: Return an error; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index bfd21c062aa2..66382014f6e8 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -989,10 +989,12 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // Constraints = "$src1 = $dst" - def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>; - def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>; + let mayLoad = 1, mayStore = 1 in { + def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>; + def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>; + } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7b456fd68343..26444dd1f619 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -6284,9 +6284,11 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + const TargetRegisterClass &RC = *MRI.getRegClass(DstReg); assert(Cond.size() == 1 && "Invalid Cond array"); unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), - MRI.getRegClass(DstReg)->getSize(), + TRI.getRegSizeInBits(RC) / 8, false /*HasMemoryOperand*/); BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); } @@ -6557,7 +6559,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, bool HasAVX512 = STI.hasAVX512(); bool HasVLX = STI.hasVLX(); - switch (RC->getSize()) { + switch (STI.getRegisterInfo()->getSpillSize(*RC)) { default: llvm_unreachable("Unknown spill size"); case 1: @@ -6603,28 +6605,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; case 16: { - assert(X86::VR128XRegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); - // If stack is realigned we can use aligned stores. - if (isStackAligned) - return load ? - (HasVLX ? X86::VMOVAPSZ128rm : - HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : - HasAVX ? X86::VMOVAPSrm : - X86::MOVAPSrm): - (HasVLX ? X86::VMOVAPSZ128mr : - HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : - HasAVX ? X86::VMOVAPSmr : - X86::MOVAPSmr); - else - return load ? - (HasVLX ? X86::VMOVUPSZ128rm : - HasAVX512 ? 
X86::VMOVUPSZ128rm_NOVLX : - HasAVX ? X86::VMOVUPSrm : - X86::MOVUPSrm): - (HasVLX ? X86::VMOVUPSZ128mr : - HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : - HasAVX ? X86::VMOVUPSmr : - X86::MOVUPSmr); + if (X86::VR128XRegClass.hasSubClassEq(RC)) { + // If stack is realigned we can use aligned stores. + if (isStackAligned) + return load ? + (HasVLX ? X86::VMOVAPSZ128rm : + HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : + HasAVX ? X86::VMOVAPSrm : + X86::MOVAPSrm): + (HasVLX ? X86::VMOVAPSZ128mr : + HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : + HasAVX ? X86::VMOVAPSmr : + X86::MOVAPSmr); + else + return load ? + (HasVLX ? X86::VMOVUPSZ128rm : + HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX : + HasAVX ? X86::VMOVUPSrm : + X86::MOVUPSrm): + (HasVLX ? X86::VMOVUPSZ128mr : + HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : + HasAVX ? X86::VMOVUPSmr : + X86::MOVUPSmr); + } + if (X86::BNDRRegClass.hasSubClassEq(RC)) { + if (STI.is64Bit()) + return load ? X86::BNDMOVRM64rm : X86::BNDMOVMR64mr; + else + return load ? X86::BNDMOVRM32rm : X86::BNDMOVMR32mr; + } + llvm_unreachable("Unknown 16-byte regclass"); } case 32: assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); @@ -6709,9 +6719,9 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= RC->getSize() && + assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && "Stack slot too small for store"); - unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); + unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); @@ -6728,7 +6738,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); @@ -6748,7 +6759,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); + unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); @@ -6763,7 +6774,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); @@ -7222,7 +7234,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, NewOpc = getSETFromCond(NewCC, HasMemoryOperand); else { unsigned DstReg = 
Instr.getOperand(0).getReg(); - NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + NewOpc = getCMovFromCond(NewCC, TRI->getRegSizeInBits(*DstRC)/8, HasMemoryOperand); } @@ -7750,7 +7763,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( unsigned DstIdx = (Imm >> 4) & 3; unsigned SrcIdx = (Imm >> 6) & 3; - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size <= RCSize && 4 <= Align) { int PtrOffset = SrcIdx * 4; unsigned NewImm = (DstIdx << 4) | ZMask; @@ -7772,7 +7787,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( // To fold the load, adjust the pointer to the upper and use (V)MOVLPS. // TODO: In most cases AVX doesn't have a 8-byte alignment requirement. if (OpNum == 2) { - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size <= RCSize && 8 <= Align) { unsigned NewOpCode = (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm : @@ -7861,7 +7878,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( return nullptr; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, + &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size < RCSize) { // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -8302,11 +8322,13 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineFunction &MF) { unsigned Opc = LoadMI.getOpcode(); unsigned UserOpc = UserMI.getOpcode(); - unsigned RegSize = - MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg()); + unsigned RegSize = TRI.getRegSizeInBits(*RC); if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) && - RegSize > 4) { + RegSize > 32) { // These instructions only load 32 bits, we can't fold them if the // destination register is wider than 32 bits (4 bytes), and its user // instruction isn't scalar (SS). @@ -8357,7 +8379,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, } if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) && - RegSize > 8) { + RegSize > 64) { // These instructions only load 64 bits, we can't fold them if the // destination register is wider than 64 bits (8 bytes), and its user // instruction isn't scalar (SD). 
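A note on units in the X86InstrInfo hunks above: TargetRegisterInfo::getRegSizeInBits() reports the register width in bits, so call sites that previously used the byte-valued RC->getSize() either divide by 8 (the CMOV opcode choice and the memory-fold size checks) or compare against 32/64 directly (the MOVSS/MOVSD partial-load checks). A small self-contained sketch of the two idioms, with a made-up value standing in for a real register class:

#include <iostream>

int main() {
  // Stand-in for TRI.getRegSizeInBits(*RC): a 128-bit vector class such as VR128.
  const unsigned RegSizeInBits = 128;

  // Idiom 1: convert to bytes where a byte count is needed (CMOV selection,
  // fold-size checks in the hunks above).
  const unsigned RegSizeInBytes = RegSizeInBits / 8;     // 16

  // Idiom 2: compare in bits directly. A scalar MOVSS load writes 32 bits, so
  // folding is rejected when the destination class is wider than 32 bits.
  const unsigned ScalarLoadBits = 32;
  const bool CannotFoldScalarLoad = RegSizeInBits > ScalarLoadBits;

  std::cout << RegSizeInBytes << ' ' << std::boolalpha << CannotFoldScalarLoad << '\n';
}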
@@ -8702,6 +8724,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool FoldedStore = I->second.second & TB_FOLDED_STORE; const MCInstrDesc &MCID = get(Opc); MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); unsigned NumDefs = MCID.NumDefs; std::vector<SDValue> AddrOps; @@ -8724,7 +8747,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. SDNode *Load = nullptr; if (FoldedLoad) { - EVT VT = *RC->vt_begin(); + EVT VT = *TRI.legalclasstypes_begin(*RC); std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs = MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), @@ -8736,7 +8759,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); + unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, @@ -8752,7 +8775,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, const TargetRegisterClass *DstRC = nullptr; if (MCID.getNumDefs() > 0) { DstRC = getRegClass(MCID, 0, &RI, MF); - VTs.push_back(*DstRC->vt_begin()); + VTs.push_back(*TRI.legalclasstypes_begin(*DstRC)); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); @@ -8781,7 +8804,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); + unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = @@ -9514,7 +9537,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { } /// Return the noop instruction to use for a noop. 
-void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +void X86InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 2fee48570ce1..38567831b3a4 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -457,7 +457,7 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index e31d2769047b..c3def461afdc 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -897,6 +897,7 @@ def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; +def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e1bf28cbf612..f22a50200c9a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4602,17 +4602,17 @@ def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], IIC_SSE_MOVDQ>, Sched<[WriteMove]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; let isCodeGenOnly = 1 in def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, Sched<[WriteMove]>; } // ExeDomain = SSEPackedInt @@ -4681,7 +4681,7 @@ def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), VEX; def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; @@ -4694,7 +4694,7 @@ def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs), [], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>; } // ExeDomain = SSEPackedInt @@ -4721,7 +4721,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, 
(bitconvert FR64:$src))], IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), @@ -4811,12 +4811,12 @@ let Predicates = [UseSSE2] in { } } -// These are the correct encodings of the instructions so that we know how to -// read correct assembly, even though we continue to emit the wrong ones for -// compatibility with Darwin's buggy assembler. -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of +// "movq" due to MacOS parsing limitation. In order to parse old assembly, we add +// these aliases. +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; // Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX. def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", @@ -7144,33 +7144,37 @@ let Predicates = [UseSSE41] in { /// SS42I_binop_rm - Simple SSE 4.2 binary operator multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, OpndItins itins, + bit Is2Addr = 1> { def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>; def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>; + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, - loadv2i64, i128mem, 0>, VEX_4V, VEX_WIG; + loadv2i64, i128mem, SSE_INTALU_ITINS_P, 0>, + VEX_4V, VEX_WIG; let Predicates = [HasAVX2] in defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, - loadv4i64, i256mem, 0>, VEX_4V, VEX_L, VEX_WIG; + loadv4i64, i256mem, SSE_INTALU_ITINS_P, 0>, + VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, SSE_INTALU_ITINS_P>; //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index fb9315792892..d0f1b7091da9 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -39,11 +39,16 @@ using namespace llvm; namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class X86InstructionSelector : public InstructionSelector { public: - X86InstructionSelector(const X86Subtarget &STI, + X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const 
override; private: @@ -70,10 +75,17 @@ private: bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + const X86TargetMachine &TM; const X86Subtarget &STI; const X86InstrInfo &TII; const X86RegisterInfo &TRI; const X86RegisterBankInfo &RBI; + bool OptForSize; + bool OptForMinSize; + + PredicateBitset AvailableFeatures; + PredicateBitset computeAvailableFeatures(const MachineFunction *MF, + const X86Subtarget *Subtarget) const; #define GET_GLOBALISEL_TEMPORARIES_DECL #include "X86GenGlobalISel.inc" @@ -86,10 +98,12 @@ private: #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_IMPL -X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, +X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, + const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), OptForSize(false), + OptForMinSize(false), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -181,6 +195,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } +void X86InstructionSelector::beginFunction(const MachineFunction &MF) { + OptForSize = MF.getFunction()->optForSize(); + OptForMinSize = MF.getFunction()->optForMinSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -571,7 +591,8 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I, } InstructionSelector * -llvm::createX86InstructionSelector(X86Subtarget &Subtarget, +llvm::createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &Subtarget, X86RegisterBankInfo &RBI) { - return new X86InstructionSelector(Subtarget, RBI); + return new X86InstructionSelector(TM, Subtarget, RBI); } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 9bab9a4cf3ba..1f16f3c9a14d 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -137,25 +137,29 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case X86::FR32RegClassID: case X86::FR64RegClassID: // If AVX-512 isn't supported we should only inflate to these classes. - if (!Subtarget.hasAVX512() && Super->getSize() == RC->getSize()) + if (!Subtarget.hasAVX512() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128RegClassID: case X86::VR256RegClassID: // If VLX isn't supported we should only inflate to these classes. - if (!Subtarget.hasVLX() && Super->getSize() == RC->getSize()) + if (!Subtarget.hasVLX() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128XRegClassID: case X86::VR256XRegClassID: // If VLX isn't support we shouldn't inflate to these classes. - if (Subtarget.hasVLX() && Super->getSize() == RC->getSize()) + if (Subtarget.hasVLX() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::FR32XRegClassID: case X86::FR64XRegClassID: // If AVX-512 isn't support we shouldn't inflate to these classes. 
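The X86InstructionSelector changes above introduce a beginFunction hook so that per-function facts (optForSize, optForMinSize, and the TableGen-generated feature bitset) are computed once per function instead of once per instruction. A self-contained analogue of that caching pattern, with a toy selector in place of the real GlobalISel classes (all names here are illustrative):

#include <bitset>
#include <iostream>

struct FunctionInfo {            // stand-in for per-function attributes
  bool OptSize;
  bool MinSize;
};

class ToySelector {
  bool OptForSize = false;
  bool OptForMinSize = false;
  std::bitset<8> AvailableFeatures;  // stand-in for the TableGen PredicateBitset

  static std::bitset<8> computeAvailableFeatures(const FunctionInfo &FI) {
    std::bitset<8> B;
    B[0] = FI.OptSize;               // real bits would come from the subtarget
    B[1] = FI.MinSize;               // plus function attributes
    return B;
  }

public:
  void beginFunction(const FunctionInfo &FI) {
    OptForSize = FI.OptSize;         // cache once per function
    OptForMinSize = FI.MinSize;
    AvailableFeatures = computeAvailableFeatures(FI);
  }
  bool select() const {
    // A real select() would consult the cached bits while matching patterns.
    return AvailableFeatures.any() && !(OptForSize && OptForMinSize);
  }
};

int main() {
  ToySelector S;
  S.beginFunction({/*OptSize=*/true, /*MinSize=*/false});
  std::cout << std::boolalpha << S.select() << '\n';
}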
- if (Subtarget.hasAVX512() && Super->getSize() == RC->getSize()) + if (Subtarget.hasAVX512() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::GR8RegClassID: @@ -168,7 +172,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case X86::VR512RegClassID: // Don't return a super-class that would shrink the spill size. // That can happen with the vector and float classes. - if (Super->getSize() == RC->getSize()) + if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; } Super = *I++; @@ -669,32 +673,28 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - unsigned BasePtr; - unsigned Opc = MI.getOpcode(); - bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm || - Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64; - - if (hasBasePointer(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); - else if (needsStackRealignment(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); - else if (AfterFPPop) - BasePtr = StackPtr; - else - BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); + // Determine base register and offset. + int FIOffset; + unsigned BasePtr; + if (MI.isReturn()) { + assert((!needsStackRealignment(MF) || + MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && + "Return instruction can only reference SP relative frame objects"); + FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else { + FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); + } // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. - unsigned IgnoredFrameReg; + unsigned Opc = MI.getOpcode(); if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - FI.ChangeToImmediate(Offset); + FI.ChangeToImmediate(FIOffset); return; } @@ -710,15 +710,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // FrameIndex with base register. Add an offset to the offset. MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false); - // Now add the frame object offset to the offset from EBP. - int FIOffset; - if (AfterFPPop) { - // Tail call jmp happens after FP is popped. 
- const MachineFrameInfo &MFI = MF.getFrameInfo(); - FIOffset = MFI.getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); - } else - FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - if (BasePtr == StackPtr) FIOffset += SPAdj; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 9da8a18965ea..1a72a0ba3a64 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -106,7 +106,6 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue Count; ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); unsigned BytesLeft = 0; - bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; @@ -163,20 +162,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, - CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - } else if (BytesLeft) { + if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); @@ -195,6 +181,24 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( return Chain; } +namespace { + +// Represents a cover of a buffer of SizeVal bytes with blocks of size +// AVT, as well as how many bytes remain (BytesLeft is always smaller than +// the block size). +struct RepMovsRepeats { + RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) { + const unsigned UBytes = AVT.getSizeInBits() / 8; + Count = SizeVal / UBytes; + BytesLeft = SizeVal % UBytes; + } + + unsigned Count; + unsigned BytesLeft; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, @@ -229,7 +233,12 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( return SDValue(); MVT AVT; - if (Align & 1) + if (Subtarget.hasERMSB()) + // If the target has enhanced REPMOVSB, then it's at least as fast to use + // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle + // BytesLeft. + AVT = MVT::i8; + else if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; @@ -240,14 +249,18 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( // QWORD aligned AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal, dl); - unsigned BytesLeft = SizeVal % UBytes; + RepMovsRepeats Repeats(SizeVal, AVT); + if (Repeats.BytesLeft > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When agressively optimizing for size, avoid generating the code to handle + // BytesLeft. + AVT = MVT::i8; + Repeats = RepMovsRepeats(SizeVal, AVT); + } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? 
X86::RCX : X86::ECX, - Count, InFlag); + DAG.getIntPtrConstant(Repeats.Count, dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -262,9 +275,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); - if (BytesLeft) { + if (Repeats.BytesLeft) { // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; + unsigned Offset = SizeVal - Repeats.BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -275,7 +288,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), + DAG.getConstant(Repeats.BytesLeft, dl, + SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 92a68759195c..4154530d04e7 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -303,6 +303,7 @@ void X86Subtarget::initializeEnvironment() { HasFastVectorFSQRT = false; HasFastLZCNT = false; HasFastSHLDRotate = false; + HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d0d88d326949..fd057f36c890 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -232,6 +232,9 @@ protected: /// True if SHLD based rotate is fast. bool HasFastSHLDRotate; + /// True if the processor has enhanced REP MOVSB/STOSB. + bool HasERMSB; + /// True if the short functions should be padded to prevent /// a stall when returning too early. 
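RepMovsRepeats in the memcpy lowering above simply splits the byte count into whole rep-mov blocks of the chosen element width plus a remainder smaller than that width. The hunk switches to 1-byte blocks when the target has ERMSB or when optimizing hard for size, because that makes the remainder zero and removes the tail-copy code. A self-contained sketch of the arithmetic:

#include <cstdint>
#include <iostream>

struct RepeatCount {
  uint64_t Count;      // number of rep-mov iterations of BlockBytes each
  uint64_t BytesLeft;  // trailing bytes needing a separate small copy
  RepeatCount(uint64_t SizeVal, uint64_t BlockBytes)
      : Count(SizeVal / BlockBytes), BytesLeft(SizeVal % BlockBytes) {}
};

int main() {
  RepeatCount QWord(1000, 8);  // rep movsq: 125 iterations, 0 bytes left
  RepeatCount Tail(1003, 8);   // rep movsq: 125 iterations, 3 bytes left
  RepeatCount Byte(1003, 1);   // rep movsb: no tail copy needed at all
  std::cout << QWord.Count << '/' << QWord.BytesLeft << ' '
            << Tail.Count  << '/' << Tail.BytesLeft  << ' '
            << Byte.Count  << '/' << Byte.BytesLeft  << '\n';
}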
bool PadShortFunctions; @@ -472,6 +475,7 @@ public: bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 03a1958121ab..623cf38aa951 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -286,7 +286,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI)); + GISel->InstSelector.reset(createX86InstructionSelector(*this, *I, *RBI)); #endif I->setGISelAccessor(*GISel); } diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index a752357400b3..784612038c09 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -575,18 +575,17 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { assert(RS && "requiresRegisterScavenging failed"); MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); // Reserve slots close to SP or frame pointer for Scavenging spills. // When using SP for small frames, we don't need any scratch registers. // When using SP for large frames, we may need 2 scratch registers. // When using FP, for large or small frames, we may need 1 scratch register. 
+ unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); if (XFI->isLargeFrame(MF) || hasFP(MF)) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); if (XFI->isLargeFrame(MF) && !hasFP(MF)) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 45437815fa37..2efcd46cd8d4 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1605,7 +1605,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) || + if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) || TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); @@ -1622,7 +1622,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Time, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index e91536ca1e83..75af0e97dfb5 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -10,6 +10,7 @@ #include "XCoreMachineFunctionInfo.h" #include "XCoreInstrInfo.h" #include "llvm/IR/Function.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -35,13 +36,15 @@ int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) { if (LRSpillSlotSet) { return LRSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); if (! MF.getFunction()->isVarArg()) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. 
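The XCore (and earlier X86) combine changes above reflect the same interface move: ShrinkDemandedConstant is now a method on TargetLowering that takes the TargetLoweringOpt as an argument, so the call becomes TLI.ShrinkDemandedConstant(Op, DemandedMask, TLO). What the routine does is unchanged; conceptually it narrows a constant operand to the bits its users actually demand, as in this self-contained sketch (plain 64-bit masks instead of SDValues; the helper name is illustrative):

#include <cstdint>
#include <iostream>

// If the constant has bits set that nobody demands, replace it with C & Demanded
// and report that a change was made.
static bool shrinkDemandedConstant(uint64_t &C, uint64_t Demanded) {
  if ((C & ~Demanded) == 0)
    return false;            // already minimal
  C &= Demanded;             // drop the undemanded bits
  return true;
}

int main() {
  uint64_t C = 0xFFFF;                                  // e.g. an AND mask
  bool Changed = shrinkDemandedConstant(C, 0x00FF);     // only the low byte is demanded
  std::cout << std::boolalpha << Changed << " 0x" << std::hex << C << '\n';  // true 0xff
}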
- LRSpillSlot = MFI.CreateFixedObject(RC->getSize(), 0, true); + LRSpillSlot = MFI.CreateFixedObject(TRI.getSpillSize(RC), 0, true); } else { - LRSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + LRSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), true); } LRSpillSlotSet = true; return LRSpillSlot; @@ -51,9 +54,11 @@ int XCoreFunctionInfo::createFPSpillSlot(MachineFunction &MF) { if (FPSpillSlotSet) { return FPSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - FPSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + FPSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), true); FPSpillSlotSet = true; return FPSpillSlot; } @@ -62,10 +67,13 @@ const int* XCoreFunctionInfo::createEHSpillSlot(MachineFunction &MF) { if (EHSpillSlotSet) { return EHSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - EHSpillSlot[0] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); - EHSpillSlot[1] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); + EHSpillSlot[0] = MFI.CreateStackObject(Size, Align, true); + EHSpillSlot[1] = MFI.CreateStackObject(Size, Align, true); EHSpillSlotSet = true; return EHSpillSlot; } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index a2f6e5639d9d..78e71c18fe29 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -17,7 +17,9 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -27,10 +29,21 @@ #include "llvm/Transforms/Utils/CodeExtractor.h" using namespace llvm; -#define DEBUG_TYPE "partialinlining" +#define DEBUG_TYPE "partial-inlining" STATISTIC(NumPartialInlined, "Number of functions partially inlined"); +// Command line option to disable partial-inlining. The default is false: +static cl::opt<bool> + DisablePartialInlining("disable-partial-inlining", cl::init(false), + cl::Hidden, cl::desc("Disable partial ininling")); + +// Command line option to set the maximum number of partial inlining allowed +// for the module. The default value of -1 means no limit. +static cl::opt<int> MaxNumPartialInlining( + "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of partial inlining. 
The default is unlimited")); + namespace { struct PartialInlinerImpl { PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} @@ -39,6 +52,12 @@ struct PartialInlinerImpl { private: InlineFunctionInfo IFI; + int NumPartialInlining = 0; + + bool IsLimitReached() { + return (MaxNumPartialInlining != -1 && + NumPartialInlining >= MaxNumPartialInlining); + } }; struct PartialInlinerLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid @@ -66,6 +85,9 @@ struct PartialInlinerLegacyPass : public ModulePass { Function *PartialInlinerImpl::unswitchFunction(Function *F) { // First, verify that this function is an unswitching candidate... + if (F->hasAddressTaken()) + return nullptr; + BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) @@ -149,11 +171,29 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { // Inline the top-level if test into all callers. std::vector<User *> Users(DuplicateFunction->user_begin(), DuplicateFunction->user_end()); - for (User *User : Users) + + for (User *User : Users) { + CallSite CS; if (CallInst *CI = dyn_cast<CallInst>(User)) - InlineFunction(CI, IFI); + CS = CallSite(CI); else if (InvokeInst *II = dyn_cast<InvokeInst>(User)) - InlineFunction(II, IFI); + CS = CallSite(II); + else + llvm_unreachable("All uses must be calls"); + + if (IsLimitReached()) + continue; + NumPartialInlining++; + + OptimizationRemarkEmitter ORE(CS.getCaller()); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + BasicBlock *Block = CS.getParent(); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block) + << ore::NV("Callee", F) << " partially inlined into " + << ore::NV("Caller", CS.getCaller())); + + InlineFunction(CS, IFI); + } // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. @@ -166,6 +206,9 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { } bool PartialInlinerImpl::run(Module &M) { + if (DisablePartialInlining) + return false; + std::vector<Function *> Worklist; Worklist.reserve(M.size()); for (Function &F : M) diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index f11b58d1adc4..0d5910ebbfcc 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -282,6 +282,12 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { } if (!PGOInstrUse.empty()) MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + // Indirect call promotion that promotes intra-module targets only. + // For ThinLTO this is done earlier due to interactions with globalopt + // for imported functions. We don't run this at -O0. + if (OptLevel > 0) + MPM.add( + createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { @@ -414,11 +420,14 @@ void PassManagerBuilder::populateModulePassManager( else if (!GlobalExtensions->empty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + + // Rename anon globals to be able to export them in the summary. + // This has to be done after we add the extensions to the pass manager + // as there could be passes (e.g. Adddress sanitizer) which introduce + // new unnamed globals. 
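The PartialInliner changes above add a command-line cap (-max-partial-inlining, default -1 for unlimited) and count successful partial inlines against it, alongside the new per-inline optimization remark. The gating logic is small enough to replay in a self-contained sketch (plain globals stand in for the cl::opt variables; only the option semantics are taken from the hunk):

#include <iostream>

static int MaxNumPartialInlining = -1;  // -1 means "no limit"
static int NumPartialInlining = 0;

static bool isLimitReached() {
  return MaxNumPartialInlining != -1 &&
         NumPartialInlining >= MaxNumPartialInlining;
}

int main() {
  MaxNumPartialInlining = 2;            // as if -max-partial-inlining=2 were passed
  for (int CallSite = 0; CallSite < 5; ++CallSite) {
    if (isLimitReached())
      continue;                         // mirrors the 'continue' in the pass
    ++NumPartialInlining;
    std::cout << "partially inlined call site " << CallSite << '\n';
  }
  // Only call sites 0 and 1 are inlined; the remaining three are skipped.
}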
if (PrepareForThinLTO) - // Rename anon globals to be able to export them in the summary. MPM.add(createNameAnonGlobalPass()); - - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); return; } @@ -468,16 +477,10 @@ void PassManagerBuilder::populateModulePassManager( // For SamplePGO in ThinLTO compile phase, we do not want to do indirect // call promotion as it will change the CFG too much to make the 2nd // profile annotation in backend more difficult. - if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) { - /// PGO instrumentation is added during the compile phase for ThinLTO, do - /// not run it a second time + // PGO instrumentation is added during the compile phase for ThinLTO, do + // not run it a second time + if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); - // Indirect call promotion that promotes intra-module targets only. - // For ThinLTO this is done earlier due to interactions with globalopt - // for imported functions. - MPM.add( - createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); - } if (EnableNonLTOGlobalsModRef) // We add a module alias analysis pass here. In part due to bugs in the @@ -677,6 +680,11 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopSinkPass()); // Get rid of LCSSA nodes. MPM.add(createInstructionSimplifierPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. + MPM.add(createCFGSimplificationPass()); + addExtensionsToPM(EP_OptimizerLast, MPM); } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index e30a4bafb9b0..030461004f56 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -794,6 +795,11 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { if (Opnd->isConstant()) continue; + // The constant check above is really for a few special constant + // coefficients. + if (isa<UndefValue>(Opnd->getSymVal())) + continue; + const FAddendCoef &CE = Opnd->getCoef(); if (CE.isMinusOne() || CE.isMinusTwo()) NegOpndNum++; @@ -894,24 +900,22 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, return true; unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); + KnownBits LHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, 0, &CxtI); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); + KnownBits RHSKnown(BitWidth); + computeKnownBits(RHS, RHSKnown, 0, &CxtI); // Addition of two 2's complement numbers having opposite signs will never // overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) + if ((LHSKnown.One[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]) || + (LHSKnown.Zero[BitWidth - 1] && RHSKnown.One[BitWidth - 1])) return true; // Check if carry bit of addition will not cause overflow. 
- if (checkRippleForAdd(LHSKnownZero, RHSKnownZero)) + if (checkRippleForAdd(LHSKnown.Zero, RHSKnown.Zero)) return true; - if (checkRippleForAdd(RHSKnownZero, LHSKnownZero)) + if (checkRippleForAdd(RHSKnown.Zero, LHSKnown.Zero)) return true; return false; @@ -931,18 +935,16 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, return true; unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); + KnownBits LHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, 0, &CxtI); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); + KnownBits RHSKnown(BitWidth); + computeKnownBits(RHS, RHSKnown, 0, &CxtI); // Subtraction of two 2's complement numbers having identical signs will // never overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1])) + if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) || + (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1])) return true; // TODO: implement logic similar to checkRippleForAdd @@ -1113,10 +1115,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // a sub and fuse this add with it. if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { IntegerType *IT = cast<IntegerType>(I.getType()); - APInt LHSKnownOne(IT->getBitWidth(), 0); - APInt LHSKnownZero(IT->getBitWidth(), 0); - computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I); - if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) + KnownBits LHSKnown(IT->getBitWidth()); + computeKnownBits(XorLHS, LHSKnown, 0, &I); + if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } @@ -1385,39 +1386,58 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { Value *LHSIntVal = LHSConv->getOperand(0); + Type *FPType = LHSConv->getType(); + + // TODO: This check is overly conservative. In many cases known bits + // analysis can tell us that the result of the addition has less significant + // bits than the integer type can hold. + auto IsValidPromotion = [](Type *FTy, Type *ITy) { + Type *FScalarTy = FTy->getScalarType(); + Type *IScalarTy = ITy->getScalarType(); + + // Do we have enough bits in the significand to represent the result of + // the integer addition? + unsigned MaxRepresentableBits = + APFloat::semanticsPrecision(FScalarTy->getFltSemantics()); + return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits; + }; // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) // ... if the constant fits in the integer value. This is useful for things // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer // requires a constant pool load, and generally allows the add to be better // instcombined. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { - Constant *CI = - ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { - // Insert the new integer add. 
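The InstCombine hunks above are a mechanical migration: the paired KnownZero/KnownOne APInts become a single KnownBits value whose .Zero and .One members are filled by computeKnownBits. The sign-bit reasoning itself is untouched, as in this self-contained sketch of the opposite-signs check for signed-add overflow (a 32-bit stand-in for llvm::KnownBits; names are illustrative):

#include <cstdint>
#include <iostream>

// A bit set in Zero is known to be 0, a bit set in One is known to be 1.
struct KnownBits32 {
  uint32_t Zero = 0;
  uint32_t One = 0;
};

static constexpr uint32_t SignBit = 1u << 31;

// Adding two values with known-opposite signs can never overflow in two's complement.
static bool willNotOverflowSignedAdd(const KnownBits32 &L, const KnownBits32 &R) {
  return ((L.One & SignBit) && (R.Zero & SignBit)) ||
         ((L.Zero & SignBit) && (R.One & SignBit));
}

int main() {
  KnownBits32 Neg; Neg.One  = SignBit;  // sign bit known 1: value is negative
  KnownBits32 Pos; Pos.Zero = SignBit;  // sign bit known 0: value is non-negative
  std::cout << std::boolalpha << willNotOverflowSignedAdd(Neg, Pos) << '\n';  // true
}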
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } - } // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { Value *RHSIntVal = RHSConv->getOperand(0); - - // Only do this if x/y have the same type, if at least one of them has a - // single use (so we don't increase the number of int->fp conversions), - // and if the integer add will not overflow. - if (LHSIntVal->getType() == RHSIntVal->getType() && - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + // It's enough to check LHS types only because we require int types to + // be the same for this transform. + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + // Only do this if x/y have the same type, if at least one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSIntVal->getType() == RHSIntVal->getType() && + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + RHSIntVal, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } } } @@ -1617,10 +1637,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. 
if (Op0C->isMask()) { - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(Op1, RHSKnownZero, RHSKnownOne, 0, &I); - if ((*Op0C | RHSKnownZero).isAllOnesValue()) + KnownBits RHSKnown(BitWidth); + computeKnownBits(Op1, RHSKnown, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateXor(Op1, Op0); } } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3a98e8937bda..a97b5a9ec0bb 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -906,15 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (PredL) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 - case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 - case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return LHS; - } case ICmpInst::ICMP_NE: switch (PredR) { default: @@ -930,43 +921,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 return Builder->CreateICmpSLT(LHS0, LHSC); break; // (X != 13 & X s< 15) -> no change - case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 - case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 - return RHS; case ICmpInst::ICMP_NE: // Potential folds for this case should already be handled. break; } break; - case ICmpInst::ICMP_ULT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); - case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 - case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return LHS; - } - break; - case ICmpInst::ICMP_SLT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 - case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return LHS; - } - break; case ICmpInst::ICMP_UGT: switch (PredR) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return RHS; case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 return Builder->CreateICmp(PredL, LHS0, RHSC); @@ -980,9 +943,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (PredR) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return RHS; case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 return Builder->CreateICmp(PredL, LHS0, RHSC); @@ -1234,6 +1194,56 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) { return nullptr; } +static Instruction *foldAndToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.getOpcode() == Instruction::And); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // Operand 
complexity canonicalization guarantees that the 'or' is Op0. + // (A | B) & ~(A & B) --> A ^ B + // (A | B) & ~(B & A) --> A ^ B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateXor(A, B); + + // (A | ~B) & (~A | B) --> ~(A ^ B) + // (A | ~B) & (B | ~A) --> ~(A ^ B) + // (~B | A) & (~A | B) --> ~(A ^ B) + // (~B | A) & (B | ~A) --> ~(A ^ B) + if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B)))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + return nullptr; +} + +static Instruction *foldOrToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.getOpcode() == Instruction::Or); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // Operand complexity canonicalization guarantees that the 'and' is Op0. + // (A & B) | ~(A | B) --> ~(A ^ B) + // (A & B) | ~(B | A) --> ~(A ^ B) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + // (A & ~B) | (~A & B) --> A ^ B + // (A & ~B) | (B & ~A) --> A ^ B + // (~B & A) | (~A & B) --> A ^ B + // (~B & A) | (B & ~A) --> A ^ B + if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) + return BinaryOperator::CreateXor(A, B); + + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -1247,15 +1257,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); - // (A|B)&(A|C) -> A|(B&C) etc - if (Value *V = SimplifyUsingDistributiveLaws(I)) - return replaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; + // Do this before using distributive laws to catch simple and/or/not patterns. 
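foldAndToXor and foldOrToXor above rewrite fixed and/or/not shapes into xor form. The underlying Boolean identities are easy to verify exhaustively, which the following self-contained check does for every pair of 8-bit values (a sanity check of the algebra only, not part of the patch):

#include <cassert>
#include <cstdint>
#include <iostream>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      const uint8_t a = static_cast<uint8_t>(A), b = static_cast<uint8_t>(B);
      // (A | B) & ~(A & B) --> A ^ B
      assert(static_cast<uint8_t>((a | b) & ~(a & b)) == static_cast<uint8_t>(a ^ b));
      // (A | ~B) & (~A | B) --> ~(A ^ B)
      assert(static_cast<uint8_t>((a | ~b) & (~a | b)) == static_cast<uint8_t>(~(a ^ b)));
      // (A & B) | ~(A | B) --> ~(A ^ B)
      assert(static_cast<uint8_t>((a & b) | ~(a | b)) == static_cast<uint8_t>(~(a ^ b)));
      // (A & ~B) | (~A & B) --> A ^ B
      assert(static_cast<uint8_t>((a & ~b) | (~a & b)) == static_cast<uint8_t>(a ^ b));
    }
  }
  std::cout << "all four identities hold for every 8-bit A, B\n";
}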
+ if (Instruction *Xor = foldAndToXor(I, *Builder)) + return Xor; + + // (A|B)&(A|C) -> A|(B&C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); @@ -1366,19 +1380,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return DeMorgan; { - Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; - // (A|B) & ~(A&B) -> A^B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - // ~(A&B) & (A|B) -> A^B - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - + Value *A = nullptr, *B = nullptr, *C = nullptr; // A&(A^B) => A & ~B { Value *tmpOp0 = Op0; @@ -1405,11 +1407,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } // (A&((~A)|B)) -> A&B - if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || - match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A)))) return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || - match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A)))) return BinaryOperator::CreateAnd(A, Op0); // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C @@ -1425,13 +1425,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B)))) + // (A | B) & (B ^ (~A)) -> (A & B) + // (B | A) & ((~A) ^ B) -> (A & B) + // (B | A) & (B ^ (~A)) -> (A & B) + if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && + match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) return BinaryOperator::CreateAnd(A, B); // ((~A) ^ B) & (A | B) -> (A & B) // ((~A) ^ B) & (B | A) -> (A & B) - if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) && + // (B ^ (~A)) & (A | B) -> (A & B) + // (B ^ (~A)) & (B | A) -> (A & B) + if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) return BinaryOperator::CreateAnd(A, B); } @@ -2037,15 +2042,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); - // (A&B)|(A&C) -> A&(B|C) etc - if (Value *V = SimplifyUsingDistributiveLaws(I)) - return replaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; + // Do this before using distributive laws to catch simple and/or/not patterns. 
+ if (Instruction *Xor = foldOrToXor(I, *Builder)) + return Xor; + + // (A&B)|(A&C) -> A&(B|C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); @@ -2105,19 +2114,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { match(Op0, m_c_And(m_Specific(A), m_Value(B)))) return BinaryOperator::CreateOr(Op1, B); - // (A & ~B) | (A ^ B) -> (A ^ B) - // (~B & A) | (A ^ B) -> (A ^ B) - if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_Xor(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // Commute the 'or' operands. - // (A ^ B) | (A & ~B) -> (A ^ B) - // (A ^ B) | (~B & A) -> (A ^ B) - if (match(Op1, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op0, m_Xor(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - // (A & C)|(B & D) Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && @@ -2182,23 +2178,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return replaceInstUsesWith(I, V); } - // ((A&~B)|(~A&B)) -> A^B - if ((match(C, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, D); - // ((~B&A)|(~A&B)) -> A^B - if ((match(A, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, D); - // ((A&~B)|(B&~A)) -> A^B - if ((match(C, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, B); - // ((~B&A)|(B&~A)) -> A^B - if ((match(A, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, B); - // ((A|B)&1)|(B&-2) -> (A&1) | B if (match(A, m_Or(m_Value(V1), m_Specific(B))) || match(A, m_Or(m_Specific(B), m_Value(V1)))) { @@ -2374,6 +2353,58 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return Changed ? &I : nullptr; } +/// A ^ B can be specified using other logic ops in a variety of patterns. We +/// can fold these early and efficiently by morphing an existing instruction. +static Instruction *foldXorToXor(BinaryOperator &I) { + assert(I.getOpcode() == Instruction::Xor); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // There are 4 commuted variants for each of the basic patterns. 
+ + // (A & B) ^ (A | B) -> A ^ B + // (A & B) ^ (B | A) -> A ^ B + // (A | B) ^ (A & B) -> A ^ B + // (A | B) ^ (B & A) -> A ^ B + if ((match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) || + (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_c_And(m_Specific(A), m_Specific(B))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + // (A | ~B) ^ (~A | B) -> A ^ B + // (~B | A) ^ (~A | B) -> A ^ B + // (~A | B) ^ (A | ~B) -> A ^ B + // (B | ~A) ^ (A | ~B) -> A ^ B + if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + // (A & ~B) ^ (~A & B) -> A ^ B + // (~B & A) ^ (~A & B) -> A ^ B + // (~A & B) ^ (A & ~B) -> A ^ B + // (B & ~A) ^ (A & ~B) -> A ^ B + if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -2387,6 +2418,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); + if (Instruction *NewXor = foldXorToXor(I)) + return NewXor; + // (A&B)^(A&C) -> A&(B^C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); @@ -2399,44 +2433,39 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); - // Is this a ~ operation? - if (Value *NotOp = dyn_castNotVal(&I)) { - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { - if (Op0I->getOpcode() == Instruction::And || - Op0I->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(Op0I->getOperand(1))) - Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (IsFreeToInvert(Op0I->getOperand(0), - Op0I->getOperand(0)->hasOneUse()) && - IsFreeToInvert(Op0I->getOperand(1), - Op0I->getOperand(1)->hasOneUse())) { - Value *NotX = - Builder->CreateNot(Op0I->getOperand(0), "notlhs"); - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), "notrhs"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(NotX, NotY); - return BinaryOperator::CreateAnd(NotX, NotY); - } + // Is this a 'not' (~) fed by a binary operator? 
+ BinaryOperator *NotOp; + if (match(&I, m_Not(m_BinOp(NotOp)))) { + if (NotOp->getOpcode() == Instruction::And || + NotOp->getOpcode() == Instruction::Or) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (dyn_castNotVal(NotOp->getOperand(1))) + NotOp->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) { + Value *NotY = Builder->CreateNot( + NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not"); + if (NotOp->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); + } - } else if (Op0I->getOpcode() == Instruction::AShr) { - // ~(~X >>s Y) --> (X >>s Y) - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) - return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1)); + // ~(X & Y) --> (~X | ~Y) - De Morgan's Law + // ~(X | Y) === (~X & ~Y) - De Morgan's Law + if (IsFreeToInvert(NotOp->getOperand(0), + NotOp->getOperand(0)->hasOneUse()) && + IsFreeToInvert(NotOp->getOperand(1), + NotOp->getOperand(1)->hasOneUse())) { + Value *NotX = Builder->CreateNot(NotOp->getOperand(0), "notlhs"); + Value *NotY = Builder->CreateNot(NotOp->getOperand(1), "notrhs"); + if (NotOp->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(NotX, NotY); + return BinaryOperator::CreateAnd(NotX, NotY); } + } else if (NotOp->getOpcode() == Instruction::AShr) { + // ~(~X >>s Y) --> (X >>s Y) + if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) + return BinaryOperator::CreateAShr(Op0NotVal, NotOp->getOperand(1)); } } @@ -2574,40 +2603,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { { Value *A, *B, *C, *D; - // (A & B)^(A | B) -> A ^ B - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_Or(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | B)^(A & B) -> A ^ B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | ~B) ^ (~A | B) -> A ^ B - // (~B | A) ^ (~A | B) -> A ^ B - if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // (~A | B) ^ (A | ~B) -> A ^ B - if (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B))))) { - return BinaryOperator::CreateXor(A, B); - } - // (A & ~B) ^ (~A & B) -> A ^ B - // (~B & A) ^ (~A & B) -> A ^ B - if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // (~A & B) ^ (A & ~B) -> A ^ B - if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B))))) { - return BinaryOperator::CreateXor(A, B); - } // (A ^ C)^(A | B) -> ((~A) & B) ^ C if (match(Op0, m_Xor(m_Value(D), m_Value(C))) && match(Op1, m_Or(m_Value(A), m_Value(B)))) { diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e7aa1a457371..313ab13b9e2b 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include 
"llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" @@ -1378,14 +1379,13 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { return nullptr; unsigned BitWidth = IT->getBitWidth(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II); + KnownBits Known(BitWidth); + IC.computeKnownBits(Op0, Known, 0, &II); // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; - unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros() - : KnownOne.countLeadingZeros(); + unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros() + : Known.One.countLeadingZeros(); APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits) : APInt::getHighBitsSet(BitWidth, NumMaskBits); @@ -1393,7 +1393,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // zero, this value is constant. // FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. - if ((Mask & KnownZero) == Mask) { + if (Mask.isSubsetOf(Known.Zero)) { auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits)); return IC.replaceInstUsesWith(II, C); } @@ -1401,7 +1401,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // If the input to cttz/ctlz is known to be non-zero, // then change the 'ZeroIsUndef' parameter to 'true' // because we know the zero behavior can't affect the result. - if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) { + if (Known.One != 0 || isKnownNonZero(Op0, IC.getDataLayout())) { if (!match(II.getArgOperand(1), m_One())) { II.setOperand(1, IC.Builder->getTrue()); return &II; @@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (auto *CSrc0 = dyn_cast<Constant>(Src0)) { if (auto *CSrc1 = dyn_cast<Constant>(Src1)) { Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); - return replaceInstUsesWith(*II, - ConstantExpr::getSExt(CCmp, II->getType())); + if (CCmp->isNullValue()) { + return replaceInstUsesWith( + *II, ConstantExpr::getSExt(CCmp, II->getType())); + } + + // The result of V_ICMP/V_FCMP assembly instructions (which this + // intrinsic exposes) is one bit per thread, masked with the EXEC + // register (which contains the bitmask of live threads). So a + // comparison that always returns true is the same as a read of the + // EXEC register. + Value *NewF = Intrinsic::getDeclaration( + II->getModule(), Intrinsic::read_register, II->getType()); + Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; + MDNode *MD = MDNode::get(II->getContext(), MDArgs); + Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; + CallInst *NewCall = Builder->CreateCall(NewF, Args); + NewCall->addAttribute(AttributeList::FunctionIndex, + Attribute::Convergent); + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); } // Canonicalize constants to RHS. @@ -3599,9 +3617,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. 
- APInt KnownZero(1, 0), KnownOne(1, 0); - computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II); - if (KnownOne.isAllOnesValue()) + KnownBits Known(1); + computeKnownBits(IIOperand, Known, 0, II); + if (Known.One.isAllOnesValue()) return eraseInstFromFunction(*II); // Update the cache of affected values for this assumption (we might be diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 9127ddca5915..312d9baae43a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,9 +14,10 @@ #include "InstCombineInternal.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -676,11 +677,10 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: - uint32_t BitWidth = Op1C->getType()->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI); + KnownBits Known(Op1C->getType()->getBitWidth()); + computeKnownBits(ICI->getOperand(0), Known, 0, &CI); - APInt KnownZeroMask(~KnownZero); + APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoTransform) return ICI; @@ -726,13 +726,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); - APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI); - computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI); + KnownBits KnownLHS(BitWidth); + KnownBits KnownRHS(BitWidth); + computeKnownBits(LHS, KnownLHS, 0, &CI); + computeKnownBits(RHS, KnownRHS, 0, &CI); - if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { - APInt KnownBits = KnownZeroLHS | KnownOneLHS; + if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) { + APInt KnownBits = KnownLHS.Zero | KnownLHS.One; APInt UnknownBit = ~KnownBits; if (UnknownBit.countPopulation() == 1) { if (!DoTransform) return ICI; @@ -740,7 +740,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *Result = Builder->CreateXor(LHS, RHS); // Mask off any bits that are set and won't be shifted away. 
- if (KnownOneLHS.uge(UnknownBit)) + if (KnownLHS.One.uge(UnknownBit)) Result = Builder->CreateAnd(Result, ConstantInt::get(ITy, UnknownBit)); @@ -1049,10 +1049,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { if (ICI->hasOneUse() && ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI); + KnownBits Known(BitWidth); + computeKnownBits(Op0, Known, 0, &CI); - APInt KnownZeroMask(~KnownZero); + APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { Value *In = ICI->getOperand(0); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 003029ae39d5..d846a631b96f 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -175,19 +176,18 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { /// Given a signed integer type and a set of known zero and one bits, compute /// the maximum and minimum values that could have the specified known zero and /// known one bits, returning them in Min/Max. -static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, +/// TODO: Move to method on KnownBits struct? +static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known, APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && + assert(Known.getBitWidth() == Min.getBitWidth() && + Known.getBitWidth() == Max.getBitWidth() && "KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); + APInt UnknownBits = ~(Known.Zero|Known.One); // The minimum value is when all unknown bits are zeros, EXCEPT for the sign // bit if it is unknown. - Min = KnownOne; - Max = KnownOne|UnknownBits; + Min = Known.One; + Max = Known.One|UnknownBits; if (UnknownBits.isNegative()) { // Sign bit is unknown Min.setBit(Min.getBitWidth()-1); @@ -198,19 +198,18 @@ static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero, /// Given an unsigned integer type and a set of known zero and one bits, compute /// the maximum and minimum values that could have the specified known zero and /// known one bits, returning them in Min/Max. -static void computeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, +/// TODO: Move to method on KnownBits struct? +static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known, APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && + assert(Known.getBitWidth() == Min.getBitWidth() && + Known.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); + APInt UnknownBits = ~(Known.Zero|Known.One); // The minimum value is when the unknown bits are all zeros. 
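// Illustrative sketch, not part of the diff: the surrounding helper derives an
// unsigned range from known-zero/known-one masks.  A minimal 8-bit model with
// a worked example; the Known8 struct and all values are invented for
// illustration.
#include <cassert>
#include <cstdint>

struct Known8 { uint8_t Zero, One; };             // bit i known to be 0 / 1

static void unsignedMinMax(Known8 K, uint8_t &Min, uint8_t &Max) {
  uint8_t Unknown = uint8_t(~(K.Zero | K.One));   // bits we know nothing about
  Min = K.One;                                    // unknown bits all zero
  Max = uint8_t(K.One | Unknown);                 // unknown bits all one
}

int main() {
  // Value shaped like 0b00001?0? : bit 3 known one, bit 1 and the high nibble
  // known zero, bits 0 and 2 unknown.
  Known8 K{uint8_t(0xF2), uint8_t(0x08)};
  uint8_t Min = 0, Max = 0;
  unsignedMinMax(K, Min, Max);
  assert(Min == 0x08 && Max == 0x0D);             // range [8, 13]
  return 0;
}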
- Min = KnownOne; + Min = Known.One; // The maximum value is when the unknown bits are all ones. - Max = KnownOne|UnknownBits; + Max = Known.One|UnknownBits; } /// This is called when we see this pattern: @@ -1479,14 +1478,14 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, // of the high bits truncated out of x are known. unsigned DstBits = Trunc->getType()->getScalarSizeInBits(), SrcBits = X->getType()->getScalarSizeInBits(); - APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - computeKnownBits(X, KnownZero, KnownOne, 0, &Cmp); + KnownBits Known(SrcBits); + computeKnownBits(X, Known, 0, &Cmp); // If all the high bits are known, we can do this xform. - if ((KnownZero | KnownOne).countLeadingOnes() >= SrcBits - DstBits) { + if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) { // Pull in the high bits from known-ones set. APInt NewRHS = C->zext(SrcBits); - NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); + NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS)); } } @@ -4001,16 +4000,16 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { IsSignBit = isSignBitCheck(Pred, *CmpC, UnusedBit); } - APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); - APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); + KnownBits Op0Known(BitWidth); + KnownBits Op1Known(BitWidth); if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth, IsSignBit), - Op0KnownZero, Op0KnownOne, 0)) + Op0Known, 0)) return &I; if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) + Op1Known, 0)) return &I; // Given the known and unknown bits, compute a range that the LHS could be @@ -4019,15 +4018,11 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); if (I.isSigned()) { - computeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min, - Op0Max); - computeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min, - Op1Max); + computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); + computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); } else { - computeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min, - Op0Max); - computeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min, - Op1Max); + computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); + computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); } // If Min and Max are known to be the same, then SimplifyDemandedBits @@ -4054,8 +4049,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // If all bits are known zero except for one, then we know at most one bit // is set. If the comparison is against zero, then this is a check to see if // *that* bit is set. - APInt Op0KnownZeroInverted = ~Op0KnownZero; - if (~Op1KnownZero == 0) { + APInt Op0KnownZeroInverted = ~Op0Known.Zero; + if (~Op1Known.Zero == 0) { // If the LHS is an AND with the same constant, look through it. Value *LHS = nullptr; const APInt *LHSC; @@ -4193,8 +4188,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // Turn a signed comparison into an unsigned one if both operands are known to // have the same sign. 
if (I.isSigned() && - ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || - (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) + ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) || + (Op0Known.One.isNegative() && Op1Known.One.isNegative()))) return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); return nullptr; diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 71000063ab3c..776686d3d117 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -489,10 +489,9 @@ public: return nullptr; // Don't do anything with FI } - void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + void computeKnownBits(Value *V, KnownBits &Known, unsigned Depth, Instruction *CxtI) const { - return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI, - &DT); + return llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); } bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0, @@ -536,24 +535,23 @@ private: /// \brief Attempts to replace V with a simpler value based on the demanded /// bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - Instruction *CxtI); + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, + unsigned Depth, Instruction *CxtI); bool SimplifyDemandedBits(Instruction *I, unsigned Op, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0); + const APInt &DemandedMask, KnownBits &Known, + unsigned Depth = 0); /// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne /// bits. It also tries to handle simplifications that can be done based on /// DemandedMask, but without modifying the Instruction. Value *SimplifyMultipleUseDemandedBits(Instruction *I, const APInt &DemandedMask, - APInt &KnownZero, APInt &KnownOne, + KnownBits &Known, unsigned Depth, Instruction *CxtI); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. - Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne); + Value *simplifyShrShlDemandedBits( + Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, + const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known); /// \brief Tries to simplify operands to an integer instruction based on its /// demanded bits. diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5d6d899da4b5..76829c5e457b 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -1476,11 +1477,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // The motivation for this call into value tracking is to take advantage of // the assumption cache, so make sure that is populated. 
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { - APInt KnownOne(1, 0), KnownZero(1, 0); - computeKnownBits(CondVal, KnownZero, KnownOne, 0, &SI); - if (KnownOne == 1) + KnownBits Known(1); + computeKnownBits(CondVal, Known, 0, &SI); + if (Known.One == 1) return replaceInstUsesWith(SI, TrueVal); - if (KnownZero == 1) + if (Known.Zero == 1) return replaceInstUsesWith(SI, FalseVal); } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 2ba052b7e02d..8d0ed8532779 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -26,7 +27,7 @@ using namespace llvm::PatternMatch; /// constant integer. If so, check to see if there are any bits set in the /// constant that are not demanded. If so, shrink the constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { + const APInt &Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -37,13 +38,11 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded = Demanded.zextOrTrunc(C->getBitWidth()); if (C->isSubsetOf(Demanded)) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= *C; - I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded)); + I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded)); return true; } @@ -54,10 +53,10 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, /// the instruction has any properties that allow us to simplify its operands. bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + KnownBits Known(BitWidth); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known, 0, &Inst); if (!V) return false; if (V == &Inst) return true; @@ -70,11 +69,11 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { /// change and false otherwise. bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, - APInt &KnownZero, APInt &KnownOne, + KnownBits &Known, unsigned Depth) { Use &U = I->getOperandUse(OpNo); - Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, - KnownOne, Depth, I); + Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known, + Depth, I); if (!NewVal) return false; U = NewVal; return true; @@ -88,15 +87,16 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// with a constant or one of its operands. In such cases, this function does /// the replacement and returns true. In all other cases, it returns false after /// analyzing the expression and setting KnownOne and known to be one in the -/// expression. KnownZero contains all the bits that are known to be zero in the -/// expression. 
These are provided to potentially allow the caller (which might -/// recursively be SimplifyDemandedBits itself) to simplify the expression. -/// KnownOne and KnownZero always follow the invariant that: -/// KnownOne & KnownZero == 0. -/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and -/// KnownZero may only be accurate for those bits set in DemandedMask. Note also -/// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the -/// same. +/// expression. Known.Zero contains all the bits that are known to be zero in +/// the expression. These are provided to potentially allow the caller (which +/// might recursively be SimplifyDemandedBits itself) to simplify the +/// expression. +/// Known.One and Known.Zero always follow the invariant that: +/// Known.One & Known.Zero == 0. +/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and +/// Known.Zero may only be accurate for those bits set in DemandedMask. Note +/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all +/// be the same. /// /// This returns null if it did not change anything and it permits no /// simplification. This returns V itself if it did some simplification of V's @@ -104,8 +104,7 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// some other non-null value if it found out that V is equal to another value /// in the context where the specified bits are demanded, but not for all users. Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth, + KnownBits &Known, unsigned Depth, Instruction *CxtI) { assert(V != nullptr && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); @@ -113,18 +112,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Type *VTy = V->getType(); assert( (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne " - "must have same BitWidth"); + Known.getBitWidth() == BitWidth && + "Value *V, DemandedMask and Known must have same BitWidth"); if (isa<Constant>(V)) { - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); return nullptr; } - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); if (DemandedMask == 0) // Not demanding any bits from V. return UndefValue::get(VTy); @@ -133,20 +130,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *I = dyn_cast<Instruction>(V); if (!I) { - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); return nullptr; // Only analyze instructions. } // If there are multiple uses of this value and we aren't at the root, then // we can't do any simplifications of the operands, because DemandedMask // only reflects the bits demanded by *one* of the users. 
- if (Depth != 0 && !I->hasOneUse()) { - return SimplifyMultipleUseDemandedBits(I, DemandedMask, KnownZero, KnownOne, - Depth, CxtI); - } + if (Depth != 0 && !I->hasOneUse()) + return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits so that we can try to simplify the @@ -157,22 +151,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownZero, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, + Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); + assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); // Output known-0 are known to be clear if zero in either the LHS | RHS. - APInt IKnownZero = RHSKnownZero | LHSKnownZero; + APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; // Output known-1 bits are only known if set in both the LHS & RHS. - APInt IKnownOne = RHSKnownOne & LHSKnownOne; + APInt IKnownOne = RHSKnown.One & LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. @@ -181,33 +174,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero)) return I; - KnownZero = std::move(IKnownZero); - KnownOne = std::move(IKnownOne); + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Or: { // If either the LHS or the RHS are One, the result is One. 
- if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownOne, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, + Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); + assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); // Output known-0 bits are only known if clear in both the LHS & RHS. - APInt IKnownZero = RHSKnownZero & LHSKnownZero; - // Output known-1 are known to be set if set in either the LHS | RHS. - APInt IKnownOne = RHSKnownOne | LHSKnownOne; + APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; + // Output known-1 are known. to be set if s.et in either the LHS | RHS. + APInt IKnownOne = RHSKnown.One | LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. @@ -216,34 +208,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; - KnownZero = std::move(IKnownZero); - KnownOne = std::move(IKnownOne); + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Xor: { - if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 0, DemandedMask, LHSKnownZero, LHSKnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); + assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt IKnownZero = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); + APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | + (RHSKnown.One & LHSKnown.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt IKnownOne = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); + APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) | + (RHSKnown.One & LHSKnown.Zero); // If the client is only demanding bits that we know, return the known // constant. @@ -252,15 +242,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. 
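// Illustrative sketch, not from the patch: the xor case below may return one
// operand outright because X ^ Y equals X on every bit where Y is known zero,
// and xor agrees with or wherever no demanded bit can be set in both operands.
// Standalone check with an invented demanded mask:
#include <cassert>

int main() {
  const unsigned Demanded = 0x0Fu;          // the user only reads the low nibble
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      if ((y & Demanded) == 0)              // Y known zero on all demanded bits
        assert(((x ^ y) & Demanded) == (x & Demanded));
      if (((x & y) & Demanded) == 0)        // no demanded bit can be set in both
        assert(((x ^ y) & Demanded) == ((x | y) & Demanded));
    }
  return 0;
}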
- if (DemandedMask.isSubsetOf(RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) { + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) { Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -271,10 +261,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) && - RHSKnownOne.isSubsetOf(LHSKnownOne)) { + if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) && + RHSKnown.One.isSubsetOf(LHSKnown.One)) { Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); + ~RHSKnown.One & DemandedMask); Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); return InsertNewInstWith(And, *I); } @@ -292,10 +282,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && isa<ConstantInt>(I->getOperand(1)) && isa<ConstantInt>(LHSInst->getOperand(1)) && - (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { + (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); - APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); + APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); @@ -309,9 +299,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZero = std::move(IKnownZero); + Known.Zero = std::move(IKnownZero); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = std::move(IKnownOne); + Known.One = std::move(IKnownOne); break; } case Instruction::Select: @@ -321,13 +311,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 1, DemandedMask, LHSKnownZero, LHSKnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); + assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(I, 1, DemandedMask) || @@ -335,21 +323,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Only known if known in both the LHS and RHS. 
- KnownOne = RHSKnownOne & LHSKnownOne; - KnownZero = RHSKnownZero & LHSKnownZero; + Known.One = RHSKnown.One & LHSKnown.One; + Known.Zero = RHSKnown.Zero & LHSKnown.Zero; break; case Instruction::Trunc: { unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask = DemandedMask.zext(truncBf); - KnownZero = KnownZero.zext(truncBf); - KnownOne = KnownOne.zext(truncBf); - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + Known.Zero = Known.Zero.zext(truncBf); + Known.One = Known.One.zext(truncBf); + if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; DemandedMask = DemandedMask.trunc(BitWidth); - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: @@ -369,27 +356,25 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Don't touch a vector-to-scalar bitcast. return nullptr; - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask = DemandedMask.trunc(SrcBitWidth); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + Known.Zero = Known.Zero.trunc(SrcBitWidth); + Known.One = Known.One.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; DemandedMask = DemandedMask.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); // The top bits are known to be zero. - KnownZero.setBitsFrom(SrcBitWidth); + Known.Zero.setBitsFrom(SrcBitWidth); break; } case Instruction::SExt: { @@ -406,27 +391,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits.setBit(SrcBitWidth-1); InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, InputDemandedBits, KnownZero, KnownOne, - Depth + 1)) + Known.Zero = Known.Zero.trunc(SrcBitWidth); + Known.One = Known.One.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedBits, Known, Depth + 1)) return I; InputDemandedBits = InputDemandedBits.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. 
// If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { + if (Known.Zero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { // Convert to ZExt cast CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstWith(NewCast, *I); - } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set - KnownOne |= NewBits; + } else if (Known.One[SrcBitWidth-1]) { // Input sign bit known set + Known.One |= NewBits; } break; } @@ -440,11 +424,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // significant bit and all those below it. APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (ShrinkDemandedConstant(I, 0, DemandedFromOps) || - SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnownZero, LHSKnownOne, - Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) || ShrinkDemandedConstant(I, 1, DemandedFromOps) || - SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnownZero, RHSKnownOne, - Depth + 1)) { + SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) { // Disable the nsw and nuw flags here: We can no longer guarantee that // we won't wrap after simplification. Removing the nsw/nuw flags is // legal here because the top bit is not demanded. @@ -456,30 +438,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If we are known to be adding/subtracting zeros to every bit below // the highest demanded bit, we just return the other side. - if ((DemandedFromOps & RHSKnownZero) == DemandedFromOps) + if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); // We can't do this with the LHS for subtraction. if (I->getOpcode() == Instruction::Add && - (DemandedFromOps & LHSKnownZero) == DemandedFromOps) + DemandedFromOps.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); } // Otherwise just hand the add/sub off to computeKnownBits to fill in // the known zeros and ones. 
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); break; } - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - { - Value *VarX; ConstantInt *C1; - if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { - Instruction *Shr = cast<Instruction>(I->getOperand(0)); - Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, - KnownZero, KnownOne); - if (R) - return R; - } + case Instruction::Shl: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { + const APInt *ShrAmt; + if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) { + Instruction *Shr = cast<Instruction>(I->getOperand(0)); + if (Value *R = simplifyShrShlDemandedBits( + Shr, *ShrAmt, I, *SA, DemandedMask, Known)) + return R; } uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); @@ -492,17 +472,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, else if (IOp->hasNoUnsignedWrap()) DemandedMaskIn.setHighBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + Known.Zero <<= ShiftAmt; + Known.One <<= ShiftAmt; // low bits known zero. if (ShiftAmt) - KnownZero.setLowBits(ShiftAmt); + Known.Zero.setLowBits(ShiftAmt); } break; + } case Instruction::LShr: { const APInt *SA; if (match(I->getOperand(1), m_APInt(SA))) { @@ -516,14 +496,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast<LShrOperator>(I)->isExact()) DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero.lshrInPlace(ShiftAmt); - KnownOne.lshrInPlace(ShiftAmt); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); if (ShiftAmt) - KnownZero.setHighBits(ShiftAmt); // high bits known zero. + Known.Zero.setHighBits(ShiftAmt); // high bits known zero. } break; } @@ -560,15 +539,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast<AShrOperator>(I)->isExact()) DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - KnownZero.lshrInPlace(ShiftAmt); - KnownOne.lshrInPlace(ShiftAmt); + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); // Handle the sign bits. APInt SignMask(APInt::getSignMask(BitWidth)); @@ -577,14 +555,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. 
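// Illustrative sketch, not part of the diff: the AShr transform here swaps an
// arithmetic shift for a logical one when the caller never looks at the
// sign-filled high bits, since the two shifts only differ there.  One worked
// 8-bit example (values picked arbitrarily; signed >> is arithmetic on the
// usual implementations and guaranteed so since C++20):
#include <cassert>
#include <cstdint>

int main() {
  const int8_t   X        = int8_t(0xB4);                     // negative input
  const unsigned ShiftAmt = 3;
  const uint8_t  HighBits = uint8_t(0xFFu << (8 - ShiftAmt)); // bits the shift fills
  const uint8_t  Demanded = uint8_t(~HighBits);               // caller ignores them

  uint8_t AShr = uint8_t(X >> ShiftAmt);            // 0xF6: ones shifted in
  uint8_t LShr = uint8_t(uint8_t(X) >> ShiftAmt);   // 0x16: zeros shifted in
  assert((AShr & Demanded) == (LShr & Demanded));   // identical on demanded bits
  return 0;
}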
- if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { + if (BitWidth <= ShiftAmt || Known.Zero[BitWidth-ShiftAmt-1] || + !DemandedMask.intersects(HighBits)) { BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), I->getOperand(1)); LShr->setIsExact(cast<BinaryOperator>(I)->isExact()); return InsertNewInstWith(LShr, *I); - } else if ((KnownOne & SignMask) != 0) { // New bits are known one. - KnownOne |= HighBits; + } else if (Known.One.intersects(SignMask)) { // New bits are known one. + Known.One |= HighBits; } } break; @@ -602,25 +580,24 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); - if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1)) return I; // The low bits of LHS are unchanged by the srem. - KnownZero = LHSKnownZero & LowBits; - KnownOne = LHSKnownOne & LowBits; + Known.Zero = LHSKnown.Zero & LowBits; + Known.One = LHSKnown.One & LowBits; // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero)) + Known.Zero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0)) - KnownOne |= ~LowBits; + if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One)) + Known.One |= ~LowBits; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); break; } } @@ -628,22 +605,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. if (DemandedMask.isSignBitSet()) { - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnownZero.isSignBitSet()) - KnownZero.setSignBit(); + if (LHSKnown.Zero.isSignBitSet()) + Known.Zero.setSignBit(); } break; case Instruction::URem: { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + KnownBits Known2(BitWidth); APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I, 0, AllOnes, KnownZero2, KnownOne2, Depth + 1) || - SimplifyDemandedBits(I, 1, AllOnes, KnownZero2, KnownOne2, Depth + 1)) + if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) || + SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1)) return I; - unsigned Leaders = KnownZero2.countLeadingOnes(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; + unsigned Leaders = Known2.Zero.countLeadingOnes(); + Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; break; } case Instruction::Call: @@ -707,56 +683,54 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return ConstantInt::getNullValue(VTy); // We know that the upper bits are set to zero. 
- KnownZero.setBitsFrom(ArgWidth); + Known.Zero.setBitsFrom(ArgWidth); return nullptr; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero.setBitsFrom(32); + Known.Zero.setBitsFrom(32); return nullptr; } } - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); break; } // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) - return Constant::getIntegerValue(VTy, KnownOne); + if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + return Constant::getIntegerValue(VTy, Known.One); return nullptr; } -/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne +/// Helper routine of SimplifyDemandedUseBits. It computes Known /// bits. It also tries to handle simplifications that can be done based on /// DemandedMask, but without modifying the Instruction. Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, unsigned Depth, Instruction *CxtI) { unsigned BitWidth = DemandedMask.getBitWidth(); Type *ITy = I->getType(); - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + KnownBits LHSKnown(BitWidth); + KnownBits RHSKnown(BitWidth); // Despite the fact that we can't simplify this instruction in all User's - // context, we can at least compute the knownzero/knownone bits, and we can + // context, we can at least compute the known bits, and we can // do simplifications that apply to *just* the one user if we know that // this instruction has a simpler value in that context. switch (I->getOpcode()) { case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // Output known-0 are known to be clear if zero in either the LHS | RHS. - APInt IKnownZero = RHSKnownZero | LHSKnownZero; + APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; // Output known-1 bits are only known if set in both the LHS & RHS. - APInt IKnownOne = RHSKnownOne & LHSKnownOne; + APInt IKnownOne = RHSKnown.One & LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. @@ -766,13 +740,13 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this // context. - if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) return I->getOperand(1); - KnownZero = std::move(IKnownZero); - KnownOne = std::move(IKnownOne); + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Or: { @@ -780,15 +754,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. 
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // Output known-0 bits are only known if clear in both the LHS & RHS. - APInt IKnownZero = RHSKnownZero & LHSKnownZero; + APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - APInt IKnownOne = RHSKnownOne | LHSKnownOne; + APInt IKnownOne = RHSKnown.One | LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. @@ -798,30 +771,29 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this // context. - if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) return I->getOperand(1); - KnownZero = std::move(IKnownZero); - KnownOne = std::move(IKnownOne); + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Xor: { // We can simplify (X^Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt IKnownZero = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); + APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | + (RHSKnown.One & LHSKnown.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt IKnownOne = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); + APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) | + (RHSKnown.One & LHSKnown.Zero); // If the client is only demanding bits that we know, return the known // constant. @@ -830,25 +802,25 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If all of the demanded bits are known zero on one side, return the // other. - if (DemandedMask.isSubsetOf(RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZero = std::move(IKnownZero); + Known.Zero = std::move(IKnownZero); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = std::move(IKnownOne); + Known.One = std::move(IKnownOne); break; } default: - // Compute the KnownZero/KnownOne bits to simplify things downstream. - computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); + // Compute the Known bits to simplify things downstream. + computeKnownBits(I, Known, Depth, CxtI); // If this user is only demanding bits that we know, return the known // constant. 
- if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) - return Constant::getIntegerValue(ITy, KnownOne); + if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + return Constant::getIntegerValue(ITy, Known.One); break; } @@ -874,29 +846,26 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, /// /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was /// not successful. -Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, - Instruction *Shl, - const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne) { - - const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue(); - const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue(); +Value * +InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, + Instruction *Shl, const APInt &ShlOp1, + const APInt &DemandedMask, + KnownBits &Known) { if (!ShlOp1 || !ShrOp1) - return nullptr; // Noop. + return nullptr; // No-op. Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); - unsigned BitWidth = Ty->getIntegerBitWidth(); + unsigned BitWidth = Ty->getScalarSizeInBits(); if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) return nullptr; // Undef. unsigned ShlAmt = ShlOp1.getZExtValue(); unsigned ShrAmt = ShrOp1.getZExtValue(); - KnownOne.clearAllBits(); - KnownZero.setLowBits(ShlAmt - 1); - KnownZero &= DemandedMask; + Known.One.clearAllBits(); + Known.Zero.setLowBits(ShlAmt - 1); + Known.Zero &= DemandedMask; APInt BitMask1(APInt::getAllOnesValue(BitWidth)); APInt BitMask2(APInt::getAllOnesValue(BitWidth)); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 81f2d9fa179f..4729c79ca4c3 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -60,6 +60,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" @@ -641,14 +642,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { // They do! Return "L op' R". ++NumExpand; - // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. - if ((L == A && R == B) || - (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) - return Op0; - // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) - return V; - // Otherwise, create a new instruction. C = Builder->CreateBinOp(InnerOpcode, L, R); C->takeName(&I); return C; @@ -666,14 +659,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { // They do! Return "L op' R". ++NumExpand; - // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. - if ((L == B && R == C) || - (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) - return Op1; - // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) - return V; - // Otherwise, create a new instruction. A = Builder->CreateBinOp(InnerOpcode, L, R); A->takeName(&I); return A; @@ -2196,11 +2181,10 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { // There might be assume intrinsics dominating this return that completely // determine the value. If so, constant fold it. 
- unsigned BitWidth = VTy->getPrimitiveSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI); - if ((KnownZero|KnownOne).isAllOnesValue()) - RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne)); + KnownBits Known(VTy->getPrimitiveSizeInBits()); + computeKnownBits(ResultOp, Known, 0, &RI); + if ((Known.Zero|Known.One).isAllOnesValue()) + RI.setOperand(0, Constant::getIntegerValue(VTy, Known.One)); return nullptr; } @@ -2279,10 +2263,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { } unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI); - unsigned LeadingKnownZeros = KnownZero.countLeadingOnes(); - unsigned LeadingKnownOnes = KnownOne.countLeadingOnes(); + KnownBits Known(BitWidth); + computeKnownBits(Cond, Known, 0, &SI); + unsigned LeadingKnownZeros = Known.Zero.countLeadingOnes(); + unsigned LeadingKnownOnes = Known.One.countLeadingOnes(); // Compute the number of leading bits we can ignore. // TODO: A better way to determine this would use ComputeNumSignBits(). @@ -2879,11 +2863,10 @@ bool InstCombiner::run() { Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { unsigned BitWidth = Ty->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I); - if ((KnownZero | KnownOne).isAllOnesValue()) { - Constant *C = ConstantInt::get(Ty, KnownOne); + KnownBits Known(BitWidth); + computeKnownBits(I, Known, /*Depth*/0, I); + if ((Known.Zero | Known.One).isAllOnesValue()) { + Constant *C = ConstantInt::get(Ty, Known.One); DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << " from: " << *I << '\n'); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 036dd8d39a08..b866958e3c4b 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -265,11 +265,10 @@ static cl::opt<bool> cl::Hidden, cl::init(false)); static cl::opt<bool> - ClUseMachOGlobalsSection("asan-globals-live-support", - cl::desc("Use linker features to support dead " - "code stripping of globals " - "(Mach-O only)"), - cl::Hidden, cl::init(true)); + ClUseGlobalsGC("asan-globals-live-support", + cl::desc("Use linker features to support dead " + "code stripping of globals"), + cl::Hidden, cl::init(true)); // Debug flags. 
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -594,13 +593,15 @@ struct AddressSanitizer : public FunctionPass { }; class AddressSanitizerModule : public ModulePass { - public: +public: explicit AddressSanitizerModule(bool CompileKernel = false, - bool Recover = false) + bool Recover = false, + bool UseGlobalsGC = true) : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), - Recover(Recover || ClRecover) {} + Recover(Recover || ClRecover), + UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC) {} bool runOnModule(Module &M) override; - static char ID; // Pass identification, replacement for typeid + static char ID; // Pass identification, replacement for typeid StringRef getPassName() const override { return "AddressSanitizerModule"; } private: @@ -635,6 +636,7 @@ private: GlobalsMetadata GlobalsMD; bool CompileKernel; bool Recover; + bool UseGlobalsGC; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -913,9 +915,10 @@ INITIALIZE_PASS( "ModulePass", false, false) ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, - bool Recover) { + bool Recover, + bool UseGlobalsGC) { assert(!CompileKernel || Recover); - return new AddressSanitizerModule(CompileKernel, Recover); + return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -1537,9 +1540,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // binary in order to allow the linker to properly dead strip. This is only // supported on recent versions of ld64. bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const { - if (!ClUseMachOGlobalsSection) - return false; - if (!TargetTriple.isOSBinFormatMachO()) return false; @@ -1911,9 +1911,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Initializers[i] = Initializer; } - if (TargetTriple.isOSBinFormatCOFF()) { + if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); - } else if (ShouldUseMachOGlobalsSection()) { + } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); } else { InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers); diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 61d627673c90..d7eb857cff7e 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -943,14 +943,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { BasicBlock *BB = MI->getParent(); DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); DEBUG(dbgs() << *BB << "\n"); + auto OrigBBFreq = BFI.getBlockFreq(BB); BasicBlock *DefaultBB = SplitBlock(BB, MI); BasicBlock::iterator It(*MI); ++It; assert(It != DefaultBB->end()); BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); - DefaultBB->setName("MemOP.Default"); MergeBB->setName("MemOP.Merge"); + BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); + DefaultBB->setName("MemOP.Default"); auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index a5afc8ad977c..c1bbc4e96b16 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -351,8 +351,10 @@ bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, 
ARCInstKind Class) { - // Check for possible releases. - if (!CanAlterRefCount(Inst, Ptr, PA, Class)) + // Check for possible releases. Treat clang.arc.use as a releasing instruction + // to prevent sinking a retain past it. + if (!CanAlterRefCount(Inst, Ptr, PA, Class) && + Class != ARCInstKind::IntrinsicUser) return false; DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index ee6333e88716..f62e111460ca 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -53,6 +53,12 @@ using namespace consthoist; STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); +static cl::opt<bool> ConstHoistWithBlockFrequency( + "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to reduce the " + "chance to execute const materialization more frequently than " + "without hoisting.")); + namespace { /// \brief The constant hoisting pass. class ConstantHoistingLegacyPass : public FunctionPass { @@ -68,6 +74,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (ConstHoistWithBlockFrequency) + AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } @@ -82,6 +90,7 @@ private: char ConstantHoistingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", @@ -99,9 +108,13 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) { DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n"); DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); - bool MadeChange = Impl.runImpl( - Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn), - getAnalysis<DominatorTreeWrapperPass>().getDomTree(), Fn.getEntryBlock()); + bool MadeChange = + Impl.runImpl(Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn), + getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + ConstHoistWithBlockFrequency + ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI() + : nullptr, + Fn.getEntryBlock()); if (MadeChange) { DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -148,33 +161,142 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, return IDom->getBlock()->getTerminator(); } +/// \brief Given \p BBs as input, find another set of BBs which collectively +/// dominates \p BBs and have the minimal sum of frequencies. Return the BB +/// set found in \p BBs. +void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet<BasicBlock *, 8> &BBs) { + assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); + // Nodes on the current path to the root. + SmallPtrSet<BasicBlock *, 8> Path; + // Candidates includes any block 'BB' in set 'BBs' that is not strictly + // dominated by any other blocks in set 'BBs', and all nodes in the path + // in the dominator tree from Entry to 'BB'. 
+ SmallPtrSet<BasicBlock *, 16> Candidates; + for (auto BB : BBs) { + Path.clear(); + // Walk up the dominator tree until Entry or another BB in BBs + // is reached. Insert the nodes on the way to the Path. + BasicBlock *Node = BB; + // The "Path" is a candidate path to be added into Candidates set. + bool isCandidate = false; + do { + Path.insert(Node); + if (Node == Entry || Candidates.count(Node)) { + isCandidate = true; + break; + } + assert(DT.getNode(Node)->getIDom() && + "Entry doens't dominate current Node"); + Node = DT.getNode(Node)->getIDom()->getBlock(); + } while (!BBs.count(Node)); + + // If isCandidate is false, Node is another Block in BBs dominating + // current 'BB'. Drop the nodes on the Path. + if (!isCandidate) + continue; + + // Add nodes on the Path into Candidates. + Candidates.insert(Path.begin(), Path.end()); + } + + // Sort the nodes in Candidates in top-down order and save the nodes + // in Orders. + unsigned Idx = 0; + SmallVector<BasicBlock *, 16> Orders; + Orders.push_back(Entry); + while (Idx != Orders.size()) { + BasicBlock *Node = Orders[Idx++]; + for (auto ChildDomNode : DT.getNode(Node)->getChildren()) { + if (Candidates.count(ChildDomNode->getBlock())) + Orders.push_back(ChildDomNode->getBlock()); + } + } + + // Visit Orders in bottom-up order. + typedef std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency> + InsertPtsCostPair; + // InsertPtsMap is a map from a BB to the best insertion points for the + // subtree of BB (subtree not including the BB itself). + DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap; + InsertPtsMap.reserve(Orders.size() + 1); + for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) { + BasicBlock *Node = *RIt; + bool NodeInBBs = BBs.count(Node); + SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first; + BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second; + + // Return the optimal insert points in BBs. + if (Node == Entry) { + BBs.clear(); + if (InsertPtsFreq > BFI.getBlockFreq(Node)) + BBs.insert(Entry); + else + BBs.insert(InsertPts.begin(), InsertPts.end()); + break; + } + + BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock(); + // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child + // will update its parent's ParentInsertPts and ParentPtsFreq. + SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first; + BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; + // Choose to insert in Node or in subtree of Node. + if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + ParentInsertPts.insert(Node); + ParentPtsFreq += BFI.getBlockFreq(Node); + } else { + ParentInsertPts.insert(InsertPts.begin(), InsertPts.end()); + ParentPtsFreq += InsertPtsFreq; + } + } +} + /// \brief Find an insertion point that dominates all uses. -Instruction *ConstantHoistingPass::findConstantInsertionPoint( +SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint( const ConstantInfo &ConstInfo) const { assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); // Collect all basic blocks. 
SmallPtrSet<BasicBlock *, 8> BBs; + SmallPtrSet<Instruction *, 8> InsertPts; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); - if (BBs.count(Entry)) - return &Entry->front(); + if (BBs.count(Entry)) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } + + if (BFI) { + findBestInsertionSet(*DT, *BFI, Entry, BBs); + for (auto BB : BBs) { + BasicBlock::iterator InsertPt = BB->begin(); + for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + ; + InsertPts.insert(&*InsertPt); + } + return InsertPts; + } while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; BB1 = *BBs.begin(); BB2 = *std::next(BBs.begin()); BB = DT->findNearestCommonDominator(BB1, BB2); - if (BB == Entry) - return &Entry->front(); + if (BB == Entry) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } BBs.erase(BB1); BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); Instruction &FirstInst = (*BBs.begin())->front(); - return findMatInsertPt(&FirstInst); + InsertPts.insert(findMatInsertPt(&FirstInst)); + return InsertPts; } @@ -557,29 +679,54 @@ bool ConstantHoistingPass::emitBaseConstants() { bool MadeChange = false; for (auto const &ConstInfo : ConstantVec) { // Hoist and hide the base constant behind a bitcast. - Instruction *IP = findConstantInsertionPoint(ConstInfo); - IntegerType *Ty = ConstInfo.BaseConstant->getType(); - Instruction *Base = - new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); - DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB " - << IP->getParent()->getName() << '\n' << *Base << '\n'); - NumConstantsHoisted++; + SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo); + assert(!IPSet.empty() && "IPSet is empty"); + + unsigned UsesNum = 0; + unsigned ReBasesNum = 0; + for (Instruction *IP : IPSet) { + IntegerType *Ty = ConstInfo.BaseConstant->getType(); + Instruction *Base = + new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); + DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant + << ") to BB " << IP->getParent()->getName() << '\n' + << *Base << '\n'); + + // Emit materialization code for all rebased constants. + unsigned Uses = 0; + for (auto const &RCI : ConstInfo.RebasedConstants) { + for (auto const &U : RCI.Uses) { + Uses++; + BasicBlock *OrigMatInsertBB = + findMatInsertPt(U.Inst, U.OpndIdx)->getParent(); + // If Base constant is to be inserted in multiple places, + // generate rebase for U using the Base dominating U. + if (IPSet.size() == 1 || + DT->dominates(Base->getParent(), OrigMatInsertBB)) { + emitBaseConstants(Base, RCI.Offset, U); + ReBasesNum++; + } + } + } + UsesNum = Uses; - // Emit materialization code for all rebased constants. - for (auto const &RCI : ConstInfo.RebasedConstants) { - NumConstantsRebased++; - for (auto const &U : RCI.Uses) - emitBaseConstants(Base, RCI.Offset, U); + // Use the same debug location as the last user of the constant. + assert(!Base->use_empty() && "The use list is empty!?"); + assert(isa<Instruction>(Base->user_back()) && + "All uses should be instructions."); + Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc()); } + (void)UsesNum; + (void)ReBasesNum; + // Expect all uses are rebased after rebase is done. + assert(UsesNum == ReBasesNum && "Not all uses are rebased"); + + NumConstantsHoisted++; - // Use the same debug location as the last user of the constant. 
- assert(!Base->use_empty() && "The use list is empty!?"); - assert(isa<Instruction>(Base->user_back()) && - "All uses should be instructions."); - Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc()); + // Base constant is also included in ConstInfo.RebasedConstants, so + // deduct 1 from ConstInfo.RebasedConstants.size(). + NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1; - // Correct for base constant, which we counted above too. - NumConstantsRebased--; MadeChange = true; } return MadeChange; @@ -595,9 +742,11 @@ void ConstantHoistingPass::deleteDeadCastInst() const { /// \brief Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, - DominatorTree &DT, BasicBlock &Entry) { + DominatorTree &DT, BlockFrequencyInfo *BFI, + BasicBlock &Entry) { this->TTI = &TTI; this->DT = &DT; + this->BFI = BFI; this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -628,7 +777,10 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &TTI = AM.getResult<TargetIRAnalysis>(F); - if (!runImpl(F, TTI, DT, F.getEntryBlock())) + auto BFI = ConstHoistWithBlockFrequency + ? &AM.getResult<BlockFrequencyAnalysis>(F) + : nullptr; + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index c843c61ea94e..b5a4cc2f3953 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" @@ -26,6 +26,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -95,7 +96,8 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { return true; } -static bool processPHI(PHINode *P, LazyValueInfo *LVI) { +static bool processPHI(PHINode *P, LazyValueInfo *LVI, + const SimplifyQuery &SQ) { bool Changed = false; BasicBlock *BB = P->getParent(); @@ -149,9 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI) { Changed = true; } - // FIXME: Provide TLI, DT, AT to SimplifyInstruction. - const DataLayout &DL = BB->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(P, DL)) { + if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -488,9 +488,8 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { ConstantInt::getFalse(C->getContext()); } -static bool runImpl(Function &F, LazyValueInfo *LVI) { +static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) { bool FnChanged = false; - // Visiting in a pre-order depth-first traversal causes us to simplify early // blocks before querying later blocks (which require us to analyze early // blocks). 
Eagerly simplifying shallow blocks means there is strictly less @@ -505,7 +504,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI) { BBChanged |= processSelect(cast<SelectInst>(II), LVI); break; case Instruction::PHI: - BBChanged |= processPHI(cast<PHINode>(II), LVI); + BBChanged |= processPHI(cast<PHINode>(II), LVI, SQ); break; case Instruction::ICmp: case Instruction::FCmp: @@ -566,14 +565,25 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { return false; LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - return runImpl(F, LVI); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); + auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>(); + auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; + const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); + return runImpl(F, LVI, SQ); } PreservedAnalyses CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); - bool Changed = runImpl(F, LVI); + auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); + auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); + auto *AC = AM.getCachedResult<AssumptionAnalysis>(F); + const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); + bool Changed = runImpl(F, LVI, SQ); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index 7019287954a1..48eda09c463e 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -537,9 +538,9 @@ bool GuardWideningImpl::parseRangeChecks( } else if (match(Check.getBase(), m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(OpLHS, KnownZero, KnownOne, DL); - if ((OpRHS->getValue() & KnownZero) == OpRHS->getValue()) { + KnownBits Known(BitWidth); + computeKnownBits(OpLHS, Known, DL); + if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { Check.setBase(OpLHS); APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); Check.setOffset(ConstantInt::get(Ctx, NewOffset)); diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 5d8701431a2c..9e2563879da2 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -152,15 +152,15 @@ private: Function *F) const; void appendsFlatAddressExpressionToPostorderStack( - Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack, - DenseSet<Value *> *Visited) const; + Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack, + DenseSet<Value *> &Visited) const; bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV, Value *NewV) const; void collectRewritableIntrinsicOperands( IntrinsicInst *II, - std::vector<std::pair<Value *, bool>> *PostorderStack, - DenseSet<Value *> *Visited) const; + std::vector<std::pair<Value *, bool>> &PostorderStack, + DenseSet<Value *> &Visited) const; std::vector<Value 
*> collectFlatAddressExpressions(Function &F) const; @@ -204,7 +204,6 @@ static bool isAddressExpression(const Value &V) { // // Precondition: V is an address expression. static SmallVector<Value *, 2> getPointerOperands(const Value &V) { - assert(isAddressExpression(V)); const Operator &Op = cast<Operator>(V); switch (Op.getOpcode()) { case Instruction::PHI: { @@ -254,8 +253,8 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, // TODO: Move logic to TTI? void InferAddressSpaces::collectRewritableIntrinsicOperands( - IntrinsicInst *II, std::vector<std::pair<Value *, bool>> *PostorderStack, - DenseSet<Value *> *Visited) const { + IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack, + DenseSet<Value *> &Visited) const { switch (II->getIntrinsicID()) { case Intrinsic::objectsize: case Intrinsic::amdgcn_atomic_inc: @@ -272,13 +271,13 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands( // If V is an unvisited flat address expression, appends V to PostorderStack // and marks it as visited. void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack( - Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack, - DenseSet<Value *> *Visited) const { + Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack, + DenseSet<Value *> &Visited) const { assert(V->getType()->isPointerTy()); if (isAddressExpression(*V) && V->getType()->getPointerAddressSpace() == FlatAddrSpace) { - if (Visited->insert(V).second) - PostorderStack->push_back(std::make_pair(V, false)); + if (Visited.insert(V).second) + PostorderStack.push_back(std::make_pair(V, false)); } } @@ -293,14 +292,18 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { DenseSet<Value *> Visited; auto PushPtrOperand = [&](Value *Ptr) { - appendsFlatAddressExpressionToPostorderStack(Ptr, &PostorderStack, - &Visited); + appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack, + Visited); }; - // We only explore address expressions that are reachable from loads and - // stores for now because we aim at generating faster loads and stores. + // Look at operations that may be interesting accelerate by moving to a known + // address space. We aim at generating after loads and stores, but pure + // addressing calculations may also be faster. for (Instruction &I : instructions(F)) { - if (auto *LI = dyn_cast<LoadInst>(&I)) + if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { + if (!GEP->getType()->isVectorTy()) + PushPtrOperand(GEP->getPointerOperand()); + } else if (auto *LI = dyn_cast<LoadInst>(&I)) PushPtrOperand(LI->getPointerOperand()); else if (auto *SI = dyn_cast<StoreInst>(&I)) PushPtrOperand(SI->getPointerOperand()); @@ -316,7 +319,7 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { if (auto *MTI = dyn_cast<MemTransferInst>(MI)) PushPtrOperand(MTI->getRawSource()); } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) - collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited); + collectRewritableIntrinsicOperands(II, PostorderStack, Visited); else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) { // FIXME: Handle vectors of pointers if (Cmp->getOperand(0)->getType()->isPointerTy()) { @@ -338,8 +341,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // Otherwise, adds its operands to the stack and explores them. 
PostorderStack.back().second = true; for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) { - appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack, - &Visited); + appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, + Visited); } } return Postorder; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 08eb95a1a3d3..a0da81605a80 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1289,6 +1289,36 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (PredToDestList.empty()) return false; + // If all the predecessors go to a single known successor, we want to fold, + // not thread. By doing so, we do not need to duplicate the current block and + // also miss potential opportunities in case we dont/cant duplicate. + if (OnlyDest && OnlyDest != MultipleDestSentinel) { + if (PredToDestList.size() == + (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + bool SeenFirstBranchToOnlyDest = false; + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. + else + SuccBB->removePredecessor(BB, true); // This is unreachable successor. + } + + // Finally update the terminator. + TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + + // If the condition is now dead due to the removal of the old terminator, + // erase it. + auto *CondInst = dyn_cast<Instruction>(Cond); + if (CondInst && CondInst->use_empty()) + CondInst->eraseFromParent(); + // FIXME: in case this instruction is defined in the current BB and it + // resolves to a single value from all predecessors, we can do RAUW. + return true; + } + } + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 946d85d7360f..5042fc18d7c4 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -345,6 +345,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, if (!SI->isSimple()) return false; + // Don't convert stores of non-integral pointer types to memsets (which stores + // integers). + if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType())) + return false; + // Avoid merging nontemporal stores. 
if (SI->getMetadata(LLVMContext::MD_nontemporal)) return false; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index e5689368de80..8ce96cf1b7a6 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -58,13 +58,14 @@ class LoopRotate { AssumptionCache *AC; DominatorTree *DT; ScalarEvolution *SE; + const SimplifyQuery &SQ; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE) - : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) { - } + DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ) + : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), + SQ(SQ) {} bool processLoop(Loop *L); private: @@ -311,8 +312,6 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); @@ -342,8 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - // FIXME: Provide TLI, DT, AC to SimplifyInstruction. - Value *V = SimplifyInstruction(C, DL); + Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C)); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. @@ -671,7 +669,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0; - LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE); + const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ); bool Changed = LR.processLoop(&L); if (!Changed) @@ -714,7 +714,11 @@ public: auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; - LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE); + auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); + auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + const SimplifyQuery SQ(DL, TLI, DT, AC); + LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ); return LR.processLoop(L); } }; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index a99c9999c619..8fa806a7e8bc 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This pass transforms loops that contain branches on loop-invariant conditions -// to have multiple loops. For example, it turns the left into the right code: +// to multiple loops. For example, it turns the left into the right code: // // for (...) 
if (lic) // A for (...) diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 659353e912fe..49ce0262c97b 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -352,20 +352,10 @@ Value *StructurizeCFG::invert(Value *Condition) { if (Instruction *Inst = dyn_cast<Instruction>(Condition)) { // Third: Check all the users for an invert BasicBlock *Parent = Inst->getParent(); - for (User *U : Condition->users()) { - if (Instruction *I = dyn_cast<Instruction>(U)) { + for (User *U : Condition->users()) + if (Instruction *I = dyn_cast<Instruction>(U)) if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition)))) return I; - } - } - - // Avoid creating a new instruction in the common case of a compare. - if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) { - if (Cmp->hasOneUse()) { - Cmp->setPredicate(Cmp->getInversePredicate()); - return Cmp; - } - } // Last option: Create a new instruction return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 1cfe3bd53648..7ffdad597a9b 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -256,14 +257,14 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V, unsigned HiBits = LongLen - ShortLen; const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); - APInt Zeros(LongLen, 0), Ones(LongLen, 0); + KnownBits Known(LongLen); - computeKnownBits(V, Zeros, Ones, DL); + computeKnownBits(V, Known, DL); - if (Zeros.countLeadingOnes() >= HiBits) + if (Known.Zero.countLeadingOnes() >= HiBits) return VALRNG_KNOWN_SHORT; - if (Ones.countLeadingZeros() < HiBits) + if (Known.One.countLeadingZeros() < HiBits) return VALRNG_LIKELY_LONG; // Long integer divisions are often used in hashtable implementations. It's diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 82552684b832..ed72099ec3ed 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -73,24 +73,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { } /// \brief Build a set of blocks to extract if the input blocks are viable. -template <typename IteratorT> -static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, - IteratorT BBEnd) { +static SetVector<BasicBlock *> +buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) { + assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector<BasicBlock *> Result; - assert(BBBegin != BBEnd); - // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. - do { - if (!Result.insert(*BBBegin)) - llvm_unreachable("Repeated basic blocks in extraction input"); + for (BasicBlock *BB : BBs) { - if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) { + // If this block is dead, don't process it. 
+ if (DT && !DT->isReachableFromEntry(BB)) + continue; + + if (!Result.insert(BB)) + llvm_unreachable("Repeated basic blocks in extraction input"); + if (!CodeExtractor::isBlockValidForExtraction(*BB)) { Result.clear(); return Result; } - } while (++BBBegin != BBEnd); + } #ifndef NDEBUG for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), @@ -106,23 +108,17 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, return Result; } -/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. -static SetVector<BasicBlock *> -buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) { - return buildExtractionBlockSet(BBs.begin(), BBs.end()); -} - CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), NumExitBlocks(~0U) {} /// definedInRegion - Return true if the specified value is defined in the @@ -194,9 +190,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); - BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, - Header->getName()+".ce"); + BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. @@ -205,11 +199,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { Blocks.insert(NewBB); Header = NewBB; - // Okay, update dominator sets. The blocks that dominate the new one are the - // blocks that dominate TIBB plus the new block itself. - if (DT) - DT->splitBlock(NewBB); - // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { @@ -224,12 +213,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. 
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); + PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 8c5442762643..d3002c5fb750 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -1038,9 +1039,9 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); - unsigned TrailZ = KnownZero.countTrailingOnes(); + KnownBits Known(BitWidth); + computeKnownBits(V, Known, DL, 0, AC, CxtI, DT); + unsigned TrailZ = Known.Zero.countTrailingOnes(); // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. @@ -1268,21 +1269,37 @@ static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) { } } -/// Prepend \p DIExpr with a deref and offset operation. +enum { WithStackValue = true }; + +/// Prepend \p DIExpr with a deref and offset operation and optionally turn it +/// into a stack value. static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, - bool Deref, int64_t Offset) { - if (!Deref && !Offset) + bool Deref, int64_t Offset = 0, + bool StackValue = false) { + if (!Deref && !Offset && !StackValue) return DIExpr; - // Create a copy of the original DIDescriptor for user variable, prepending - // "deref" operation to a list of address elements, as new llvm.dbg.declare - // will take a value storing address of the memory for variable, not - // alloca itself. - SmallVector<uint64_t, 4> Ops; + + SmallVector<uint64_t, 8> Ops; + appendOffset(Ops, Offset); if (Deref) Ops.push_back(dwarf::DW_OP_deref); - appendOffset(Ops, Offset); if (DIExpr) - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); + for (auto Op : DIExpr->expr_ops()) { + // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. + if (StackValue) { + if (Op.getOp() == dwarf::DW_OP_stack_value) + StackValue = false; + else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { + Ops.push_back(dwarf::DW_OP_stack_value); + StackValue = false; + } + } + Ops.push_back(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + Ops.push_back(Op.getArg(I)); + } + if (StackValue) + Ops.push_back(dwarf::DW_OP_stack_value); return Builder.createExpression(Ops); } @@ -1374,12 +1391,15 @@ void llvm::salvageDebugInfo(Instruction &I) { unsigned BitWidth = M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); APInt Offset(BitWidth, 0); - // Rewrite a constant GEP into a DIExpression. + // Rewrite a constant GEP into a DIExpression. Since we are performing + // arithmetic to compute the variable's *value* in the DIExpression, we + // need to mark the expression with a DW_OP_stack_value. if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - // GEP offsets are i32 and thus alwaus fit into an int64_t. 
- DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue()); + // GEP offsets are i32 and thus always fit into an int64_t. + DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(), + WithStackValue); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1391,7 +1411,7 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0); + DIExpr = prependDIExpr(DIB, DIExpr, WithDeref); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 3c669ce644e2..43ab725b0769 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -318,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, return false; } + // The current loop unroll pass can only unroll loops with a single latch + // that's a conditional branch exiting the loop. + // FIXME: The implementation can be extended to work with more complicated + // cases, e.g. loops with multiple latches. BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); @@ -328,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, return false; } + auto CheckSuccessors = [&](unsigned S1, unsigned S2) { + return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2)); + }; + + if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { + DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" + " exiting the loop can be unrolled\n"); + return false; + } + if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index b375d51005d5..8959e77438e9 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); + // Don't handle unreachable blocks. If there are successors with phis, this + // would leave them behind with missing predecessors. + if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) || + CurBlock->getSinglePredecessor() == CurBlock) { + DeleteList.insert(CurBlock); + return; + } + // If there is only the default destination, just branch. 
if (!SI->getNumCases()) { BranchInst::Create(Default, CurBlock); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 2f575b9d5027..f86e97b6cc72 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -60,6 +60,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -3055,6 +3056,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { BasicBlock *QFB = QBI->getSuccessor(1); BasicBlock *PostBB = QFB->getSingleSuccessor(); + // Make sure we have a good guess for PostBB. If QTB's only successor is + // QFB, then QFB is a better PostBB. + if (QTB->getSingleSuccessor() == QFB) + PostBB = QFB; + + // If we couldn't find a good PostBB, stop. + if (!PostBB) + return false; + bool InvertPCond = false, InvertQCond = false; // Canonicalize fallthroughs to the true branches. if (PFB == QBI->getParent()) { @@ -3079,8 +3089,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; - if (!PostBB || - !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) return false; if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || @@ -3746,7 +3755,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { if (!isa<DbgInfoIntrinsic>(I)) return false; - SmallSet<BasicBlock *, 4> TrivialUnwindBlocks; + SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; auto *PhiLPInst = cast<PHINode>(RI->getValue()); // Check incoming blocks to see if any of them are trivial. 
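The EliminateDeadSwitchCases() hunk that follows collapses the separate KnownZero/KnownOne APInts into a single KnownBits for the switch condition and restates the dead-case test with APInt::intersects() and APInt::isSubsetOf(). A minimal sketch of that test in isolation, assuming only the llvm/Support/KnownBits.h header the patch adds above; the helper name isDeadSwitchCase is illustrative and not part of the patch:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

// A case value can never match the condition if it sets a bit the condition
// is known to have clear, or leaves clear a bit known to be set.
static bool isDeadSwitchCase(const llvm::APInt &CaseVal,
                             const llvm::KnownBits &Known) {
  return Known.Zero.intersects(CaseVal) ||  // sets a known-zero bit
         !Known.One.isSubsetOf(CaseVal);    // misses a known-one bit
}

In the hunk itself this check is combined with the existing getMinSignedBits() bound, so a case is kept only if it is consistent with both the known bits and the significant-bit limit of the condition.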
@@ -4359,8 +4368,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); - APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); + KnownBits Known(Bits); + computeKnownBits(Cond, Known, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) @@ -4372,7 +4381,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, SmallVector<ConstantInt *, 8> DeadCases; for (auto &Case : SI->cases()) { APInt CaseVal = Case.getCaseValue()->getValue(); - if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne || + if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); @@ -4386,7 +4395,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, bool HasDefault = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const unsigned NumUnknownBits = - Bits - (KnownZero | KnownOne).countPopulation(); + Bits - (Known.Zero | Known.One).countPopulation(); assert(NumUnknownBits <= Bits); if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index f6070868de44..27373427d4f7 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -35,10 +35,8 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions removed"); -static bool runImpl(Function &F, const DominatorTree *DT, - const TargetLibraryInfo *TLI, AssumptionCache *AC, +static bool runImpl(Function &F, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { - const DataLayout &DL = F.getParent()->getDataLayout(); SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -56,7 +54,8 @@ static bool runImpl(Function &F, const DominatorTree *DT, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) { + if (Value *V = + SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast<Instruction>(U)); @@ -65,7 +64,7 @@ static bool runImpl(Function &F, const DominatorTree *DT, Changed = true; } } - if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) { + if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { // RecursivelyDeleteTriviallyDeadInstruction can remove more than one // instruction, so simply incrementing the iterator does not work. // When instructions get deleted re-iterate instead. 
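
The SimplifyInstructions.cpp hunks around this point show the same refactor from the caller's side: instead of threading DL, TLI, DT and AC into runImpl separately, they are bundled once into a SimplifyQuery and the per-instruction context is attached with getWithInstruction at the point of use. A minimal sketch of that calling pattern, assuming the analyses are already available (the helper name is illustrative, and the optional remark-emitter argument is omitted):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"

// Build the query once from the per-function analyses, then derive a
// per-instruction query at each call site, mirroring the pattern above.
static llvm::Value *trySimplify(llvm::Instruction *I, const llvm::DataLayout &DL,
                                const llvm::TargetLibraryInfo *TLI,
                                const llvm::DominatorTree *DT,
                                llvm::AssumptionCache *AC) {
  const llvm::SimplifyQuery SQ(DL, TLI, DT, AC);
  // Returns a simplified value if one exists, or nullptr otherwise.
  return llvm::SimplifyInstruction(I, SQ.getWithInstruction(I));
}
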
@@ -113,8 +112,9 @@ namespace { &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); OptimizationRemarkEmitter *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - - return runImpl(F, DT, TLI, AC, ORE); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, TLI, DT, AC); + return runImpl(F, SQ, ORE); } }; } @@ -141,7 +141,9 @@ PreservedAnalyses InstSimplifierPass::run(Function &F, auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); - bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, &TLI, &DT, &AC); + bool Changed = runImpl(F, SQ, &ORE); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index aa71e3669ea2..2c1c30463a23 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -37,10 +38,6 @@ using namespace llvm; using namespace PatternMatch; static cl::opt<bool> - ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden, - cl::desc("Treat error-reporting calls as cold")); - -static cl::opt<bool> EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden, cl::init(false), cl::desc("Enable unsafe double to float " @@ -459,11 +456,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { Value *Offset = GEP->getOperand(2); unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, - nullptr); - KnownZero.flipAllBits(); + KnownBits Known(BitWidth); + computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr); + Known.Zero.flipAllBits(); size_t ArrSize = cast<ArrayType>(GEP->getSourceElementType())->getNumElements(); @@ -477,7 +472,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { // optimize if we can prove that the program has undefined behavior when // Offset is outside that range. That is the case when GEP->getOperand(0) // is a pointer to an object whose memory extent is NullTermIdx+1. - if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || + if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) && NullTermIdx == ArrSize - 1)) return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), @@ -846,6 +841,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, // Is the inner call really malloc()? Function *InnerCallee = Malloc->getCalledFunction(); + if (!InnerCallee) + return nullptr; + LibFunc Func; if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || Func != LibFunc_malloc) @@ -930,6 +928,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, if (V == nullptr) return nullptr; + // If call isn't an intrinsic, check that it isn't within a function with the + // same name as the float version of this call. + // + // e.g. 
inline float expf(float val) { return (float) exp((double) val); } + // + // A similar such definition exists in the MinGW-w64 math.h header file which + // when compiled with -O2 -ffast-math causes the generation of infinite loops + // where expf is called. + if (!Callee->isIntrinsic()) { + const Function *F = CI->getFunction(); + StringRef FName = F->getName(); + StringRef CalleeName = Callee->getName(); + if ((FName.size() == (CalleeName.size() + 1)) && + (FName.back() == 'f') && + FName.startswith(CalleeName)) + return nullptr; + } + // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); @@ -1632,7 +1648,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, } static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) + if (!Callee || !Callee->isDeclaration()) return false; if (StreamArg < 0) diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 4409d7a404f8..97dcb40a1d72 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" @@ -65,7 +66,9 @@ public: bool run(); private: - Value *getPointerOperand(Value *I); + Value *getPointerOperand(Value *I) const; + + GetElementPtrInst *getSourceGEP(Value *Src) const; unsigned getPointerAddressSpace(Value *I); @@ -215,7 +218,7 @@ bool Vectorizer::run() { return Changed; } -Value *Vectorizer::getPointerOperand(Value *I) { +Value *Vectorizer::getPointerOperand(Value *I) const { if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); if (StoreInst *SI = dyn_cast<StoreInst>(I)) @@ -231,6 +234,19 @@ unsigned Vectorizer::getPointerAddressSpace(Value *I) { return -1; } +GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const { + // First strip pointer bitcasts. Make sure pointee size is the same with + // and without casts. + // TODO: a stride set by the add instruction below can match the difference + // in pointee type size here. Currently it will not be vectorized. + Value *SrcPtr = getPointerOperand(Src); + Value *SrcBase = SrcPtr->stripPointerCasts(); + if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) == + DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType())) + SrcPtr = SrcBase; + return dyn_cast<GetElementPtrInst>(SrcPtr); +} + // FIXME: Merge with llvm::isConsecutiveAccess bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { Value *PtrA = getPointerOperand(A); @@ -283,8 +299,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { // Look through GEPs after checking they're the same except for the last // index. 
- GetElementPtrInst *GEPA = dyn_cast<GetElementPtrInst>(getPointerOperand(A));
- GetElementPtrInst *GEPB = dyn_cast<GetElementPtrInst>(getPointerOperand(B));
+ GetElementPtrInst *GEPA = getSourceGEP(A);
+ GetElementPtrInst *GEPB = getSourceGEP(B);
if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
return false;
unsigned FinalIndex = GEPA->getNumOperands() - 1;
@@ -328,11 +344,9 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
// If any bits are known to be zero other than the sign bit in OpA, we can
// add 1 to it while guaranteeing no overflow of any sort.
if (!Safe) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT);
- KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1);
- if (KnownZero != 0)
+ KnownBits Known(BitWidth);
+ computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
+ if (Known.Zero.countTrailingZeros() < (BitWidth - 1))
Safe = true;
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7eb8fabe0b2f..87ce0194dad6 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3586,8 +3586,12 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
IRBuilder<> B(MiddleBlock->getTerminator());
Value *CountMinusOne = B.CreateSub(
CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
- Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
- "cast.cmo");
+ Value *CMO =
+ !II.getStep()->getType()->isIntegerTy()
+ ? B.CreateCast(Instruction::SIToFP, CountMinusOne,
+ II.getStep()->getType())
+ : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
+ CMO->setName("cast.cmo");
Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
@@ -4516,14 +4520,15 @@ void InnerLoopVectorizer::predicateInstructions() {
for (auto KV : PredicatedInstructions) {
BasicBlock::iterator I(KV.first);
BasicBlock *Head = I->getParent();
- auto *BB = SplitBlock(Head, &*std::next(I), DT, LI);
auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false,
/*BranchWeights=*/nullptr, DT, LI);
I->moveBefore(T);
sinkScalarOperands(&*I);
- I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if");
- BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue");
+ BasicBlock *PredicatedBlock = I->getParent();
+ Twine BBNamePrefix = Twine("pred.") + I->getOpcodeName();
+ PredicatedBlock->setName(BBNamePrefix + ".if");
+ PredicatedBlock->getSingleSuccessor()->setName(BBNamePrefix + ".continue");
// If the instruction is non-void create a Phi node at reconvergence point.
if (!I->getType()->isVoidTy()) {
@@ -7324,8 +7329,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy,
VF - 1, VectorTy);
- // TODO: IF-converted IFs become selects.
- return 0;
+ // Phi nodes in non-header blocks (not inductions, reductions, etc.) are
+ // converted into select instructions. We require N - 1 selects per phi
+ // node, where N is the number of incoming values.
+ if (VF > 1 && Phi->getParent() != TheLoop->getHeader())
+ return (Phi->getNumIncomingValues() - 1) *
+ TTI.getCmpSelInstrCost(
+ Instruction::Select, ToVectorTy(Phi->getType(), VF),
+ ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF));
+
+ return TTI.getCFInstrCost(Instruction::PHI);
}
case Instruction::UDiv:
case Instruction::SDiv:
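
The last LoopVectorize.cpp hunk above replaces the old zero cost for if-converted phis with N - 1 selects per phi, where N is the number of incoming values, and otherwise falls back to the target's plain PHI cost. A toy sketch of that arithmetic with the per-instruction costs passed in instead of being queried from TargetTransformInfo (the function and its parameters are illustrative only):

// An if-converted phi with N incoming values lowers to a chain of N - 1
// vector selects; header-block phis (inductions, reductions) keep the
// ordinary PHI cost. For example, a three-input phi at VF > 1 costs two
// selects.
static unsigned estimatePhiCost(unsigned NumIncomingValues, bool InLoopHeader,
                                unsigned VF, unsigned SelectCost,
                                unsigned PhiCost) {
  if (VF > 1 && !InLoopHeader)
    return (NumIncomingValues - 1) * SelectCost;
  return PhiCost;
}
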