author     Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
commit     044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree       1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Transforms/Utils
parent     eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/Transforms/Utils'): 41 files changed, 2875 insertions, 1487 deletions
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index df9d5da9e26e..364878dc588d 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -36,9 +36,11 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
 // with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
 static const size_t kMinAlignment = 16;
 
+// We want to add a full redzone after every variable.
 // The larger the variable Size the larger is the redzone.
 // The resulting frame size is a multiple of Alignment.
-static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
+static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
+                                size_t Alignment) {
   size_t Res = 0;
   if (Size <= 4)  Res = 16;
   else if (Size <= 16) Res = 32;
@@ -46,7 +48,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
   else if (Size <= 512) Res = Size + 64;
   else if (Size <= 4096) Res = Size + 128;
   else                   Res = Size + 256;
-  return alignTo(Res, Alignment);
+  return alignTo(std::max(Res, 2 * Granularity), Alignment);
 }
 
 ASanStackFrameLayout
@@ -80,7 +82,8 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
     assert(Size > 0);
     size_t NextAlignment = IsLast ? Granularity
                                   : std::max(Granularity, Vars[i + 1].Alignment);
-    size_t SizeWithRedzone = VarAndRedzoneSize(Size, NextAlignment);
+    size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity,
+                                               NextAlignment);
     Vars[i].Offset = Offset;
     Offset += SizeWithRedzone;
   }
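The new Granularity parameter feeds the std::max clamp, which guarantees a full redzone of at least two shadow granules after every variable, even when the size table alone would produce something smaller. A self-contained sketch of the sizing rule (editorial illustration, not part of the commit; the middle tier Size <= 128 -> Size + 32 falls between the two visible hunks, and alignTo is reimplemented here):

#include <algorithm>
#include <cstddef>
#include <cstdio>

// Round Size up to the next multiple of Alignment (assumed a power of two).
static std::size_t alignTo(std::size_t Size, std::size_t Alignment) {
  return (Size + Alignment - 1) & ~(Alignment - 1);
}

// Mirrors the patched VarAndRedzoneSize.
static std::size_t varAndRedzoneSize(std::size_t Size, std::size_t Granularity,
                                     std::size_t Alignment) {
  std::size_t Res = 0;
  if (Size <= 4) Res = 16;
  else if (Size <= 16) Res = 32;
  else if (Size <= 128) Res = Size + 32;
  else if (Size <= 512) Res = Size + 64;
  else if (Size <= 4096) Res = Size + 128;
  else Res = Size + 256;
  return alignTo(std::max(Res, 2 * Granularity), Alignment);
}

int main() {
  // With a 64-byte shadow granularity, a 4-byte variable now reserves
  // max(16, 2 * 64) = 128 bytes; the old code reserved only alignTo(16, 16) = 16.
  std::printf("%zu\n", varAndRedzoneSize(4, 64, 16)); // prints 128
}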
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 4c9746b8c691..0f0668f24db5 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -50,31 +50,45 @@
 //
 // For more details about DWARF discriminators, please visit
 // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/AddDiscriminators.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include <utility>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "add-discriminators"
 
+// Command line option to disable discriminator generation even in the
+// presence of debug information. This is only needed when debugging
+// debug info generation issues.
+static cl::opt<bool> NoDiscriminators(
+    "no-discriminators", cl::init(false),
+    cl::desc("Disable generation of discriminator information."));
+
 namespace {
 
+// The legacy pass of AddDiscriminators.
 struct AddDiscriminatorsLegacyPass : public FunctionPass {
   static char ID; // Pass identification, replacement for typeid
+
   AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
     initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
   }
@@ -85,18 +99,12 @@ struct AddDiscriminatorsLegacyPass : public FunctionPass {
 } // end anonymous namespace
 
 char AddDiscriminatorsLegacyPass::ID = 0;
+
 INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
                       "Add DWARF path discriminators", false, false)
 INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
                     "Add DWARF path discriminators", false, false)
 
-// Command line option to disable discriminator generation even in the
-// presence of debug information. This is only needed when debugging
-// debug info generation issues.
-static cl::opt<bool> NoDiscriminators(
-    "no-discriminators", cl::init(false),
-    cl::desc("Disable generation of discriminator information."));
-
 // Create the legacy AddDiscriminatorsPass.
 FunctionPass *llvm::createAddDiscriminatorsPass() {
   return new AddDiscriminatorsLegacyPass();
@@ -166,11 +174,11 @@ static bool addDiscriminators(Function &F) {
 
   bool Changed = false;
 
-  typedef std::pair<StringRef, unsigned> Location;
-  typedef DenseSet<const BasicBlock *> BBSet;
-  typedef DenseMap<Location, BBSet> LocationBBMap;
-  typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
-  typedef DenseSet<Location> LocationSet;
+  using Location = std::pair<StringRef, unsigned>;
+  using BBSet = DenseSet<const BasicBlock *>;
+  using LocationBBMap = DenseMap<Location, BBSet>;
+  using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
+  using LocationSet = DenseSet<Location>;
 
   LocationBBMap LBM;
   LocationDiscriminatorMap LDM;
@@ -242,6 +250,7 @@ static bool addDiscriminators(Function &F) {
 bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
   return addDiscriminators(F);
 }
+
 PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
   if (!addDiscriminators(F))
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 3d5cbfc93f2e..606bd8baccaa 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1,4 +1,4 @@
-//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -13,22 +13,36 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/Local.h" -#include <algorithm> +#include <cassert> +#include <cstdint> +#include <string> +#include <utility> +#include <vector> + using namespace llvm; void llvm::DeleteDeadBlock(BasicBlock *BB) { @@ -130,8 +144,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, } // Begin by getting rid of unneeded PHIs. - if (isa<PHINode>(BB->front())) + SmallVector<Value *, 4> IncomingValues; + if (isa<PHINode>(BB->front())) { + for (auto &I : *BB) + if (PHINode *PN = dyn_cast<PHINode>(&I)) { + if (PN->getIncomingValue(0) != PN) + IncomingValues.push_back(PN->getIncomingValue(0)); + } else + break; FoldSingleEntryPHINodes(BB, MemDep); + } // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); @@ -143,6 +165,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Move all definitions in the successor to the predecessor... PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + // Eliminate duplicate dbg.values describing the entry PHI node post-splice. + for (auto *Incoming : IncomingValues) { + if (isa<Instruction>(Incoming)) { + SmallVector<DbgValueInst *, 2> DbgValues; + SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2> + DbgValueSet; + llvm::findDbgValues(DbgValues, Incoming); + for (auto &DVI : DbgValues) { + auto R = DbgValueSet.insert({DVI->getVariable(), DVI->getExpression()}); + if (!R.second) + DVI->eraseFromParent(); + } + } + } + // Inherit predecessors name if it exists. if (!PredBB->hasName()) PredBB->takeName(BB); @@ -454,7 +491,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. - if (Preds.size() == 0) { + if (Preds.empty()) { // Insert dummy values as the incoming value. 
@@ -454,7 +491,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
   // node becomes an incoming value for BB's phi node. However, if the Preds
   // list is empty, we need to insert dummy entries into the PHI nodes in BB to
   // account for the newly created predecessor.
-  if (Preds.size() == 0) {
+  if (Preds.empty()) {
     // Insert dummy values as the incoming value.
     for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
       cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -675,7 +712,6 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
 }
 
-
 Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
                             BasicBlock *&IfFalse) {
   PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 175cbd2ce0df..3653c307619b 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -16,9 +16,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/CFG.h"
@@ -28,6 +30,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "break-crit-edges"
@@ -198,59 +202,23 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
   if (!DT && !LI)
     return NewBB;
 
-  // Now update analysis information. Since the only predecessor of NewBB is
-  // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
-  // anything, as there are other successors of DestBB. However, if all other
-  // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
-  // loop header) then NewBB dominates DestBB.
-  SmallVector<BasicBlock*, 8> OtherPreds;
-
-  // If there is a PHI in the block, loop over predecessors with it, which is
-  // faster than iterating pred_begin/end.
-  if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (PN->getIncomingBlock(i) != NewBB)
-        OtherPreds.push_back(PN->getIncomingBlock(i));
-  } else {
-    for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
-         I != E; ++I) {
-      BasicBlock *P = *I;
-      if (P != NewBB)
-        OtherPreds.push_back(P);
-    }
-  }
-
-  bool NewBBDominatesDestBB = true;
-
-  // Should we update DominatorTree information?
   if (DT) {
-    DomTreeNode *TINode = DT->getNode(TIBB);
-
-    // The new block is not the immediate dominator for any other nodes, but
-    // TINode is the immediate dominator for the new node.
+    // Update the DominatorTree.
+    //       ---> NewBB -----\
+    //      /                 V
+    //  TIBB -------\\------> DestBB
     //
-    if (TINode) {       // Don't break unreachable code!
-      DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
-      DomTreeNode *DestBBNode = nullptr;
-
-      // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
-      if (!OtherPreds.empty()) {
-        DestBBNode = DT->getNode(DestBB);
-        while (!OtherPreds.empty() && NewBBDominatesDestBB) {
-          if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
-            NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
-          OtherPreds.pop_back();
-        }
-        OtherPreds.clear();
-      }
-
-      // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
-      // doesn't dominate anything.
-      if (NewBBDominatesDestBB) {
-        if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
-        DT->changeImmediateDominator(DestBBNode, NewBBNode);
-      }
-    }
+    // First, inform the DT about the new path from TIBB to DestBB via NewBB,
+    // then delete the old edge from TIBB to DestBB. By doing this in that order
+    // DestBB stays reachable in the DT the whole time and its subtree doesn't
+    // get disconnected.
+    SmallVector<DominatorTree::UpdateType, 3> Updates;
+    Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
+    Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
+    if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
+      Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
+
+    DT->applyUpdates(Updates);
   }
 
   // Update LoopInfo if it is around.
@@ -326,3 +294,159 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
 
   return NewBB;
 }
+
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+  // If the block doesn't have any PHIs, we don't care about it, since there's
+  // no point in splitting it.
+  PHINode *PN = dyn_cast<PHINode>(BB->begin());
+  if (!PN)
+    return nullptr;
+
+  // Verify we have exactly one IBR predecessor.
+  // Conservatively bail out if one of the other predecessors is not a "regular"
+  // terminator (that is, not a switch or a br).
+  BasicBlock *IBB = nullptr;
+  for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
+    BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+    TerminatorInst *PredTerm = PredBB->getTerminator();
+    switch (PredTerm->getOpcode()) {
+    case Instruction::IndirectBr:
+      if (IBB)
+        return nullptr;
+      IBB = PredBB;
+      break;
+    case Instruction::Br:
+    case Instruction::Switch:
+      OtherPreds.push_back(PredBB);
+      continue;
+    default:
+      return nullptr;
+    }
+  }
+
+  return IBB;
+}
+
+bool llvm::SplitIndirectBrCriticalEdges(Function &F,
+                                        BranchProbabilityInfo *BPI,
+                                        BlockFrequencyInfo *BFI) {
+  // Check whether the function has any indirectbrs, and collect which blocks
+  // they may jump to. Since most functions don't have indirect branches,
+  // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+  SmallSetVector<BasicBlock *, 16> Targets;
+  for (auto &BB : F) {
+    auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+    if (!IBI)
+      continue;
+
+    for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+      Targets.insert(IBI->getSuccessor(Succ));
+  }
+
+  if (Targets.empty())
+    return false;
+
+  bool ShouldUpdateAnalysis = BPI && BFI;
+  bool Changed = false;
+  for (BasicBlock *Target : Targets) {
+    SmallVector<BasicBlock *, 16> OtherPreds;
+    BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+    // If we did not find an indirectbr, or the indirectbr is the only
+    // incoming edge, this isn't the kind of edge we're looking for.
+    if (!IBRPred || OtherPreds.empty())
+      continue;
+
+    // Don't even think about ehpads/landingpads.
+    Instruction *FirstNonPHI = Target->getFirstNonPHI();
+    if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+      continue;
+
+    BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+    if (ShouldUpdateAnalysis) {
+      // Copy the BFI/BPI from Target to BodyBlock.
+      for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
+           I < E; ++I)
+        BPI->setEdgeProbability(BodyBlock, I,
+                                BPI->getEdgeProbability(Target, I));
+      BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
+    }
+    // It's possible Target was its own successor through an indirectbr.
+    // In this case, the indirectbr now comes from BodyBlock.
+    if (IBRPred == Target)
+      IBRPred = BodyBlock;
+
+    // At this point Target only has PHIs, and BodyBlock has the rest of the
+    // block's body. Create a copy of Target that will be used by the "direct"
+    // preds.
+    ValueToValueMapTy VMap;
+    BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+    BlockFrequency BlockFreqForDirectSucc;
+    for (BasicBlock *Pred : OtherPreds) {
+      // If the target is a loop to itself, then the terminator of the split
+      // block (BodyBlock) needs to be updated.
+      BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
+      Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+      if (ShouldUpdateAnalysis)
+        BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
+                                  BPI->getEdgeProbability(Src, DirectSucc);
+    }
+    if (ShouldUpdateAnalysis) {
+      BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
+      BlockFrequency NewBlockFreqForTarget =
+          BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
+      BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
+      BPI->eraseBlock(Target);
+    }
+
+    // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
+    // they are clones, so the number of PHIs is the same.
+    // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+    // (b) Leave that as the only edge in the "Indirect" PHI.
+    // (c) Merge the two in the body block.
+    BasicBlock::iterator Indirect = Target->begin(),
+                         End = Target->getFirstNonPHI()->getIterator();
+    BasicBlock::iterator Direct = DirectSucc->begin();
+    BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+    assert(&*End == Target->getTerminator() &&
+           "Block was expected to only contain PHIs");
+
+    while (Indirect != End) {
+      PHINode *DirPHI = cast<PHINode>(Direct);
+      PHINode *IndPHI = cast<PHINode>(Indirect);
+
+      // Now, clean up - the direct block shouldn't get the indirect value,
+      // and vice versa.
+      DirPHI->removeIncomingValue(IBRPred);
+      Direct++;
+
+      // Advance the pointer here, to avoid invalidation issues when the old
+      // PHI is erased.
+      Indirect++;
+
+      PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+      NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+                             IBRPred);
+
+      // Create a PHI in the body block, to merge the direct and indirect
+      // predecessors.
+      PHINode *MergePHI =
+          PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+      MergePHI->addIncoming(NewIndPHI, Target);
+      MergePHI->addIncoming(DirPHI, DirectSucc);
+
+      IndPHI->replaceAllUsesWith(MergePHI);
+      IndPHI->eraseFromParent();
+    }
+
+    Changed = true;
+  }
+
+  return Changed;
+}
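SplitIndirectBrCriticalEdges cannot split an edge leaving an indirectbr the usual way, so it clones the PHI-only target block instead: direct (br/switch) predecessors are retargeted to the ".clone" copy, the indirectbr keeps the original, and both fall through into the ".split" body block where the PHIs are merged. A sketch of how a caller might drive it (hypothetical pass boilerplate; assuming the declaration lives in BasicBlockUtils.h alongside SplitCriticalEdge, and BPI/BFI may be null to skip the profile updates):

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

static bool splitIBRCriticalEdges(Function &F, BranchProbabilityInfo *BPI,
                                  BlockFrequencyInfo *BFI) {
  // Returns true if at least one indirectbr target block was rewritten.
  return SplitIndirectBrCriticalEdges(F, BPI, BFI);
}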
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 83ec7f55d1af..f711b192f604 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -1,4 +1,4 @@
-//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -17,27 +17,32 @@
 
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "bypass-slow-division"
 
 namespace {
 
-  struct DivOpInfo {
-    bool SignedOp;
-    Value *Dividend;
-    Value *Divisor;
-
-    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
-        : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
-  };
 
   struct QuotRemPair {
     Value *Quotient;
@@ -55,38 +60,11 @@ namespace {
     Value *Quotient = nullptr;
     Value *Remainder = nullptr;
   };
-}
-
-namespace llvm {
-  template<>
-  struct DenseMapInfo<DivOpInfo> {
-    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
-      return Val1.SignedOp == Val2.SignedOp &&
-             Val1.Dividend == Val2.Dividend &&
-             Val1.Divisor == Val2.Divisor;
-    }
-
-    static DivOpInfo getEmptyKey() {
-      return DivOpInfo(false, nullptr, nullptr);
-    }
-
-    static DivOpInfo getTombstoneKey() {
-      return DivOpInfo(true, nullptr, nullptr);
-    }
+using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
+using BypassWidthsTy = DenseMap<unsigned, unsigned>;
+using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
 
-    static unsigned getHashValue(const DivOpInfo &Val) {
-      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
-                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
-             (unsigned)Val.SignedOp;
-    }
-  };
-
-  typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
-  typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
-  typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
-}
-
-namespace {
 enum ValueRange {
   /// Operand definitely fits into BypassType. No runtime checks are needed.
   VALRNG_KNOWN_SHORT,
@@ -116,17 +94,21 @@ class FastDivInsertionTask {
     return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
           SlowDivOrRem->getOpcode() == Instruction::SRem;
  }
+
  bool isDivisionOp() {
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
           SlowDivOrRem->getOpcode() == Instruction::UDiv;
  }
+
  Type *getSlowType() { return SlowDivOrRem->getType(); }
 
 public:
   FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+
   Value *getReplacement(DivCacheTy &Cache);
 };
-} // anonymous namespace
+
+} // end anonymous namespace
 
 FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
                                            const BypassWidthsTy &BypassWidths) {
@@ -175,7 +157,7 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
   // Then, look for a value in Cache.
   Value *Dividend = SlowDivOrRem->getOperand(0);
   Value *Divisor = SlowDivOrRem->getOperand(1);
-  DivOpInfo Key(isSignedOp(), Dividend, Divisor);
+  DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
   auto CacheI = Cache.find(Key);
 
   if (CacheI == Cache.end()) {
@@ -225,7 +207,7 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
     C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
     return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
   }
-  case Instruction::PHI: {
+  case Instruction::PHI:
     // Stop IR traversal in case of a crazy input code. This limits recursion
     // depth.
     if (Visited.size() >= 16)
@@ -241,7 +223,6 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
       return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
              isa<UndefValue>(V);
     });
-  }
   default:
     return false;
   }
@@ -371,11 +352,6 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
   Value *Dividend = SlowDivOrRem->getOperand(0);
   Value *Divisor = SlowDivOrRem->getOperand(1);
 
-  if (isa<ConstantInt>(Divisor)) {
-    // Keep division by a constant for DAGCombiner.
-    return None;
-  }
-
   VisitedSetTy SetL;
   ValueRange DividendRange = getValueRange(Dividend, SetL);
   if (DividendRange == VALRNG_LIKELY_LONG)
@@ -391,7 +367,9 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
 
   if (DividendShort && DivisorShort) {
     // If both operands are known to be short then just replace the long
-    // division with a short one in-place.
+    // division with a short one in-place. Since we're not introducing control
+    // flow in this case, narrowing the division is always a win, even if the
+    // divisor is a constant (and will later get replaced by a multiplication).
 
     IRBuilder<> Builder(SlowDivOrRem);
     Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
@@ -401,7 +379,16 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
     Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
     Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
     return QuotRemPair(ExtDiv, ExtRem);
-  } else if (DividendShort && !isSignedOp()) {
+  }
+
+  if (isa<ConstantInt>(Divisor)) {
+    // If the divisor is a constant, DAGCombiner will convert the division
+    // into a multiplication by a magic constant. It isn't clear if it is
+    // worth introducing control flow to get a narrower multiply.
+    return None;
+  }
+
+  if (DividendShort && !isSignedOp()) {
     // If the division is unsigned and Dividend is known to be short, then
     // either
     // 1) Divisor is less or equal to Dividend, and the result can be computed
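The transformation this file performs can be pictured at the C level: guard the expensive wide division with a cheap check that both operands fit in the narrow bypass type. A behavioral sketch of the emitted control flow (illustration only, not the IRBuilder code itself; 64/32 is the common x86 bypass pair, and the OR-based test mirrors the pass's combined operand check):

#include <cstdint>

// What the inserted fast path computes for a 64-bit udiv with a 32-bit
// bypass type: if both operands fit in 32 bits, use the narrow divide.
uint64_t bypassUDiv(uint64_t A, uint64_t B) {
  if (((A | B) >> 32) == 0)               // both operands are "short"
    return uint32_t(A) / uint32_t(B);     // fast narrow division
  return A / B;                           // slow wide division
}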
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 83bc05d0311c..972e47f9270a 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,12 +5,13 @@ add_llvm_library(LLVMTransformUtils
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
   BypassSlowDivision.cpp
+  CallPromotionUtils.cpp
   CloneFunction.cpp
   CloneModule.cpp
-  CmpInstAnalysis.cpp
   CodeExtractor.cpp
   CtorUtils.cpp
   DemoteRegToStack.cpp
+  EntryExitInstrumenter.cpp
   EscapeEnumerator.cpp
   Evaluator.cpp
   FlattenCFG.cpp
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
new file mode 100644
index 000000000000..eb3139ce4293
--- /dev/null
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -0,0 +1,328 @@
+//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities useful for promoting indirect call sites to
+// direct call sites.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "call-promotion-utils"
+
+/// Fix-up phi nodes in an invoke instruction's normal destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now either be coming from the original block or the "else" block.
+static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+                                      BasicBlock *ElseBlock,
+                                      Instruction *NewInst) {
+  for (auto &I : *Invoke->getNormalDest()) {
+    auto *Phi = dyn_cast<PHINode>(&I);
+    if (!Phi)
+      break;
+    int Idx = Phi->getBasicBlockIndex(OrigBlock);
+    if (Idx == -1)
+      continue;
+    Value *V = Phi->getIncomingValue(Idx);
+    if (dyn_cast<Instruction>(V) == Invoke) {
+      Phi->setIncomingBlock(Idx, ElseBlock);
+      Phi->addIncoming(NewInst, OrigBlock);
+      continue;
+    }
+    Phi->addIncoming(V, ElseBlock);
+  }
+}
+
+/// Fix-up phi nodes in an invoke instruction's unwind destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now be coming from either the "then" block or the "else" block.
+static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+                                      BasicBlock *ThenBlock,
+                                      BasicBlock *ElseBlock) {
+  for (auto &I : *Invoke->getUnwindDest()) {
+    auto *Phi = dyn_cast<PHINode>(&I);
+    if (!Phi)
+      break;
+    int Idx = Phi->getBasicBlockIndex(OrigBlock);
+    if (Idx == -1)
+      continue;
+    auto *V = Phi->getIncomingValue(Idx);
+    Phi->setIncomingBlock(Idx, ThenBlock);
+    Phi->addIncoming(V, ElseBlock);
+  }
+}
+
+/// Determine whether a phi node already merges the returned value of a call
+/// or invoke instruction, i.e. has it as its operand.
+static bool getRetPhiNode(Instruction *Inst, BasicBlock *Block) {
+  BasicBlock *FromBlock = Inst->getParent();
+  for (auto &I : *Block) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    if (!PHI)
+      break;
+    int Idx = PHI->getBasicBlockIndex(FromBlock);
+    if (Idx == -1)
+      continue;
+    auto *V = PHI->getIncomingValue(Idx);
+    if (V == Inst)
+      return true;
+  }
+  return false;
+}
+
+/// Create a phi node for the returned value of a call or invoke instruction.
+///
+/// After versioning a call or invoke instruction that returns a value, we have
+/// to merge the value of the original and new instructions. We do this by
+/// creating a phi node and replacing uses of the original instruction with this
+/// phi node.
+static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst) {
+
+  if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
+    return;
+
+  BasicBlock *RetValBB = NewInst->getParent();
+  if (auto *Invoke = dyn_cast<InvokeInst>(NewInst))
+    RetValBB = Invoke->getNormalDest();
+  BasicBlock *PhiBB = RetValBB->getSingleSuccessor();
+
+  if (getRetPhiNode(OrigInst, PhiBB))
+    return;
+
+  IRBuilder<> Builder(&PhiBB->front());
+  PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
+  SmallVector<User *, 16> UsersToUpdate;
+  for (User *U : OrigInst->users())
+    UsersToUpdate.push_back(U);
+  for (User *U : UsersToUpdate)
+    U->replaceUsesOfWith(OrigInst, Phi);
+  Phi->addIncoming(OrigInst, OrigInst->getParent());
+  Phi->addIncoming(NewInst, RetValBB);
+}
+
+/// Cast a call or invoke instruction to the given type.
+///
+/// When promoting a call site, the return type of the call site might not
+/// match that of the callee. If this is the case, we have to cast the
+/// returned value to the correct type. The location of the cast depends on
+/// whether we have a call or invoke instruction.
+Instruction *createRetBitCast(CallSite CS, Type *RetTy) {
+
+  // Save the users of the calling instruction. These uses will be changed to
+  // use the bitcast after we create it.
+  SmallVector<User *, 16> UsersToUpdate;
+  for (User *U : CS.getInstruction()->users())
+    UsersToUpdate.push_back(U);
+
+  // Determine an appropriate location to create the bitcast for the return
+  // value. The location depends on whether we have a call or invoke
+  // instruction.
+  Instruction *InsertBefore = nullptr;
+  if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
+    InsertBefore = &*Invoke->getNormalDest()->getFirstInsertionPt();
+  else
+    InsertBefore = &*std::next(CS.getInstruction()->getIterator());
+
+  // Bitcast the return value to the correct type.
+  auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
+                                RetTy, "", InsertBefore);
+
+  // Replace all the original uses of the calling instruction with the bitcast.
+  for (User *U : UsersToUpdate)
+    U->replaceUsesOfWith(CS.getInstruction(), Cast);
+
+  return Cast;
+}
+
+/// Predicate and clone the given call site.
+///
+/// This function creates an if-then-else structure at the location of the call
+/// site. The "if" condition compares the call site's called value to the given
+/// callee. The original call site is moved into the "else" block, and a clone
+/// of the call site is placed in the "then" block. The cloned instruction is
+/// returned.
+static Instruction *versionCallSite(CallSite CS, Value *Callee,
+                                    MDNode *BranchWeights,
+                                    BasicBlock *&ThenBlock,
+                                    BasicBlock *&ElseBlock,
+                                    BasicBlock *&MergeBlock) {
+
+  IRBuilder<> Builder(CS.getInstruction());
+  Instruction *OrigInst = CS.getInstruction();
+
+  // Create the compare. The called value and callee must have the same type
+  // to be compared.
+  auto *LHS =
+      Builder.CreateBitCast(CS.getCalledValue(), Builder.getInt8PtrTy());
+  auto *RHS = Builder.CreateBitCast(Callee, Builder.getInt8PtrTy());
+  auto *Cond = Builder.CreateICmpEQ(LHS, RHS);
+
+  // Create an if-then-else structure. The original instruction is moved into
+  // the "else" block, and a clone of the original instruction is placed in the
+  // "then" block.
+  TerminatorInst *ThenTerm = nullptr;
+  TerminatorInst *ElseTerm = nullptr;
+  SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
+                                BranchWeights);
+  ThenBlock = ThenTerm->getParent();
+  ElseBlock = ElseTerm->getParent();
+  MergeBlock = OrigInst->getParent();
+
+  ThenBlock->setName("if.true.direct_targ");
+  ElseBlock->setName("if.false.orig_indirect");
+  MergeBlock->setName("if.end.icp");
+
+  Instruction *NewInst = OrigInst->clone();
+  OrigInst->moveBefore(ElseTerm);
+  NewInst->insertBefore(ThenTerm);
+
+  // If the original call site is an invoke instruction, we have extra work to
+  // do since invoke instructions are terminating.
+  if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
+    auto *NewInvoke = cast<InvokeInst>(NewInst);
+
+    // Invoke instructions are terminating, so we don't need the terminator
+    // instructions that were just created.
+    ThenTerm->eraseFromParent();
+    ElseTerm->eraseFromParent();
+
+    // Branch from the "merge" block to the original normal destination.
+    Builder.SetInsertPoint(MergeBlock);
+    Builder.CreateBr(OrigInvoke->getNormalDest());
+
+    // Now set the normal destination of the new invoke instruction to be the
+    // "merge" block.
+    NewInvoke->setNormalDest(MergeBlock);
+  }
+
+  return NewInst;
+}
+
+bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
+                            const char **FailureReason) {
+  assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+  // Check the return type. The callee's return value type must be bitcast
+  // compatible with the call site's type.
+  Type *CallRetTy = CS.getInstruction()->getType();
+  Type *FuncRetTy = Callee->getReturnType();
+  if (CallRetTy != FuncRetTy)
+    if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) {
+      if (FailureReason)
+        *FailureReason = "Return type mismatch";
+      return false;
+    }
+
+  // The number of formal arguments of the callee.
+  unsigned NumParams = Callee->getFunctionType()->getNumParams();
+
+  // Check the number of arguments. The callee and call site must agree on the
+  // number of arguments.
+  if (CS.arg_size() != NumParams && !Callee->isVarArg()) {
+    if (FailureReason)
+      *FailureReason = "The number of arguments mismatch";
+    return false;
+  }
+
+  // Check the argument types. The callee's formal argument types must be
+  // bitcast compatible with the corresponding actual argument types of the
+  // call site.
+  for (unsigned I = 0; I < NumParams; ++I) {
+    Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
+    Type *ActualTy = CS.getArgument(I)->getType();
+    if (FormalTy == ActualTy)
+      continue;
+    if (!CastInst::isBitCastable(ActualTy, FormalTy)) {
+      if (FailureReason)
+        *FailureReason = "Argument type mismatch";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static void promoteCall(CallSite CS, Function *Callee, Instruction *&Cast) {
+  assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+  // Set the called function of the call site to be the given callee.
+  CS.setCalledFunction(Callee);
+
+  // Since the call site will no longer be indirect, we must clear metadata
+  // that is only appropriate for indirect calls. This includes !prof and
+  // !callees metadata.
+  CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr);
+  CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
+
+  // If the function type of the call site matches that of the callee, no
+  // additional work is required.
+  if (CS.getFunctionType() == Callee->getFunctionType())
+    return;
+
+  // Save the return types of the call site and callee.
+  Type *CallSiteRetTy = CS.getInstruction()->getType();
+  Type *CalleeRetTy = Callee->getReturnType();
+
+  // Change the function type of the call site to match that of the callee.
+  CS.mutateFunctionType(Callee->getFunctionType());
+
+  // Inspect the arguments of the call site. If an argument's type doesn't
+  // match the corresponding formal argument's type in the callee, bitcast it
+  // to the correct type.
+  for (Use &U : CS.args()) {
+    unsigned ArgNo = CS.getArgumentNo(&U);
+    Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo);
+    Type *ActualTy = U.get()->getType();
+    if (FormalTy != ActualTy) {
+      auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "",
+                                    CS.getInstruction());
+      CS.setArgument(ArgNo, Cast);
+    }
+  }
+
+  // If the return type of the call site doesn't match that of the callee, cast
+  // the returned value to the appropriate type.
+  if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
+    Cast = createRetBitCast(CS, CallSiteRetTy);
+}
+
+Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
+                                             MDNode *BranchWeights) {
+
+  // Version the indirect call site. If the called value is equal to the given
+  // callee, 'NewInst' will be executed, otherwise the original call site will
+  // be executed.
+  BasicBlock *ThenBlock, *ElseBlock, *MergeBlock;
+  Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights, ThenBlock,
+                                         ElseBlock, MergeBlock);
+
+  // Promote 'NewInst' so that it directly calls the desired function.
+  Instruction *Cast = NewInst;
+  promoteCall(CallSite(NewInst), Callee, Cast);
+
+  // If the original call site is an invoke instruction, we have to fix-up phi
+  // nodes in the invoke's normal and unwind destinations.
+  if (auto *OrigInvoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
+    fixupPHINodeForNormalDest(OrigInvoke, MergeBlock, ElseBlock, Cast);
+    fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
+  }
+
+  // Create a phi node for the returned value of the call site.
+  createRetPHINode(CS.getInstruction(), Cast ? Cast : NewInst);
+
+  // Return the new direct call.
+  return NewInst;
+}
+
+#undef DEBUG_TYPE
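Taken together, the two public entry points are meant to be used in sequence: check legality, then version and promote. A hypothetical caller (sketch only; the MDNode branch-weights argument may be null when no profile data is available):

#include "llvm/IR/CallSite.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

using namespace llvm;

// Try to promote an indirect call site CS to a direct call of Callee.
static bool tryPromote(CallSite CS, Function *Callee, MDNode *BranchWeights) {
  const char *Reason = nullptr;
  if (!isLegalToPromote(CS, Callee, &Reason))
    return false; // Reason now points at e.g. "Return type mismatch".
  // The returned instruction is the new direct call in the "then" block;
  // the original indirect call survives in the "else" block.
  promoteCallWithIfThenElse(CS, Callee, BranchWeights);
  return true;
}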
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 9c4e13903ed7..3b19ba1b50f2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -747,7 +747,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
   Function *F = OrigLoop->getHeader()->getParent();
   Loop *ParentLoop = OrigLoop->getParentLoop();
 
-  Loop *NewLoop = new Loop();
+  Loop *NewLoop = LI->AllocateLoop();
   if (ParentLoop)
     ParentLoop->addChildLoop(NewLoop);
   else
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index e5392b53050d..8fee10854229 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-c/Core.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Module.h"
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
deleted file mode 100644
index d9294c499309..000000000000
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file holds routines to help analyse compare instructions
-// and fold them into constants or other compare instructions
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-
-using namespace llvm;
-
-unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
-  ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
-                                        : ICI->getPredicate();
-  switch (Pred) {
-      // False -> 0
-    case ICmpInst::ICMP_UGT: return 1;  // 001
-    case ICmpInst::ICMP_SGT: return 1;  // 001
-    case ICmpInst::ICMP_EQ:  return 2;  // 010
-    case ICmpInst::ICMP_UGE: return 3;  // 011
-    case ICmpInst::ICMP_SGE: return 3;  // 011
-    case ICmpInst::ICMP_ULT: return 4;  // 100
-    case ICmpInst::ICMP_SLT: return 4;  // 100
-    case ICmpInst::ICMP_NE:  return 5;  // 101
-    case ICmpInst::ICMP_ULE: return 6;  // 110
-    case ICmpInst::ICMP_SLE: return 6;  // 110
-      // True -> 7
-    default:
-      llvm_unreachable("Invalid ICmp predicate!");
-  }
-}
-
-Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
-                          CmpInst::Predicate &NewICmpPred) {
-  switch (Code) {
-    default: llvm_unreachable("Illegal ICmp code!");
-    case 0: // False.
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
-    case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
-    case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
-    case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
-    case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
-    case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
-    case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
-    case 7: // True.
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
-  }
-  return nullptr;
-}
-
-bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
-  return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
-         (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
-         (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
-}
-
-bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
-                                Value *&X, Value *&Y, Value *&Z) {
-  ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
-  if (!C)
-    return false;
-
-  switch (I->getPredicate()) {
-  default:
-    return false;
-  case ICmpInst::ICMP_SLT:
-    // X < 0 is equivalent to (X & SignMask) != 0.
-    if (!C->isZero())
-      return false;
-    Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
-    Pred = ICmpInst::ICMP_NE;
-    break;
-  case ICmpInst::ICMP_SGT:
-    // X > -1 is equivalent to (X & SignMask) == 0.
-    if (!C->isMinusOne())
-      return false;
-    Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
-    Pred = ICmpInst::ICMP_EQ;
-    break;
-  case ICmpInst::ICMP_ULT:
-    // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
-    if (!C->getValue().isPowerOf2())
-      return false;
-    Y = ConstantInt::get(I->getContext(), -C->getValue());
-    Pred = ICmpInst::ICMP_EQ;
-    break;
-  case ICmpInst::ICMP_UGT:
-    // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
-    if (!(C->getValue() + 1).isPowerOf2())
-      return false;
-    Y = ConstantInt::get(I->getContext(), ~C->getValue());
-    Pred = ICmpInst::ICMP_NE;
-    break;
-  }
-
-  X = I->getOperand(0);
-  Z = ConstantInt::getNullValue(C->getType());
-  return true;
-}
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 1189714dfab1..7a404241cb14 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,34 +14,57 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <map> #include <set> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "code-extractor" @@ -55,7 +78,8 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, cl::desc("Aggregate arguments to code-extracted functions")); /// \brief Test whether a block is valid for extraction. -bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { +bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, + bool AllowVarArgs) { // Landing pads must be in the function where they were inserted for cleanup. if (BB.isEHPad()) return false; @@ -87,14 +111,19 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { } } - // Don't hoist code containing allocas, invokes, or vastarts. + // Don't hoist code containing allocas or invokes. If explicitly requested, + // allow vastart. for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { if (isa<AllocaInst>(I) || isa<InvokeInst>(I)) return false; if (const CallInst *CI = dyn_cast<CallInst>(I)) if (const Function *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::vastart) - return false; + if (F->getIntrinsicID() == Intrinsic::vastart) { + if (AllowVarArgs) + continue; + else + return false; + } } return true; @@ -102,21 +131,21 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { /// \brief Build a set of blocks to extract if the input blocks are viable. static SetVector<BasicBlock *> -buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) { +buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, + bool AllowVarArgs) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector<BasicBlock *> Result; // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. for (BasicBlock *BB : BBs) { - // If this block is dead, don't process it. if (DT && !DT->isReachableFromEntry(BB)) continue; if (!Result.insert(BB)) llvm_unreachable("Repeated basic blocks in extraction input"); - if (!CodeExtractor::isBlockValidForExtraction(*BB)) { + if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) { Result.clear(); return Result; } @@ -138,16 +167,18 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) { CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) + BranchProbabilityInfo *BPI, bool AllowVarArgs) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} + BPI(BPI), AllowVarArgs(AllowVarArgs), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), - NumExitBlocks(~0U) {} + BPI(BPI), AllowVarArgs(false), + Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, + /* AllowVarArgs */ false)) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. 
@@ -202,7 +233,6 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
     if (Blocks.count(&BB))
       continue;
     for (Instruction &II : BB) {
-
       if (isa<DbgInfoIntrinsic>(II))
         continue;
@@ -287,7 +317,9 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
   BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
       CommonExitBlock->getFirstNonPHI()->getIterator());
 
-  for (auto *Pred : predecessors(CommonExitBlock)) {
+  for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock);
+       PI != PE;) {
+    BasicBlock *Pred = *PI++;
     if (Blocks.count(Pred))
       continue;
     Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
@@ -373,7 +405,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
     // Follow the bitcast.
     Instruction *MarkerAddr = nullptr;
     for (User *U : AI->users()) {
-
       if (U->stripInBoundsConstantOffsets() == AI) {
         SinkLifeStart = false;
         HoistLifeEnd = false;
@@ -407,7 +438,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
 
 void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
                                       const ValueSet &SinkCands) const {
-
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input.  If an
     // instruction is used outside the region, it's an output.
@@ -457,7 +487,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
   // containing PHI nodes merging values from outside of the region, and a
   // second that contains all of the code for the block and merges back any
   // incoming values from inside of the region.
-  BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);
+  BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);
 
   // We only want to code extract the second block now, and it becomes the new
   // header of the region.
@@ -525,7 +555,6 @@ void CodeExtractor::splitReturnBlocks() {
 
 /// constructFunction - make a function based on inputs and outputs, as follows:
 /// f(in0, ..., inN, out0, ..., outN)
-///
 Function *CodeExtractor::constructFunction(const ValueSet &inputs,
                                            const ValueSet &outputs,
                                            BasicBlock *header,
@@ -544,7 +573,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   default: RetTy = Type::getInt16Ty(header->getContext()); break;
   }
 
-  std::vector<Type*> paramTy;
+  std::vector<Type *> paramTy;
 
   // Add the types of the input values to the function's argument list
   for (Value *value : inputs) {
@@ -575,7 +604,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
     paramTy.push_back(PointerType::getUnqual(StructTy));
   }
   FunctionType *funcType =
-                  FunctionType::get(RetTy, paramTy, false);
+                  FunctionType::get(RetTy, paramTy,
+                                    AllowVarArgs && oldFunction->isVarArg());
 
   // Create the new function
   Function *newFunction = Function::Create(funcType,
@@ -620,7 +650,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
     } else
       RewriteVal = &*AI++;
 
-    std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
+    std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
     for (User *use : Users)
       if (Instruction *inst = dyn_cast<Instruction>(use))
        if (Blocks.count(inst->getParent()))
@@ -639,7 +669,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   // Rewrite branches to basic blocks outside of the loop to new dummy blocks
   // within the new function. This must be done before we lose track of which
   // blocks were originally in the code region.
-  std::vector<User*> Users(header->user_begin(), header->user_end());
+  std::vector<User *> Users(header->user_begin(), header->user_end());
   for (unsigned i = 0, e = Users.size(); i != e; ++i)
     // The BasicBlock which contains the branch is not in the region
     // modify the branch target to a new block
@@ -651,19 +681,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   return newFunction;
 }
 
-/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
-/// that uses the value within the basic block, and return the predecessor
-/// block associated with that use, or return 0 if none is found.
-static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
-  for (Use &U : Used->uses()) {
-     PHINode *P = dyn_cast<PHINode>(U.getUser());
-     if (P && P->getParent() == BB)
-       return P->getIncomingBlock(U);
-  }
-
-  return nullptr;
-}
-
 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
 /// the call instruction, splitting any PHI nodes in the header block as
 /// necessary.
@@ -672,7 +689,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
                            ValueSet &inputs, ValueSet &outputs) {
   // Emit a call to the new function, passing in: *pointer to struct (if
   // aggregating parameters), or plain inputs and allocated memory for outputs
-  std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+  std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
 
   Module *M = newFunction->getParent();
   LLVMContext &Context = M->getContext();
@@ -702,7 +719,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   StructType *StructArgTy = nullptr;
   AllocaInst *Struct = nullptr;
   if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    std::vector<Type*> ArgTypes;
+    std::vector<Type *> ArgTypes;
     for (ValueSet::iterator v = StructValues.begin(),
            ve = StructValues.end(); v != ve; ++v)
       ArgTypes.push_back((*v)->getType());
@@ -729,6 +746,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   // Emit the call to the function
   CallInst *call = CallInst::Create(newFunction, params,
                                     NumExitBlocks > 1 ? "targetBlock" : "");
+  // Add debug location to the new call, if the original function has debug
+  // info. In that case, the terminator of the entry block of the extracted
+  // function contains the first debug location of the extracted function,
+  // set in extractCodeRegion.
+  if (codeReplacer->getParent()->getSubprogram()) {
+    if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
+      call->setDebugLoc(DL);
+  }
   codeReplacer->getInstList().push_back(call);
 
   Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
@@ -736,7 +761,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   if (!AggregateArgs)
     std::advance(OutputArgBegin, inputs.size());
 
-  // Reload the outputs passed in by reference
+  // Reload the outputs passed in by reference.
+  Function::arg_iterator OAI = OutputArgBegin;
   for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
     Value *Output = nullptr;
     if (AggregateArgs) {
@@ -753,12 +779,40 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
     LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
     Reloads.push_back(load);
     codeReplacer->getInstList().push_back(load);
-    std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end());
+    std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
     for (unsigned u = 0, e = Users.size(); u != e; ++u) {
       Instruction *inst = cast<Instruction>(Users[u]);
       if (!Blocks.count(inst->getParent()))
         inst->replaceUsesOfWith(outputs[i], load);
     }
+
+    // Store to argument right after the definition of output value.
+    auto *OutI = dyn_cast<Instruction>(outputs[i]);
+    if (!OutI)
+      continue;
+    // Find proper insertion point.
+    Instruction *InsertPt = OutI->getNextNode();
+    // Assume that no other non-PHI instructions are interleaved among the
+    // PHIs.
+    if (isa<PHINode>(InsertPt))
+      InsertPt = InsertPt->getParent()->getFirstNonPHI();
+
+    assert(OAI != newFunction->arg_end() &&
+           "Number of output arguments should match "
+           "the amount of defined values");
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+      GetElementPtrInst *GEP = GetElementPtrInst::Create(
+          StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt);
+      new StoreInst(outputs[i], GEP, InsertPt);
+      // Since there should be only one struct argument aggregating
+      // all the output values, we shouldn't increment OAI, which always
+      // points to the struct argument, in this case.
+    } else {
+      new StoreInst(outputs[i], &*OAI, InsertPt);
+      ++OAI;
+    }
   }
 
   // Now we can emit a switch statement using the call as a value.
@@ -771,7 +825,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   // over all of the blocks in the extracted region, updating any terminator
   // instructions in the to-be-extracted region that branch to blocks that are
   // not in the region to be extracted.
-  std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+  std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
 
   unsigned switchVal = 0;
   for (BasicBlock *Block : Blocks) {
@@ -801,75 +855,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
           break;
         }
 
-        ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+        ReturnInst::Create(Context, brVal, NewTarget);
 
         // Update the switch instruction.
         TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
                                             SuccNum),
                            OldTarget);
-
-        // Restore values just before we exit
-        Function::arg_iterator OAI = OutputArgBegin;
-        for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
-          // For an invoke, the normal destination is the only one that is
-          // dominated by the result of the invocation
-          BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
-
-          bool DominatesDef = true;
-
-          BasicBlock *NormalDest = nullptr;
-          if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out]))
-            NormalDest = Invoke->getNormalDest();
-
-          if (NormalDest) {
-            DefBlock = NormalDest;
-
-            // Make sure we are looking at the original successor block, not
-            // at a newly inserted exit block, which won't be in the dominator
-            // info.
- for (const auto &I : ExitBlockMap) - if (DefBlock == I.second) { - DefBlock = I.first; - break; - } - - // In the extract block case, if the block we are extracting ends - // with an invoke instruction, make sure that we don't emit a - // store of the invoke value for the unwind block. - if (!DT && DefBlock != OldTarget) - DominatesDef = false; - } - - if (DT) { - DominatesDef = DT->dominates(DefBlock, OldTarget); - - // If the output value is used by a phi in the target block, - // then we need to test for dominance of the phi's predecessor - // instead. Unfortunately, this a little complicated since we - // have already rewritten uses of the value to uses of the reload. - BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], - OldTarget); - if (pred && DT && DT->dominates(DefBlock, pred)) - DominatesDef = true; - } - - if (DominatesDef) { - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), - FirstOut+out); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), - NTRet); - new StoreInst(outputs[out], GEP, NTRet); - } else { - new StoreInst(outputs[out], &*OAI, NTRet); - } - } - // Advance output iterator even if we don't emit a store - if (!AggregateArgs) ++OAI; - } } // rewrite the original branch instruction with this new target @@ -940,8 +931,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, BranchProbabilityInfo *BPI) { - typedef BlockFrequencyInfoImplBase::Distribution Distribution; - typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; + using Distribution = BlockFrequencyInfoImplBase::Distribution; + using BlockNode = BlockFrequencyInfoImplBase::BlockNode; // Update the branch weights for the exit block. TerminatorInst *TI = CodeReplacer->getTerminator(); @@ -985,12 +976,31 @@ Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; - ValueSet inputs, outputs, SinkingCands, HoistingCands; - BasicBlock *CommonExit = nullptr; - // Assumption: this is a single-entry code region, and the header is the first // block in the region. BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); + + // For functions with varargs, check that varargs handling is only done in the + // outlined function, i.e vastart and vaend are only used in outlined blocks. + if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) { + auto containsVarArgIntrinsic = [](Instruction &I) { + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (const Function *F = CI->getCalledFunction()) + return F->getIntrinsicID() == Intrinsic::vastart || + F->getIntrinsicID() == Intrinsic::vaend; + return false; + }; + + for (auto &BB : *oldFunction) { + if (Blocks.count(&BB)) + continue; + if (llvm::any_of(BB, containsVarArgIntrinsic)) + return nullptr; + } + } + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; // Calculate the entry frequency of the new function before we change the root // block. @@ -1012,8 +1022,6 @@ Function *CodeExtractor::extractCodeRegion() { // that the return is not in the region. 
splitReturnBlocks(); - Function *oldFunction = header->getParent(); - // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, @@ -1023,7 +1031,22 @@ Function *CodeExtractor::extractCodeRegion() { // head of the region, but the entry node of a function cannot have preds. BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); - newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + auto *BranchI = BranchInst::Create(header); + // If the original function has debug info, we have to add a debug location + // to the new branch instruction from the artificial entry block. + // We use the debug location of the first instruction in the extracted + // blocks, as there is no other equivalent line in the source code. + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } + newFuncRoot->getInstList().push_back(BranchI); findAllocas(SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); @@ -1044,7 +1067,7 @@ Function *CodeExtractor::extractCodeRegion() { } // Calculate the exit blocks for the extracted region and the total exit - // weights for each of those blocks. + // weights for each of those blocks. DenseMap<BasicBlock *, BlockFrequency> ExitWeights; SmallPtrSet<BasicBlock *, 1> ExitBlocks; for (BasicBlock *Block : Blocks) { @@ -1097,8 +1120,8 @@ Function *CodeExtractor::extractCodeRegion() { // Look at all successors of the codeReplacer block. If any of these blocks // had PHI nodes in them, we need to update the "from" block to be the code // replacer, not the original block in the extracted region. - std::vector<BasicBlock*> Succs(succ_begin(codeReplacer), - succ_end(codeReplacer)); + std::vector<BasicBlock *> Succs(succ_begin(codeReplacer), + succ_end(codeReplacer)); for (unsigned i = 0, e = Succs.size(); i != e; ++i) for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index 6642a97a29c2..82b67c293102 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp new file mode 100644 index 000000000000..421663f82565 --- /dev/null +++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -0,0 +1,163 @@ +//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +static void insertCall(Function &CurFn, StringRef Func, + Instruction *InsertionPt, DebugLoc DL) { + Module &M = *InsertionPt->getParent()->getParent()->getParent(); + LLVMContext &C = InsertionPt->getParent()->getContext(); + + if (Func == "mcount" || + Func == ".mcount" || + Func == "\01__gnu_mcount_nc" || + Func == "\01_mcount" || + Func == "\01mcount" || + Func == "__mcount" || + Func == "_mcount" || + Func == "__cyg_profile_func_enter_bare") { + Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C)); + CallInst *Call = CallInst::Create(Fn, "", InsertionPt); + Call->setDebugLoc(DL); + return; + } + + if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") { + Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}; + + Constant *Fn = M.getOrInsertFunction( + Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false)); + + Instruction *RetAddr = CallInst::Create( + Intrinsic::getDeclaration(&M, Intrinsic::returnaddress), + ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "", + InsertionPt); + RetAddr->setDebugLoc(DL); + + Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)), + RetAddr}; + + CallInst *Call = + CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt); + Call->setDebugLoc(DL); + return; + } + + // We only know how to call a fixed set of instrumentation functions, because + // they all expect different arguments, etc. + report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'"); +} + +static bool runOnFunction(Function &F, bool PostInlining) { + StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined" + : "instrument-function-entry"; + + StringRef ExitAttr = PostInlining ? "instrument-function-exit-inlined" + : "instrument-function-exit"; + + StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString(); + StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString(); + + bool Changed = false; + + // If the attribute is specified, insert instrumentation and then "consume" + // the attribute so that it's not inserted again if the pass should happen to + // run later for some reason. 
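To make the attribute handling concrete, here is a hedged sketch of the rewrite performed for the entry case; the function @foo and its body are hypothetical, while the attribute string and callee name are among those recognized above:

    ; before
    define void @foo() #0 {
      ret void
    }
    attributes #0 = { "instrument-function-entry-inlined"="mcount" }

    ; after: call inserted at the first insertion point, attribute consumed
    define void @foo() {
      call void @mcount()
      ret void
    }

For the __cyg_profile_func_enter/__cyg_profile_func_exit pair, insertCall instead builds a two-argument call, passing the instrumented function's address and the return address obtained through the llvm.returnaddress intrinsic, as the code above shows.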
+ + if (!EntryFunc.empty()) { + DebugLoc DL; + if (auto SP = F.getSubprogram()) + DL = DebugLoc::get(SP->getScopeLine(), 0, SP); + + insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL); + Changed = true; + F.removeAttribute(AttributeList::FunctionIndex, EntryAttr); + } + + if (!ExitFunc.empty()) { + for (BasicBlock &BB : F) { + TerminatorInst *T = BB.getTerminator(); + DebugLoc DL; + if (DebugLoc TerminatorDL = T->getDebugLoc()) + DL = TerminatorDL; + else if (auto SP = F.getSubprogram()) + DL = DebugLoc::get(0, 0, SP); + + if (isa<ReturnInst>(T)) { + insertCall(F, ExitFunc, T, DL); + Changed = true; + } + } + F.removeAttribute(AttributeList::FunctionIndex, ExitAttr); + } + + return Changed; +} + +namespace { +struct EntryExitInstrumenter : public FunctionPass { + static char ID; + EntryExitInstrumenter() : FunctionPass(ID) { + initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<GlobalsAAWrapperPass>(); + } + bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); } +}; +char EntryExitInstrumenter::ID = 0; + +struct PostInlineEntryExitInstrumenter : public FunctionPass { + static char ID; + PostInlineEntryExitInstrumenter() : FunctionPass(ID) { + initializePostInlineEntryExitInstrumenterPass( + *PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<GlobalsAAWrapperPass>(); + } + bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); } +}; +char PostInlineEntryExitInstrumenter::ID = 0; +} + +INITIALIZE_PASS( + EntryExitInstrumenter, "ee-instrument", + "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", + false, false) +INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", + "Instrument function entry/exit with calls to e.g. 
mcount() " + "(post inlining)", + false, false) + +FunctionPass *llvm::createEntryExitInstrumenterPass() { + return new EntryExitInstrumenter(); +} + +FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() { + return new PostInlineEntryExitInstrumenter(); +} + +PreservedAnalyses +llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) { + runOnFunction(F, PostInlining); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 1328f2f3ec01..3c5e299fae98 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -12,19 +12,33 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Evaluator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <iterator> #define DEBUG_TYPE "evaluator" @@ -193,7 +207,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. - while (1) { + while (true) { Constant *InstResult = nullptr; DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); @@ -318,7 +332,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (!LI->isSimple()) { DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); return false; // no volatile/atomic accesses. @@ -344,9 +357,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; // Cannot handle array allocs. } Type *Ty = AI->getAllocatedType(); - AllocaTmps.push_back( - make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, - UndefValue::get(Ty), AI->getName())); + AllocaTmps.push_back(llvm::make_unique<GlobalVariable>( + Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), + AI->getName())); InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { @@ -420,6 +433,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, DEBUG(dbgs() << "Skipping assume intrinsic.\n"); ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::sideeffect) { + DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); + ++CurInst; + continue; } DEBUG(dbgs() << "Unknown intrinsic. 
Can not evaluate.\n"); @@ -559,7 +576,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, BasicBlock::iterator CurInst = CurBB->begin(); - while (1) { + while (true) { BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); @@ -594,4 +611,3 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, CurBB = NextBB; } } - diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 435eff3bef47..5fdcc6d1d727 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -1,4 +1,4 @@ -//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===// +//===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===// // // The LLVM Compiler Infrastructure // @@ -14,25 +14,37 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include <cassert> + using namespace llvm; #define DEBUG_TYPE "flattencfg" namespace { + class FlattenCFGOpt { AliasAnalysis *AA; + /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); + /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); + /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -43,9 +55,11 @@ class FlattenCFGOpt { public: FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + bool run(BasicBlock *BB); }; -} + +} // end anonymous namespace /// If \param [in] BB has more than one predecessor that is a conditional /// branch, attempt to use parallel and/or for the branch condition. \returns @@ -120,7 +134,6 @@ public: /// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. -/// bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { PHINode *PHI = dyn_cast<PHINode>(BB->begin()); if (PHI) @@ -237,8 +250,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // Do branch inversion. BasicBlock *CurrBlock = LastCondBlock; bool EverChanged = false; - for (;CurrBlock != FirstCondBlock; - CurrBlock = CurrBlock->getSinglePredecessor()) { + for (; CurrBlock != FirstCondBlock; + CurrBlock = CurrBlock->getSinglePredecessor()) { BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); if (!CI) @@ -309,7 +322,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // in the 2nd if-region to compare. 
\returns true if \param Block1 and \param /// Block2 have identical instructions and do not have memory reference alias /// with \param Head2. -/// bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock *Block1, BasicBlock *Block2) { @@ -330,7 +342,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock::iterator iter2 = Block2->begin(); BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); - while (1) { + while (true) { if (iter1 == end1) { if (iter2 != end2) return false; @@ -384,7 +396,6 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// To: /// if (a || b) /// statement; -/// bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { BasicBlock *IfTrue2, *IfFalse2; Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); @@ -475,8 +486,7 @@ bool FlattenCFGOpt::run(BasicBlock *BB) { /// FlattenCFG - This function is used to flatten a CFG. For /// example, it uses parallel-and and parallel-or mode to collapse -// if-conditions and merge if-regions with identical statements. -/// +/// if-conditions and merge if-regions with identical statements. bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { return FlattenCFGOpt(AA).run(BB); } diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 4a2be3a53176..bddcbd86e914 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -13,13 +13,41 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <utility> using namespace llvm; @@ -160,7 +188,6 @@ int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, /// For more details see declaration comments. 
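A note on the idiom used by cmpConstants and the other cmp* helpers below: each returns -1, 0, or 1, and callers chain comparisons so that a later key is consulted only when the earlier one ties, producing the total order that MergeFunctions sorts by. A self-contained sketch of the pattern (cmpTwoKeys is a hypothetical name; cmpNumbers mirrors the real helper's semantics):

    static int cmpNumbers(uint64_t L, uint64_t R) {
      if (L < R) return -1;
      if (L > R) return 1;
      return 0;
    }

    // Fall through to the secondary key only when the primary key is equal.
    static int cmpTwoKeys(uint64_t L1, uint64_t R1, uint64_t L2, uint64_t R2) {
      if (int Res = cmpNumbers(L1, R1))
        return Res;
      return cmpNumbers(L2, R2);
    }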
int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) const { - Type *TyL = L->getType(); Type *TyR = R->getType(); @@ -226,8 +253,8 @@ int FunctionComparator::cmpConstants(const Constant *L, if (!L->isNullValue() && R->isNullValue()) return -1; - auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L)); - auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R)); + auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L)); + auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R)); if (GlobalValueL && GlobalValueR) { return cmpGlobalValues(GlobalValueL, GlobalValueR); } @@ -401,10 +428,9 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::TokenTyID: return 0; - case Type::PointerTyID: { + case Type::PointerTyID: assert(PTyL && PTyR && "Both types must be pointers here."); return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); - } case Type::StructTyID: { StructType *STyL = cast<StructType>(TyL); @@ -637,7 +663,6 @@ int FunctionComparator::cmpOperations(const Instruction *L, // Read method declaration comments for more details. int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const { - unsigned int ASL = GEPL->getPointerAddressSpace(); unsigned int ASR = GEPR->getPointerAddressSpace(); @@ -869,15 +894,19 @@ namespace { // buffer. class HashAccumulator64 { uint64_t Hash; + public: // Initialize to random constant, so the state isn't zero. HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { - Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + Hash = hashing::detail::hash_16_bytes(Hash, V); } + // No finishing is required, because the entire hash value is used. uint64_t getHash() { return Hash; } }; + } // end anonymous namespace // A function hash is calculated by considering only the number of arguments and @@ -919,5 +948,3 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { } return H.getHash(); } - - diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index a98d07237b47..6b5f593073b4 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -13,9 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/FunctionImportUtils.h" -#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" using namespace llvm; /// Checks if we should import SGV as a definition, otherwise import as a @@ -23,21 +21,15 @@ using namespace llvm; bool FunctionImportGlobalProcessing::doImportAsDefinition( const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) { - // For alias, we tie the definition to the base object. Extract it and recurse - if (auto *GA = dyn_cast<GlobalAlias>(SGV)) { - if (GA->isInterposable()) - return false; - const GlobalObject *GO = GA->getBaseObject(); - if (!GO->hasLinkOnceODRLinkage()) - return false; - return FunctionImportGlobalProcessing::doImportAsDefinition( - GO, GlobalsToImport); - } // Only import the globals requested for importing. - if (GlobalsToImport->count(const_cast<GlobalValue *>(SGV))) - return true; - // Otherwise no. - return false; + if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV))) + return false; + + assert(!isa<GlobalAlias>(SGV) && + "Unexpected global alias in the import list."); + + // Otherwise yes. 
+ return true; } bool FunctionImportGlobalProcessing::doImportAsDefinition( @@ -132,8 +124,10 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, return SGV->getLinkage(); switch (SGV->getLinkage()) { + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: case GlobalValue::ExternalLinkage: - // External defnitions are converted to available_externally + // External and linkonce definitions are converted to available_externally // definitions upon import, so that they are available for inlining // and/or optimization, but are turned into declarations later // during the EliminateAvailableExternally pass. @@ -150,12 +144,6 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, // An imported available_externally declaration stays that way. return SGV->getLinkage(); - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - // These both stay the same when importing the definition. - // The ThinLTO pass will eventually force-import their definitions. - return SGV->getLinkage(); - case GlobalValue::WeakAnyLinkage: // Can't import weak_any definitions correctly, or we might change the // program semantics, since the linker will pick the first weak_any @@ -213,6 +201,23 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, } void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { + + // Check the summaries to see if the symbol gets resolved to a known local + // definition. + if (GV.hasName()) { + ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID()); + if (VI) { + // Need to check all summaries are local in case of hash collisions. + bool IsLocal = VI.getSummaryList().size() && + llvm::all_of(VI.getSummaryList(), + [](const std::unique_ptr<GlobalValueSummary> &Summary) { + return Summary->isDSOLocal(); + }); + if (IsLocal) + GV.setDSOLocal(true); + } + } + bool DoPromote = false; if (GV.hasLocalLinkage() && ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 2a18c140c788..fedf6e100d6c 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,11 +12,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -26,25 +30,46 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Attributes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" 
#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <string> +#include <utility> +#include <vector> using namespace llvm; @@ -62,28 +87,37 @@ bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); } + bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); } namespace { + /// A class for recording information about inlining a landing pad. class LandingPadInliningInfo { - BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. - BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. - LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. - PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts. + /// Destination of the invoke's unwind. + BasicBlock *OuterResumeDest; + + /// Destination for the callee's resume. + BasicBlock *InnerResumeDest = nullptr; + + /// LandingPadInst associated with the invoke. + LandingPadInst *CallerLPad = nullptr; + + /// PHI for EH values from landingpad insts. + PHINode *InnerEHValuesPHI = nullptr; + SmallVector<Value*, 8> UnwindDestPHIValues; public: LandingPadInliningInfo(InvokeInst *II) - : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), - CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { + : OuterResumeDest(II->getUnwindDest()) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. - llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock *InvokeBB = II->getParent(); BasicBlock::iterator I = OuterResumeDest->begin(); for (; isa<PHINode>(I); ++I) { // Save the value to use for this edge. @@ -126,7 +160,8 @@ namespace { } } }; -} // anonymous namespace + +} // end anonymous namespace /// Get or create a target for the branch from ResumeInsts. BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { @@ -189,7 +224,7 @@ static Value *getParentPad(Value *EHPad) { return cast<CatchSwitchInst>(EHPad)->getParentPad(); } -typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy; +using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; /// Helper for getUnwindDestToken that does the descendant-ward part of /// the search. @@ -617,7 +652,7 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, // track of which values came into them from the invoke before removing the // edge from this block. SmallVector<Value *, 8> UnwindDestPHIValues; - llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock *InvokeBB = II->getParent(); for (Instruction &I : *UnwindDest) { // Save the value to use for this edge. 
PHINode *PHI = dyn_cast<PHINode>(&I); @@ -1359,6 +1394,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } } + /// Update the block frequencies of the caller after a callee has been inlined. /// /// Each block cloned into the caller has its block frequency scaled by the @@ -1454,7 +1490,8 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime, + Function *ForwardVarArgsTo) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getFunction() && "Instruction not in function!"); @@ -1464,8 +1501,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration() || // call, or call to a vararg function! - CalledFunc->getFunctionType()->isVarArg()) return false; + CalledFunc->isDeclaration() || + (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! + return false; // The inliner does not know how to inline through calls with operand bundles // in general ... @@ -1592,8 +1630,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, auto &DL = Caller->getParent()->getDataLayout(); - assert(CalledFunc->arg_size() == CS.arg_size() && - "No varargs calls can be inlined!"); + assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && + "Varargs calls can only be inlined if the Varargs are forwarded!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. @@ -1772,9 +1810,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Move any dbg.declares describing the allocas into the entry basic block. DIBuilder DIB(*Caller->getParent()); for (auto &AI : IFI.StaticAllocas) - replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0, + DIExpression::NoDeref); } + SmallVector<Value*,4> VarArgsToForward; + for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); + i < CS.getNumArgOperands(); i++) + VarArgsToForward.push_back(CS.getArgOperand(i)); + bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; @@ -1783,7 +1827,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { - for (Instruction &I : *BB) { + for (auto II = BB->begin(); II != BB->end();) { + Instruction &I = *II++; CallInst *CI = dyn_cast<CallInst>(&I); if (!CI) continue; @@ -1806,7 +1851,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // f -> g -> musttail f ==> f -> f // f -> g -> tail f ==> f -> f CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); - ChildTCK = std::min(CallSiteTailKind, ChildTCK); + if (ChildTCK != CallInst::TCK_NoTail) + ChildTCK = std::min(CallSiteTailKind, ChildTCK); CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); @@ -1814,6 +1860,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // 'nounwind'. 
if (MarkNoUnwind) CI->setDoesNotThrow(); + + if (ForwardVarArgsTo && !VarArgsToForward.empty() && + CI->getCalledFunction() == ForwardVarArgsTo) { + SmallVector<Value*, 6> Params(CI->arg_operands()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); + Call->setDebugLoc(CI->getDebugLoc()); + CI->replaceAllUsesWith(Call); + CI->eraseFromParent(); + } } } } @@ -1848,8 +1904,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. - if (AllocaArraySize != ~0ULL && - UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + if (AllocaArraySize != std::numeric_limits<uint64_t>::max() && + std::numeric_limits<uint64_t>::max() / AllocaArraySize >= + AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } @@ -1980,7 +2037,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // match the callee's return type, we also need to change the return type of // the intrinsic. if (Caller->getReturnType() == TheCall->getType()) { - auto NewEnd = remove_if(Returns, [](ReturnInst *RI) { + auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); Returns.erase(NewEnd, Returns.end()); diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 089f2b5f3b18..ae0e2bb6c280 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -56,9 +56,10 @@ static bool VerifyLoopLCSSA = true; #else static bool VerifyLoopLCSSA = false; #endif -static cl::opt<bool,true> -VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), - cl::desc("Verify loop lcssa form (time consuming)")); +static cl::opt<bool, true> + VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), + cl::Hidden, + cl::desc("Verify loop lcssa form (time consuming)")); /// Return true if the specified block is in the list. 
static bool isExitBlock(BasicBlock *BB, diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 74610613001c..a1961eecb391 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1,4 +1,4 @@ -//===-- Local.cpp - Functions to perform local transformations ------------===// +//===- Local.cpp - Functions to perform local transformations -------------===// // // The LLVM Compiler Infrastructure // @@ -13,42 +13,74 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstdint> +#include <iterator> +#include <map> +#include <utility> + using namespace llvm; using namespace llvm::PatternMatch; @@ -282,7 +314,6 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, return false; } - //===----------------------------------------------------------------------===// // Local dead code elimination. // @@ -541,7 +572,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, // Control Flow Graph Restructuring. // - /// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this /// method is called when we're about to delete Pred as a predecessor of BB. If /// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. 
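A worked example of the PHI maintenance just described, with hypothetical block and value names: when %pred is about to be removed as a predecessor of %bb,

    bb:
      %x = phi i32 [ 0, %pred ], [ 1, %other ]

the [ 0, %pred ] entry is dropped, leaving a single-entry PHI

    bb:
      %x = phi i32 [ 1, %other ]

which is then folded by replacing all uses of %x with its sole incoming value.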
@@ -578,12 +608,10 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { } } - /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its /// predecessor is known to have one successor (DestBB!). Eliminate the edge /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. -/// void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { @@ -602,7 +630,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { if (DestBB->hasAddressTaken()) { BlockAddress *BA = BlockAddress::get(DestBB); Constant *Replacement = - ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1); BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, BA->getType())); BA->destroyConstant(); @@ -621,9 +649,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { DestBB->moveAfter(PredBB); if (DT) { - BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); - DT->changeImmediateDominator(DestBB, PredBBIDom); - DT->eraseNode(PredBB); + // For some irreducible CFG we end up having forward-unreachable blocks + // so check if getNode returns a valid node before updating the domtree. + if (DomTreeNode *DTN = DT->getNode(PredBB)) { + BasicBlock *PredBBIDom = DTN->getIDom()->getBlock(); + DT->changeImmediateDominator(DestBB, PredBBIDom); + DT->eraseNode(PredBB); + } } // Nuke BB. PredBB->eraseFromParent(); @@ -640,7 +672,6 @@ static bool CanMergeValues(Value *First, Value *Second) { /// almost-empty BB ending in an unconditional branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. -/// static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); @@ -696,8 +727,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { return true; } -typedef SmallVector<BasicBlock *, 16> PredBlockVector; -typedef DenseMap<BasicBlock *, Value *> IncomingValueMap; +using PredBlockVector = SmallVector<BasicBlock *, 16>; +using IncomingValueMap = DenseMap<BasicBlock *, Value *>; /// \brief Determines the value to use as the phi node input for a block. /// @@ -927,7 +958,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { /// nodes in this block. This doesn't try to be clever about PHI nodes /// which differ only in the order of the incoming values, but instcombine /// orders them so it usually won't matter. -/// bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for @@ -937,9 +967,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { static PHINode *getEmptyKey() { return DenseMapInfo<PHINode *>::getEmptyKey(); } + static PHINode *getTombstoneKey() { return DenseMapInfo<PHINode *>::getTombstoneKey(); } + static unsigned getHashValue(PHINode *PN) { // Compute a hash value on the operands. 
Instcombine will likely have // sorted them, which helps expose duplicates, but we have to check all @@ -948,6 +980,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { hash_combine_range(PN->value_op_begin(), PN->value_op_end()), hash_combine_range(PN->block_begin(), PN->block_end()))); } + static bool isEqual(PHINode *LHS, PHINode *RHS) { if (LHS == getEmptyKey() || LHS == getTombstoneKey() || RHS == getEmptyKey() || RHS == getTombstoneKey()) @@ -984,7 +1017,6 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// often possible though. If alignment is important, a more reliable approach /// is to simply align all global variables and allocation instructions to /// their preferred alignment from the beginning. -/// static unsigned enforceKnownAlignment(Value *V, unsigned Align, unsigned PrefAlign, const DataLayout &DL) { @@ -1068,12 +1100,11 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, // Since we can't guarantee that the original dbg.declare instrinsic // is removed by LowerDbgDeclare(), we need to make sure that we are // not inserting the same dbg.value intrinsic over and over. - llvm::BasicBlock::InstListType::iterator PrevI(I); + BasicBlock::InstListType::iterator PrevI(I); if (PrevI != I->getParent()->getInstList().begin()) { --PrevI; if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) if (DVI->getValue() == I->getOperand(0) && - DVI->getOffset() == 0 && DVI->getVariable() == DIVar && DVI->getExpression() == DIExpr) return true; @@ -1092,7 +1123,6 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, findDbgValues(DbgValues, APN); for (auto *DVI : DbgValues) { assert(DVI->getValue() == APN); - assert(DVI->getOffset() == 0); if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr)) return true; } @@ -1100,12 +1130,13 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, } /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value -/// that has an associated llvm.dbg.decl intrinsic. -void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, +/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, StoreInst *SI, DIBuilder &Builder) { - auto *DIVar = DDI->getVariable(); + assert(DII->isAddressOfVariable()); + auto *DIVar = DII->getVariable(); assert(DIVar && "Missing variable"); - auto *DIExpr = DDI->getExpression(); + auto *DIExpr = DII->getExpression(); Value *DV = SI->getOperand(0); // If an argument is zero extended then use argument directly. The ZExt @@ -1116,7 +1147,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) { - // If this DDI was already describing only a fragment of a variable, ensure + // If this DII was already describing only a fragment of a variable, ensure // that fragment is appropriately narrowed here. 
// But if a fragment wasn't used, describe the value as the original // argument (rather than the zext or sext) so that it remains described even @@ -1129,23 +1160,23 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DIExpr->elements_end() - 3); Ops.push_back(dwarf::DW_OP_LLVM_fragment); Ops.push_back(FragmentOffset); - const DataLayout &DL = DDI->getModule()->getDataLayout(); + const DataLayout &DL = DII->getModule()->getDataLayout(); Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); DIExpr = Builder.createExpression(Ops); } DV = ExtendedArg; } if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(), + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(), SI); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value -/// that has an associated llvm.dbg.decl intrinsic. -void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, +/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, LoadInst *LI, DIBuilder &Builder) { - auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); if (LdStHasDebugValue(DIVar, DIExpr, LI)) @@ -1156,16 +1187,16 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, // preferable to keep tracking both the loaded value and the original // address in case the alloca can not be elided. Instruction *DbgValue = Builder.insertDbgValueIntrinsic( - LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr); + LI, DIVar, DIExpr, DII->getDebugLoc(), (Instruction *)nullptr); DbgValue->insertAfter(LI); } -/// Inserts a llvm.dbg.value intrinsic after a phi -/// that has an associated llvm.dbg.decl intrinsic. -void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, +/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated +/// llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, PHINode *APN, DIBuilder &Builder) { - auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); if (PhiHasDebugValue(DIVar, DIExpr, APN)) @@ -1178,7 +1209,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, // insertion point. // FIXME: Insert dbg.value markers in the successors when appropriate. if (InsertionPt != BB->end()) - Builder.insertDbgValueIntrinsic(APN, 0, DIVar, DIExpr, DDI->getDebugLoc(), + Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, DII->getDebugLoc(), &*InsertionPt); } @@ -1222,7 +1253,7 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. - DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), + DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DDI->getExpression(), DDI->getDebugLoc(), CI); } @@ -1233,16 +1264,25 @@ bool llvm::LowerDbgDeclare(Function &F) { return true; } -/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the -/// alloca 'V', if any. 
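To illustrate the conversion these ConvertDebugDeclareToDebugValue overloads perform, and the signature change threaded through this patch (dbg.value no longer carries an offset operand, only value, variable, and expression), a hedged IR sketch with hypothetical names:

    call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !var, metadata !expr)
    ...
    store i32 %v, i32* %x.addr

gains, at the store, a value-based description of the same variable:

    call void @llvm.dbg.value(metadata i32 %v, metadata !var, metadata !expr)
    store i32 %v, i32* %x.addr

The LdStHasDebugValue check above exists precisely so this dbg.value is not inserted twice.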
-DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { - if (auto *L = LocalAsMetadata::getIfExists(V)) - if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) - for (User *U : MDV->users()) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) - return DDI; +/// Finds all intrinsics declaring local variables as living in the memory that +/// 'V' points to. This may include a mix of dbg.declare and +/// dbg.addr intrinsics. +TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) { + auto *L = LocalAsMetadata::getIfExists(V); + if (!L) + return {}; + auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L); + if (!MDV) + return {}; + + TinyPtrVector<DbgInfoIntrinsic *> Declares; + for (User *U : MDV->users()) { + if (auto *DII = dyn_cast<DbgInfoIntrinsic>(U)) + if (DII->isAddressOfVariable()) + Declares.push_back(DII); + } - return nullptr; + return Declares; } void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { @@ -1253,29 +1293,40 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { DbgValues.push_back(DVI); } +static void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers, + Value *V) { + if (auto *L = LocalAsMetadata::getIfExists(V)) + if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) + for (User *U : MDV->users()) + if (DbgInfoIntrinsic *DII = dyn_cast<DbgInfoIntrinsic>(U)) + DbgUsers.push_back(DII); +} bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, - bool Deref, int Offset) { - DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address); - if (!DDI) - return false; - DebugLoc Loc = DDI->getDebugLoc(); - auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); - assert(DIVar && "Missing variable"); - DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); - // Insert llvm.dbg.declare immediately after the original alloca, and remove - // old llvm.dbg.declare. - Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); - DDI->eraseFromParent(); - return true; + bool DerefBefore, int Offset, bool DerefAfter) { + auto DbgAddrs = FindDbgAddrUses(Address); + for (DbgInfoIntrinsic *DII : DbgAddrs) { + DebugLoc Loc = DII->getDebugLoc(); + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); + assert(DIVar && "Missing variable"); + DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter); + // Insert llvm.dbg.declare immediately after InsertBefore, and remove old + // llvm.dbg.declare. 
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); + if (DII == InsertBefore) + InsertBefore = &*std::next(InsertBefore->getIterator()); + DII->eraseFromParent(); + } + return !DbgAddrs.empty(); } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref, int Offset) { + DIBuilder &Builder, bool DerefBefore, + int Offset, bool DerefAfter) { return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, - Deref, Offset); + DerefBefore, Offset, DerefAfter); } static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, @@ -1302,8 +1353,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, DIExpr = Builder.createExpression(Ops); } - Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr, - Loc, DVI); + Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI); DVI->eraseFromParent(); } @@ -1322,17 +1372,28 @@ void llvm::salvageDebugInfo(Instruction &I) { SmallVector<DbgValueInst *, 1> DbgValues; auto &M = *I.getModule(); - auto MDWrap = [&](Value *V) { + auto wrapMD = [&](Value *V) { return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); }; - if (isa<BitCastInst>(&I)) { - findDbgValues(DbgValues, &I); - for (auto *DVI : DbgValues) { - // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value - // to use the cast's source. - DVI->setOperand(0, MDWrap(I.getOperand(0))); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) { + auto *DIExpr = DVI->getExpression(); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::NoDeref, + DIExpression::WithStackValue); + DVI->setOperand(0, wrapMD(I.getOperand(0))); + DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + }; + + if (isa<BitCastInst>(&I) || isa<IntToPtrInst>(&I)) { + // Bitcasts are entirely irrelevant for debug info. Rewrite dbg.value, + // dbg.addr, and dbg.declare to use the cast's source. + SmallVector<DbgInfoIntrinsic *, 1> DbgUsers; + findDbgUsers(DbgUsers, &I); + for (auto *DII : DbgUsers) { + DII->setOperand(0, wrapMD(I.getOperand(0))); + DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); } } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { findDbgValues(DbgValues, &I); @@ -1343,27 +1404,27 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite a constant GEP into a DIExpression. Since we are performing // arithmetic to compute the variable's *value* in the DIExpression, we // need to mark the expression with a DW_OP_stack_value. - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { - auto *DIExpr = DVI->getExpression(); - DIBuilder DIB(M, /*AllowUnresolved*/ false); + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) // GEP offsets are i32 and thus always fit into an int64_t. 
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, - Offset.getSExtValue(), - DIExpression::WithStackValue); - DVI->setOperand(0, MDWrap(I.getOperand(0))); - DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); - } + applyOffset(DVI, Offset.getSExtValue()); } + } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { + if (BI->getOpcode() == Instruction::Add) + if (auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1))) + if (ConstInt->getBitWidth() <= 64) { + APInt Offset = ConstInt->getValue(); + findDbgValues(DbgValues, &I); + for (auto *DVI : DbgValues) + applyOffset(DVI, Offset.getSExtValue()); + } } else if (isa<LoadInst>(&I)) { findDbgValues(DbgValues, &I); for (auto *DVI : DbgValues) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); - DIBuilder DIB(M, /*AllowUnresolved*/ false); DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); - DVI->setOperand(0, MDWrap(I.getOperand(0))); - DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); + DVI->setOperand(0, wrapMD(I.getOperand(0))); + DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); } } @@ -1480,7 +1541,6 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, static bool markAliveBlocks(Function &F, SmallPtrSetImpl<BasicBlock*> &Reachable) { - SmallVector<BasicBlock*, 128> Worklist; BasicBlock *BB = &F.front(); Worklist.push_back(BB); @@ -1586,13 +1646,16 @@ static bool markAliveBlocks(Function &F, static CatchPadInst *getEmptyKey() { return DenseMapInfo<CatchPadInst *>::getEmptyKey(); } + static CatchPadInst *getTombstoneKey() { return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); } + static unsigned getHashValue(CatchPadInst *CatchPad) { return static_cast<unsigned>(hash_combine_range( CatchPad->value_op_begin(), CatchPad->value_op_end())); } + static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { if (LHS == getEmptyKey() || LHS == getTombstoneKey() || RHS == getEmptyKey() || RHS == getTombstoneKey()) @@ -1832,7 +1895,8 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates); } -bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { +bool llvm::callsGCLeafFunction(ImmutableCallSite CS, + const TargetLibraryInfo &TLI) { // Check if the function is specifically marked as a gc leaf function. if (CS.hasFnAttr("gc-leaf-function")) return true; @@ -1846,6 +1910,14 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { IID != Intrinsic::experimental_deoptimize; } + // Lib calls can be materialized by some passes, and won't be + // marked as 'gc-leaf-function.' All available Libcalls are + // GC-leaf. + LibFunc LF; + if (TLI.getLibFunc(CS, LF)) { + return TLI.has(LF); + } + return false; } @@ -1893,6 +1965,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, } namespace { + /// A potential constituent of a bitreverse or bswap expression. See /// collectBitParts for a fuller explanation. struct BitPart { @@ -1902,12 +1975,14 @@ struct BitPart { /// The Value that this is a bitreverse/bswap of. Value *Provider; + /// The "provenance" of each bit. Provenance[A] = B means that bit A /// in Provider becomes bit B in the result of this expression. SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. 
enum { Unset = -1 }; }; + } // end anonymous namespace /// Analyze the specified subexpression and see if it is capable of providing @@ -1933,7 +2008,6 @@ struct BitPart { /// /// Because we pass around references into \c BPS, we must use a container that /// does not invalidate internal references (std::map instead of DenseMap). -/// static const Optional<BitPart> & collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, std::map<Value *, Optional<BitPart>> &BPS) { @@ -2069,8 +2143,6 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, return From == BitWidth - To - 1; } -/// Given an OR instruction, check to see if this is a bitreverse -/// idiom. If so, insert the new intrinsic and return true. bool llvm::recognizeBSwapOrBitReverseIdiom( Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl<Instruction *> &InsertedInsts) { diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e21e34df8ded..f43af9772771 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -258,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); // Create the new outer loop. - Loop *NewOuter = new Loop(); + Loop *NewOuter = LI->AllocateLoop(); // Change the parent loop to use the outer loop as its child now. if (Loop *Parent = L->getParentLoop()) diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index f2527f89e83e..dc98a39adcc5 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -21,8 +21,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -68,9 +67,23 @@ static inline void remapInstruction(Instruction *I, ValueToValueMapTy &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); + + // Unwrap arguments of dbg.value intrinsics. + bool Wrapped = false; + if (auto *V = dyn_cast<MetadataAsValue>(Op)) + if (auto *Unwrapped = dyn_cast<ValueAsMetadata>(V->getMetadata())) { + Op = Unwrapped->getValue(); + Wrapped = true; + } + + auto wrap = [&](Value *V) { + auto &C = I->getContext(); + return Wrapped ? MetadataAsValue::get(C, ValueAsMetadata::get(V)) : V; + }; + ValueToValueMapTy::iterator It = VMap.find(Op); if (It != VMap.end()) - I->setOperand(op, It->second); + I->setOperand(op, wrap(It->second)); } if (PHINode *PN = dyn_cast<PHINode>(I)) { @@ -200,7 +213,7 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, assert(OriginalBB == OldLoop->getHeader() && "Header should be first in RPO"); - NewLoop = new Loop(); + NewLoop = LI->AllocateLoop(); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); if (NewLoopParent) @@ -255,8 +268,7 @@ static bool isEpilogProfitable(Loop *L) { return false; } -/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true -/// if unrolling was successful, or false if the loop was unmodified. Unrolling +/// Unroll the given loop by Count. The loop must be in LCSSA form. 
Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. @@ -285,37 +297,36 @@ static bool isEpilogProfitable(Loop *L) { /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// -/// If we want to perform PGO-based loop peeling, PeelCount is set to the +/// If we want to perform PGO-based loop peeling, PeelCount is set to the /// number of iterations we want to peel off. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and /// DominatorTree if they are non-null. -bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, - bool AllowRuntime, bool AllowExpensiveTripCount, - bool PreserveCondBr, bool PreserveOnlyFirst, - unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, OptimizationRemarkEmitter *ORE, - bool PreserveLCSSA) { +LoopUnrollResult llvm::UnrollLoop( + Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, + bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, + unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); - return false; + return LoopUnrollResult::Unmodified; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); - return false; + return LoopUnrollResult::Unmodified; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); - return false; + return LoopUnrollResult::Unmodified; } // The current loop unroll pass can only unroll loops with a single latch @@ -329,7 +340,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); - return false; + return LoopUnrollResult::Unmodified; } auto CheckSuccessors = [&](unsigned S1, unsigned S2) { @@ -339,14 +350,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" " exiting the loop can be unrolled\n"); - return false; + return LoopUnrollResult::Unmodified; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); - return false; + return LoopUnrollResult::Unmodified; } if (TripCount != 0) @@ -362,7 +373,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // Don't enter the unroll code if there is nothing to do. 
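// Editor's sketch (assumed caller, not part of the patch): with the bool
// return type replaced by LoopUnrollResult in this hunk, a caller can
// distinguish full from partial unrolling instead of testing a flag,
// roughly:
//
//   LoopUnrollResult Result = UnrollLoop(
//       L, Count, TripCount, Force, AllowRuntime, AllowExpensiveTripCount,
//       PreserveCondBr, PreserveOnlyFirst, TripMultiple, PeelCount,
//       /*UnrollRemainder*/ false, LI, SE, DT, AC, &ORE, PreserveLCSSA);
//   if (Result == LoopUnrollResult::Unmodified)
//     return false;              // nothing changed; analyses remain valid
//   if (Result == LoopUnrollResult::FullyUnrolled)
//     L = nullptr;               // the Loop object was erased from LoopInfo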
if (TripCount == 0 && Count < 2 && PeelCount == 0) {
   DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
-    return false;
+    return LoopUnrollResult::Unmodified;
   }
   assert(Count > 0);
@@ -395,8 +406,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
          "Did not expect runtime trip-count unrolling "
          "and peeling for the same loop");
-  if (PeelCount)
-    peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+  if (PeelCount) {
+    bool Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+
+    // Successful peeling may result in a change in the loop preheader/trip
+    // counts. If we later unroll the loop, we want these to be updated.
+    if (Peeled) {
+      BasicBlock *ExitingBlock = L->getExitingBlock();
+      assert(ExitingBlock && "Loop without exiting block?");
+      Preheader = L->getLoopPreheader();
+      TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+      TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+    }
+  }
   // Loops containing convergent instructions must have a count that divides
   // their TripMultiple.
@@ -418,15 +440,15 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
   if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
-                                  EpilogProfitability, LI, SE, DT,
-                                  PreserveLCSSA)) {
+                                  EpilogProfitability, UnrollRemainder, LI, SE,
+                                  DT, AC, PreserveLCSSA)) {
     if (Force)
       RuntimeTripCount = false;
     else {
       DEBUG(
           dbgs() << "Won't unroll; remainder loop could not be generated "
                     "when assuming runtime trip count\n");
-      return false;
+      return LoopUnrollResult::Unmodified;
     }
   }
@@ -450,36 +472,53 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
   // Report the unrolling decision.
   if (CompletelyUnroll) {
     DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
-          << " with trip count " << TripCount << "!\n");
-    ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
-                                 L->getHeader())
-              << "completely unrolled loop with "
-              << NV("UnrollCount", TripCount) << " iterations");
+                 << " with trip count " << TripCount << "!\n");
+    if (ORE)
+      ORE->emit([&]() {
+        return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+                                  L->getHeader())
+               << "completely unrolled loop with "
+               << NV("UnrollCount", TripCount) << " iterations";
+      });
   } else if (PeelCount) {
     DEBUG(dbgs() << "PEELING loop %" << Header->getName()
                  << " with iteration count " << PeelCount << "!\n");
-    ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
-                                 L->getHeader())
-              << " peeled loop by " << NV("PeelCount", PeelCount)
-              << " iterations");
+    if (ORE)
+      ORE->emit([&]() {
+        return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
+                                  L->getHeader())
+               << " peeled loop by " << NV("PeelCount", PeelCount)
+               << " iterations";
+      });
   } else {
-    OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
-                            L->getHeader());
-    Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count);
+    auto DiagBuilder = [&]() {
+      OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
                              L->getHeader());
+      return Diag << "unrolled loop by a factor of "
+                  << NV("UnrollCount", Count);
+    };
     DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count);
     if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
       DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
-      ORE->emit(Diag << " with a breakout at trip "
-                << NV("BreakoutTrip", BreakoutTrip));
+      if (ORE)
+        ORE->emit([&]() {
+ return DiagBuilder() << " with a breakout at trip " + << NV("BreakoutTrip", BreakoutTrip); + }); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); - ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple) - << " trips per branch"); + if (ORE) + ORE->emit([&]() { + return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) + << " trips per branch"; + }); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); - ORE->emit(Diag << " with run-time trip count"); + if (ORE) + ORE->emit( + [&]() { return DiagBuilder() << " with run-time trip count"; }); } DEBUG(dbgs() << "!\n"); } @@ -523,8 +562,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (Header->getParent()->isDebugInfoForProfiling()) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) - if (const DILocation *DIL = I.getDebugLoc()) - I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count)); + if (!isa<DbgInfoIntrinsic>(&I)) + if (const DILocation *DIL = I.getDebugLoc()) + I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count)); for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; @@ -796,7 +836,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) - LI->markAsRemoved(L); + LI->erase(L); // After complete unrolling most of the blocks should be contained in OuterL. // However, some of them might happen to be out of OuterL (e.g. if they @@ -821,7 +861,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (NeedToFixLCSSA) { // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop - // after LoopInfo's been updated by markAsRemoved. + // after LoopInfo's been updated by LoopInfo::erase. Loop *LatchLoop = LI->getLoopFor(Latches.back()); Loop *FixLCSSALoop = OuterL; if (!FixLCSSALoop->contains(LatchLoop)) @@ -844,7 +884,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } } - return true; + return CompletelyUnroll ? 
LoopUnrollResult::FullyUnrolled + : LoopUnrollResult::PartiallyUnrolled; } /// Given an llvm.loop loop id metadata node, returns the loop hint metadata diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 5c21490793e7..4273ce0b6200 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -1,4 +1,4 @@ -//===-- UnrollLoopPeel.cpp - Loop peeling utilities -----------------------===// +//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,29 +13,42 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> using namespace llvm; #define DEBUG_TYPE "loop-unroll" + STATISTIC(NumPeeled, "Number of loops peeled"); static cl::opt<unsigned> UnrollPeelMaxCount( @@ -49,7 +62,8 @@ static cl::opt<unsigned> UnrollForcePeelCount( // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). -static const unsigned InfiniteIterationsToInvariance = UINT_MAX; +static const unsigned InfiniteIterationsToInvariance = + std::numeric_limits<unsigned>::max(); // Check whether we are capable of peeling this loop. static bool canPeel(Loop *L) { @@ -210,8 +224,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); } } - - return; } /// \brief Update the branch weights of the latch of a peeled-off loop @@ -236,7 +248,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, unsigned IterNumber, unsigned AvgIters, uint64_t &PeeledHeaderWeight) { - // FIXME: Pick a more realistic distribution. 
// Currently the proportion of weight we assign to the fall-through // side of the branch drops linearly with the iteration number, and we use @@ -272,7 +283,6 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI) { - BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *PreHeader = L->getLoopPreheader(); diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index d43ce7abb7cd..efff06f79cb7 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -25,7 +25,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/BasicBlock.h" @@ -294,7 +293,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, - const bool UseEpilogRemainder, BasicBlock *InsertTop, + const bool UseEpilogRemainder, const bool UnrollRemainder, + BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { @@ -393,35 +393,14 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); - // Add unroll disable metadata to disable future unrolling for this loop. - SmallVector<Metadata *, 4> MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - MDNode *LoopID = NewLoop->getLoopID(); - if (LoopID) { - // First remove any existing loop unrolling metadata. - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - bool IsUnrollMetadata = false; - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); - } - if (!IsUnrollMetadata) - MDs.push_back(LoopID->getOperand(i)); - } - } - LLVMContext &Context = NewLoop->getHeader()->getContext(); - SmallVector<Metadata *, 1> DisableOperands; - DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); + // Only add loop metadata if the loop is not going to be completely + // unrolled. + if (UnrollRemainder) + return NewLoop; - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - NewLoop->setLoopID(NewLoopID); + // Add unroll disable metadata to disable future unrolling for this loop. + NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else @@ -435,12 +414,9 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder) { - // Support runtime unrolling for multiple exit blocks and multiple exiting - // blocks. 
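// Editor's note (illustrative, not part of the patch): the hunks below split
// the old single UnrollRuntimeMultiExit gate into two separate predicates,
// so a caller now asks the safety and profitability questions independently,
// roughly:
//
//   bool Enabled =
//       canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit,
//                                    PreserveLCSSA, UseEpilogRemainder) &&
//       canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
//                                        PreserveLCSSA, UseEpilogRemainder);
//
// Safety covers LCSSA preservation and the exit-block structure;
// profitability currently admits at most two exiting blocks, preferring a
// lone side exit that lands in a deoptimize block.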
-  if (!UnrollRuntimeMultiExit)
-    return false;
-  // Even if runtime multi exit is enabled, we currently have some correctness
-  // constrains in unrolling a multi-exit loop.
+  // We currently have some correctness constraints in unrolling a multi-exit
+  // loop. Check for these below.
+
   // We rely on LCSSA form being preserved when the exit blocks are transformed.
   if (!PreserveLCSSA)
     return false;
@@ -470,7 +446,54 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
   return true;
 }

+/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
+/// we return true only if UnrollRuntimeMultiExit is set to true.
+static bool canProfitablyUnrollMultiExitLoop(
+    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
+    bool PreserveLCSSA, bool UseEpilogRemainder) {
+
+#if !defined(NDEBUG)
+  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
+  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
+                                      PreserveLCSSA, UseEpilogRemainder) &&
+         "Should be safe to unroll before checking profitability!");
+#endif
+
+  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
+  if (UnrollRuntimeMultiExit.getNumOccurrences())
+    return UnrollRuntimeMultiExit;
+
+  // The main pain point with multi-exit loop unrolling is that once unrolled,
+  // we will not be able to merge all blocks into a straight line code.
+  // There are branches within the unrolled loop that go to the OtherExits.
+  // The second point is the increase in code size, but this is true
+  // irrespective of multiple exits.
+
+  // Note: Both the heuristics below are coarse grained. We are essentially
+  // enabling unrolling of loops that have a single side exit other than the
+  // normal LatchExit (i.e. exiting into a deoptimize block).
+  // The heuristics considered are:
+  // 1. low number of branches in the unrolled version.
+  // 2. high predictability of these extra branches.
+  // We avoid unrolling loops that have more than two exiting blocks. This
+  // limits the total number of branches in the unrolled loop to be at most
+  // the unroll factor (since one of the exiting blocks is the latch block).
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  if (ExitingBlocks.size() > 2)
+    return false;
+  // The second heuristic is that L has one exit other than the LatchExit and
+  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
+  // taken, which also implies the branch leading to the deoptimize block is
+  // highly predictable.
+  return (OtherExits.size() == 1 &&
+          OtherExits[0]->getTerminatingDeoptimizeCall());
+  // TODO: These can be fine-tuned further to consider code size or deopt states
+  // that are captured by the deoptimize exit block.
+  // Also, we can extend this to support more cases, if we actually
+  // know of kinds of multiexit loops that would benefit from unrolling.
+}

 /// Insert code in the prolog/epilog code when unrolling a loop with a
 /// run-time trip-count.
@@ -513,10 +536,14 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
 bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool AllowExpensiveTripCount,
                                       bool UseEpilogRemainder,
+                                      bool UnrollRemainder,
                                       LoopInfo *LI, ScalarEvolution *SE,
-                                      DominatorTree *DT, bool PreserveLCSSA) {
+                                      DominatorTree *DT, AssumptionCache *AC,
+                                      bool PreserveLCSSA) {
   DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
   DEBUG(L->dump());
+  DEBUG(UseEpilogRemainder ?
dbgs() << "Using epilog remainder.\n" : + dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { @@ -538,8 +565,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, "one of the loop latch successors should be the exit block!"); // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; - bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( - L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); + bool isMultiExitUnrollingEnabled = + canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, + UseEpilogRemainder) && + canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, + UseEpilogRemainder); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { @@ -724,7 +754,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( - L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, + L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, + InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. @@ -753,11 +784,15 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Add the incoming values from the remainder code to the end of the phi // node. for (unsigned i =0; i < oldNumOperands; i++){ - Value *newVal = VMap[Phi->getIncomingValue(i)]; + Value *newVal = VMap.lookup(Phi->getIncomingValue(i)); // newVal can be a constant or derived from values outside the loop, and - // hence need not have a VMap value. - if (!newVal) + // hence need not have a VMap value. Also, since lookup already generated + // a default "null" VMap entry for this value, we need to populate that + // VMap entry correctly, with the mapped entry being itself. + if (!newVal) { newVal = Phi->getIncomingValue(i); + VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i); + } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } @@ -868,6 +903,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); } + if (remainderLoop && UnrollRemainder) { + DEBUG(dbgs() << "Unrolling remainder loop\n"); + UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, + /*Force*/ false, /*AllowRuntime*/ false, + /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, + /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, + /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, + /*ORE*/ nullptr, PreserveLCSSA); + } + NumRuntimeUnrolled++; return true; } diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 3c522786641a..c3fa05a11a24 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -432,7 +432,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { bool FP = I->getType()->isFloatingPointTy(); Instruction *UAI = Prev.getUnsafeAlgebraInst(); - if (!UAI && FP && !I->hasUnsafeAlgebra()) + if (!UAI && FP && !I->isFast()) UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 
switch (I->getOpcode()) {
@@ -565,7 +565,8 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
   auto *I = Phi->user_back();
   if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
       DT->dominates(Previous, I->user_back())) {
-    SinkAfter[I] = Previous;
+    if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
+      SinkAfter[I] = Previous;
     return true;
   }
 }
@@ -659,11 +660,11 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
     break;
   }
-  // We only match FP sequences with unsafe algebra, so we can unconditionally
+  // We only match FP sequences that are 'fast', so we can unconditionally
   // set it on any generated instructions.
   IRBuilder<>::FastMathFlagGuard FMFG(Builder);
   FastMathFlags FMF;
-  FMF.setUnsafeAlgebra();
+  FMF.setFast();
   Builder.setFastMathFlags(FMF);
   Value *Cmp;
@@ -677,7 +678,8 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
 }
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
-                                         const SCEV *Step, BinaryOperator *BOp)
+                                         const SCEV *Step, BinaryOperator *BOp,
+                                         SmallVectorImpl<Instruction *> *Casts)
     : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
   assert(IK != IK_NoInduction && "Not an induction");
@@ -704,6 +706,12 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
          "Binary opcode should be specified for FP induction");
+
+  if (Casts) {
+    for (auto &Inst : *Casts) {
+      RedundantCasts.push_back(Inst);
+    }
+  }
 }
 int InductionDescriptor::getConsecutiveDirection() const {
@@ -767,7 +775,7 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
   // Floating point operations had to be 'fast' to enable the induction.
   FastMathFlags Flags;
-  Flags.setUnsafeAlgebra();
+  Flags.setFast();
   Value *MulExp = B.CreateFMul(StepValue, Index);
   if (isa<Instruction>(MulExp))
@@ -807,7 +815,7 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
     StartValue = Phi->getIncomingValue(1);
   } else {
     assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
-           "Unexpected Phi node in the loop");
+           "Unexpected Phi node in the loop");
     BEValue = Phi->getIncomingValue(1);
     StartValue = Phi->getIncomingValue(0);
   }
@@ -840,6 +848,110 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
   return true;
 }
+/// This function is called when we suspect that the update-chain of a phi node
+/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; see createAddRecFromPHIWithCast). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate can be free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some scev
+/// expression with casts).
+/// +/// For example, without a predicate the scev expression can take the following +/// form: +/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) +/// +/// It corresponds to the following IR sequence: +/// %for.body: +/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] +/// %casted_phi = "ExtTrunc i64 %x" +/// %add = add i64 %casted_phi, %step +/// +/// where %x is given in \p PN, +/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, +/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of +/// several forms, for example, such as: +/// ExtTrunc1: %casted_phi = and %x, 2^n-1 +/// or: +/// ExtTrunc2: %t = shl %x, m +/// %casted_phi = ashr %t, m +/// +/// If we are able to find such sequence, we return the instructions +/// we found, namely %casted_phi and the instructions on its use-def chain up +/// to the phi (not including the phi). +bool getCastsForInductionPHI( + PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, + const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) { + + assert(CastInsts.empty() && "CastInsts is expected to be empty."); + auto *PN = cast<PHINode>(PhiScev->getValue()); + assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); + const Loop *L = AR->getLoop(); + + // Find any cast instructions that participate in the def-use chain of + // PhiScev in the loop. + // FORNOW/TODO: We currently expect the def-use chain to include only + // two-operand instructions, where one of the operands is an invariant. + // createAddRecFromPHIWithCasts() currently does not support anything more + // involved than that, so we keep the search simple. This can be + // extended/generalized as needed. + + auto getDef = [&](const Value *Val) -> Value * { + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); + if (!BinOp) + return nullptr; + Value *Op0 = BinOp->getOperand(0); + Value *Op1 = BinOp->getOperand(1); + Value *Def = nullptr; + if (L->isLoopInvariant(Op0)) + Def = Op1; + else if (L->isLoopInvariant(Op1)) + Def = Op0; + return Def; + }; + + // Look for the instruction that defines the induction via the + // loop backedge. + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + Value *Val = PN->getIncomingValueForBlock(Latch); + if (!Val) + return false; + + // Follow the def-use chain until the induction phi is reached. + // If on the way we encounter a Value that has the same SCEV Expr as the + // phi node, we can consider the instructions we visit from that point + // as part of the cast-sequence that can be ignored. + bool InCastSequence = false; + auto *Inst = dyn_cast<Instruction>(Val); + while (Val != PN) { + // If we encountered a phi node other than PN, or if we left the loop, + // we bail out. + if (!Inst || !L->contains(Inst)) { + return false; + } + auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); + if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) + InCastSequence = true; + if (InCastSequence) { + // Only the last instruction in the cast sequence is expected to have + // uses outside the induction def-use chain. 
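+  // Editor's example (illustrative, not part of the patch): for the
+  // ExtTrunc1 form documented above, say
+  //   %x          = phi i64 [ 0, %ph ], [ %add, %for.body ]
+  //   %casted_phi = and i64 %x, 4294967295
+  //   %add        = add i64 %casted_phi, %step
+  // the walk starts from the latch value %add (its SCEV does not yet match
+  // AR, so it is not collected), steps through its loop-variant operand
+  // %casted_phi, whose SCEV equals AR under the predicate, and stops at the
+  // phi, returning CastInsts = {%casted_phi}.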
+      if (!CastInsts.empty())
+        if (!Inst->hasOneUse())
+          return false;
+      CastInsts.push_back(Inst);
+    }
+    Val = getDef(Val);
+    if (!Val)
+      return false;
+    Inst = dyn_cast<Instruction>(Val);
+  }
+
+  return InCastSequence;
+}
+
 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                          PredicatedScalarEvolution &PSE,
                                          InductionDescriptor &D,
@@ -869,13 +981,26 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
     return false;
   }
+  // Record any Cast instructions that participate in the induction update.
+  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+  // If we started from an UnknownSCEV, and managed to build an addRecurrence
+  // only after enabling Assume with PSCEV, this means we may have encountered
+  // cast instructions that required adding a runtime check in order to
+  // guarantee the correctness of the AddRecurrence representation of the
+  // induction.
+  if (PhiScev != AR && SymbolicPhi) {
+    SmallVector<Instruction *, 2> Casts;
+    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
+      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+  }
+
   return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
 }
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
-                                         ScalarEvolution *SE,
-                                         InductionDescriptor &D,
-                                         const SCEV *Expr) {
+bool InductionDescriptor::isInductionPHI(
+    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+    InductionDescriptor &D, const SCEV *Expr,
+    SmallVectorImpl<Instruction *> *CastsToIgnore) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer induction variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@@ -894,7 +1019,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
     // FIXME: We should treat this as a uniform. Unfortunately, we
     // don't currently know how to handle uniform PHIs.
     DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
-    return false;
+    return false;
   Value *StartValue =
@@ -907,7 +1032,8 @@ ...
     return false;
   if (PhiTy->isIntegerTy()) {
-    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
+    D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
+                            CastsToIgnore);
     return true;
   }
@@ -1115,6 +1241,149 @@ Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
   return None;
 }
+/// Does a BFS from a given node to all of its children inside a given loop.
+/// The returned vector of nodes includes the starting point.
+SmallVector<DomTreeNode *, 16>
+llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
+  SmallVector<DomTreeNode *, 16> Worklist;
+  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
+    // Only include subregions in the top level loop.
+    BasicBlock *BB = DTN->getBlock();
+    if (CurLoop->contains(BB))
+      Worklist.push_back(DTN);
+  };
+
+  AddRegionToWorklist(N);
+
+  for (size_t I = 0; I < Worklist.size(); I++)
+    for (DomTreeNode *Child : Worklist[I]->getChildren())
+      AddRegionToWorklist(Child);
+
+  return Worklist;
+}
+
+void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
+                          ScalarEvolution *SE = nullptr,
+                          LoopInfo *LI = nullptr) {
+  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
+  auto *Preheader = L->getLoopPreheader();
+  assert(Preheader && "Preheader should exist!");
+
+  // Now that we know the removal is safe, remove the loop by changing the
+  // branch from the preheader to go to the single exit block.
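+  // Editor's sketch (assumed caller, not part of the patch): the asserts in
+  // this function spell out the preconditions a deletion pass should
+  // establish before handing the loop over, e.g.
+  //   if (L->getLoopPreheader() && L->getUniqueExitBlock() &&
+  //       L->hasDedicatedExits() && (!DT || L->isLCSSAForm(*DT)))
+  //     deleteDeadLoop(L, DT, SE, LI);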
+  //
+  // Because we're deleting a large chunk of code at once, the sequence in which
+  // we remove things is very important to avoid invalidation issues.

+  // Tell ScalarEvolution that the loop is deleted. Do this before
+  // deleting the loop so that ScalarEvolution can look at the loop
+  // to determine what it needs to clean up.
+  if (SE)
+    SE->forgetLoop(L);
+
+  auto *ExitBlock = L->getUniqueExitBlock();
+  assert(ExitBlock && "Should have a unique exit block!");
+  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
+  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
+  assert(OldBr && "Preheader must end with a branch");
+  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
+  // Connect the preheader to the exit block. Keep the old edge to the header
+  // around to perform the dominator tree update in two separate steps
+  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
+  // preheader -> header.
+  //
+  //
+  // 0. Preheader          1. Preheader           2. Preheader
+  //        |                    |   |                  |
+  //        V                    |   V                  |
+  //      Header <--\            | Header <--\          | Header <--\
+  //        |   |   |            |   |   |   |          |   |   |   |
+  //        |   V   |            |   |   V   |          |   |   V   |
+  //        |  Body --/          |   |  Body --/        |   |  Body --/
+  //        V                    V   V                  V   V
+  //      Exit                  Exit                   Exit
+  //
+  // By doing this in two separate steps we can perform the dominator tree
+  // update without using the batch update API.
+  //
+  // Even when the loop is never executed, we cannot remove the edge from the
+  // source block to the exit block. Consider the case where the unexecuted loop
+  // branches back to an outer loop. If we deleted the loop and removed the edge
+  // coming to this inner loop, this will break the outer loop structure (by
+  // deleting the backedge of the outer loop). If the outer loop is indeed a
+  // non-loop, it will be deleted in a future iteration of the loop deletion
+  // pass.
+  IRBuilder<> Builder(OldBr);
+  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
+  // Remove the old branch. The conditional branch becomes a new terminator.
+  OldBr->eraseFromParent();
+
+  // Rewrite phis in the exit block to get their inputs from the Preheader
+  // instead of the exiting block.
+  BasicBlock::iterator BI = ExitBlock->begin();
+  while (PHINode *P = dyn_cast<PHINode>(BI)) {
+    // Set the zero'th element of Phi to be from the preheader and remove all
+    // other incoming values. Given the loop has dedicated exits, all other
+    // incoming values must be from the exiting blocks.
+    int PredIndex = 0;
+    P->setIncomingBlock(PredIndex, Preheader);
+    // Removes all incoming values from all other exiting blocks (including
+    // duplicate values from an exiting block).
+    // Nuke all entries except the zero'th entry which is the preheader entry.
+    // NOTE! We need to remove Incoming Values in the reverse order as done
+    // below, to keep the indices valid for deletion (removeIncomingValues
+    // updates getNumIncomingValues and shifts all values down into the operand
+    // being deleted).
+    for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
+      P->removeIncomingValue(e - i, false);
+
+    assert((P->getNumIncomingValues() == 1 &&
+            P->getIncomingBlock(PredIndex) == Preheader) &&
+           "Should have exactly one value and that's from the preheader!");
+    ++BI;
+  }
+
+  // Disconnect the loop body by branching directly to its exit.
+  Builder.SetInsertPoint(Preheader->getTerminator());
+  Builder.CreateBr(ExitBlock);
+  // Remove the old branch.
+ Preheader->getTerminator()->eraseFromParent(); + + if (DT) { + // Update the dominator tree by informing it about the new edge from the + // preheader to the exit. + DT->insertEdge(Preheader, ExitBlock); + // Inform the dominator tree about the removed edge. + DT->deleteEdge(Preheader, L->getHeader()); + } + + // Remove the block from the reference counting scheme, so that we can + // delete it freely later. + for (auto *Block : L->blocks()) + Block->dropAllReferences(); + + if (LI) { + // Erase the instructions and the blocks without having to worry + // about ordering because we already dropped the references. + // NOTE: This iteration is safe because erasing the block does not remove + // its entry from the loop's block list. We do that in the next section. + for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); + LpI != LpE; ++LpI) + (*LpI)->eraseFromParent(); + + // Finally, the blocks from loopinfo. This has to happen late because + // otherwise our loop iterators won't work. + + SmallPtrSet<BasicBlock *, 8> blocks; + blocks.insert(L->block_begin(), L->block_end()); + for (BasicBlock *BB : blocks) + LI->removeBlock(BB); + + // The last step is to update LoopInfo now that we've eliminated this loop. + LI->erase(L); + } +} + /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. bool llvm::isGuaranteedToExecute(const Instruction &Inst, @@ -1194,7 +1463,7 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; - Flags.setUnsafeAlgebra(); + Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; @@ -1256,8 +1525,8 @@ Value *llvm::createSimpleTargetReduction( using RD = RecurrenceDescriptor; RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; // TODO: Support creating ordered reductions. - FastMathFlags FMFUnsafe; - FMFUnsafe.setUnsafeAlgebra(); + FastMathFlags FMFFast; + FMFFast.setFast(); switch (Opcode) { case Instruction::Add: @@ -1278,14 +1547,14 @@ Value *llvm::createSimpleTargetReduction( case Instruction::FAdd: BuildFunc = [&]() { auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; case Instruction::FMul: BuildFunc = [&]() { auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; @@ -1321,55 +1590,39 @@ Value *llvm::createSimpleTargetReduction( } /// Create a vector reduction using a given recurrence descriptor. -Value *llvm::createTargetReduction(IRBuilder<> &Builder, +Value *llvm::createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN) { // TODO: Support in-order reductions based on the recurrence descriptor. 
- RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); + using RD = RecurrenceDescriptor; + RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; - auto getSimpleRdx = [&](unsigned Opc) { - return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); - }; switch (RecKind) { - case RecurrenceDescriptor::RK_FloatAdd: - return getSimpleRdx(Instruction::FAdd); - case RecurrenceDescriptor::RK_FloatMult: - return getSimpleRdx(Instruction::FMul); - case RecurrenceDescriptor::RK_IntegerAdd: - return getSimpleRdx(Instruction::Add); - case RecurrenceDescriptor::RK_IntegerMult: - return getSimpleRdx(Instruction::Mul); - case RecurrenceDescriptor::RK_IntegerAnd: - return getSimpleRdx(Instruction::And); - case RecurrenceDescriptor::RK_IntegerOr: - return getSimpleRdx(Instruction::Or); - case RecurrenceDescriptor::RK_IntegerXor: - return getSimpleRdx(Instruction::Xor); - case RecurrenceDescriptor::RK_IntegerMinMax: { - switch (Desc.getMinMaxRecurrenceKind()) { - case RecurrenceDescriptor::MRK_SIntMax: - Flags.IsSigned = true; - Flags.IsMaxOp = true; - break; - case RecurrenceDescriptor::MRK_UIntMax: - Flags.IsMaxOp = true; - break; - case RecurrenceDescriptor::MRK_SIntMin: - Flags.IsSigned = true; - break; - case RecurrenceDescriptor::MRK_UIntMin: - break; - default: - llvm_unreachable("Unhandled MRK"); - } - return getSimpleRdx(Instruction::ICmp); + case RD::RK_FloatAdd: + return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); + case RD::RK_FloatMult: + return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); + case RD::RK_IntegerAdd: + return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); + case RD::RK_IntegerMult: + return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); + case RD::RK_IntegerAnd: + return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); + case RD::RK_IntegerOr: + return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); + case RD::RK_IntegerXor: + return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); + case RD::RK_IntegerMinMax: { + RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); + Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); + Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); + return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); } - case RecurrenceDescriptor::RK_FloatMinMax: { - Flags.IsMaxOp = - Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; - return getSimpleRdx(Instruction::FCmp); + case RD::RK_FloatMinMax: { + Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; + return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); } default: llvm_unreachable("Unhandled RecKind"); diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 900450b40061..57dc225e9dab 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -168,13 +168,14 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); assert(ILengthType && "expected size argument to memcpy to be an integer type!"); + Type *Int8Type = Type::getInt8Ty(Ctx); + bool LoopOpIsInt8 = LoopOpType == Int8Type; ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); - Value 
*RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
-  Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
-  Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
-
+  Value *RuntimeLoopCount = LoopOpIsInt8 ?
+                            CopyLen :
+                            PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
   BasicBlock *LoopBB =
-      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
   IRBuilder<> LoopBuilder(LoopBB);
   PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
@@ -189,11 +190,15 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
       LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
   LoopIndex->addIncoming(NewIndex, LoopBB);
-  Type *Int8Type = Type::getInt8Ty(Ctx);
-  if (LoopOpType != Int8Type) {
+  if (!LoopOpIsInt8) {
+    // Add in the residual byte count left over after the wide main loop.
+    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+
     // Loop body for the residual copy.
     BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
-                                               PreLoopBB->getParent(), nullptr);
+                                               PreLoopBB->getParent(),
+                                               PostLoopBB);
     // Residual loop header.
     BasicBlock *ResHeaderBB = BasicBlock::Create(
         Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
@@ -258,61 +263,6 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   }
 }
-void llvm::createMemCpyLoop(Instruction *InsertBefore,
-                            Value *SrcAddr, Value *DstAddr, Value *CopyLen,
-                            unsigned SrcAlign, unsigned DestAlign,
-                            bool SrcIsVolatile, bool DstIsVolatile) {
-  Type *TypeOfCopyLen = CopyLen->getType();
-
-  BasicBlock *OrigBB = InsertBefore->getParent();
-  Function *F = OrigBB->getParent();
-  BasicBlock *NewBB =
-      InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split");
-  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
-                                          F, NewBB);
-
-  IRBuilder<> Builder(OrigBB->getTerminator());
-
-  // SrcAddr and DstAddr are expected to be pointer types,
-  // so no check is made here.
-  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
-  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
-  // Cast pointers to (char *)
-  SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
-  DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
-
-  Builder.CreateCondBr(
-      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
-      LoopBB);
-  OrigBB->getTerminator()->eraseFromParent();
-
-  IRBuilder<> LoopBuilder(LoopBB);
-  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
-  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
-
-  // load from SrcAddr+LoopIndex
-  // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
-  // word-sized loads and stores.
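// Editor's example (illustrative, not part of the patch): with a 4-byte
// loop operand type and a runtime length %len, the expansion above computes
//   %loop_count = udiv i64 %len, 4   ; iterations of the wide main loop
//   %residual   = urem i64 %len, 4   ; bytes left for the byte-wise loop
// and the hunk's LoopOpIsInt8 fast path skips both the udiv and the entire
// residual loop when the loop operand type is already i8.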
-  Value *Element =
-      LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
-                                 LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
-                             SrcIsVolatile);
-  // store at DstAddr+LoopIndex
-  LoopBuilder.CreateStore(Element,
-                          LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
-                                                        DstAddr, LoopIndex),
-                          DstIsVolatile);
-
-  // The value for LoopIndex coming from backedge is (LoopIndex + 1)
-  Value *NewIndex =
-      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
-  LoopIndex->addIncoming(NewIndex, LoopBB);
-
-  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
-                           NewBB);
-}
-
 // Lower memmove to IR. memmove is required to correctly copy overlapping memory
 // regions; therefore, it has to check the relative positions of the source and
 // destination pointers and choose the copy direction accordingly.
@@ -454,38 +404,26 @@ static void createMemSetLoop(Instruction *InsertBefore,
 void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                               const TargetTransformInfo &TTI) {
-  // Original implementation
-  if (!TTI.useWideIRMemcpyLoopLowering()) {
-    createMemCpyLoop(/* InsertBefore */ Memcpy,
-                     /* SrcAddr */ Memcpy->getRawSource(),
-                     /* DstAddr */ Memcpy->getRawDest(),
-                     /* CopyLen */ Memcpy->getLength(),
-                     /* SrcAlign */ Memcpy->getAlignment(),
-                     /* DestAlign */ Memcpy->getAlignment(),
-                     /* SrcIsVolatile */ Memcpy->isVolatile(),
-                     /* DstIsVolatile */ Memcpy->isVolatile());
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+    createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+                              /* SrcAddr */ Memcpy->getRawSource(),
+                              /* DstAddr */ Memcpy->getRawDest(),
+                              /* CopyLen */ CI,
+                              /* SrcAlign */ Memcpy->getAlignment(),
+                              /* DestAlign */ Memcpy->getAlignment(),
+                              /* SrcIsVolatile */ Memcpy->isVolatile(),
+                              /* DstIsVolatile */ Memcpy->isVolatile(),
+                              /* TargetTransformInfo */ TTI);
   } else {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
-      createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+    createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
                                 /* SrcAddr */ Memcpy->getRawSource(),
                                 /* DstAddr */ Memcpy->getRawDest(),
-                                /* CopyLen */ CI,
+                                /* CopyLen */ Memcpy->getLength(),
                                 /* SrcAlign */ Memcpy->getAlignment(),
                                 /* DestAlign */ Memcpy->getAlignment(),
                                 /* SrcIsVolatile */ Memcpy->isVolatile(),
                                 /* DstIsVolatile */ Memcpy->isVolatile(),
-                                /* TargetTransformInfo */ TTI);
-    } else {
-      createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
-                                  /* SrcAddr */ Memcpy->getRawSource(),
-                                  /* DstAddr */ Memcpy->getRawDest(),
-                                  /* CopyLen */ Memcpy->getLength(),
-                                  /* SrcAlign */ Memcpy->getAlignment(),
-                                  /* DestAlign */ Memcpy->getAlignment(),
-                                  /* SrcIsVolatile */ Memcpy->isVolatile(),
-                                  /* DstIsVolatile */ Memcpy->isVolatile(),
-                                  /* TargetTransfomrInfo */ TTI);
-    }
+                                /* TargetTransformInfo */ TTI);
   }
 }
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 890afbc46e63..344cb35df986 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -13,46 +13,65 @@
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include
"llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "lower-switch" namespace { + struct IntRange { int64_t Low, High; }; - // Return true iff R is covered by Ranges. - static bool IsInRanges(const IntRange &R, - const std::vector<IntRange> &Ranges) { - // Note: Ranges must be sorted, non-overlapping and non-adjacent. - - // Find the first range whose High field is >= R.High, - // then check if the Low field is <= R.Low. If so, we - // have a Range that covers R. - auto I = std::lower_bound( - Ranges.begin(), Ranges.end(), R, - [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); - return I != Ranges.end() && I->Low <= R.Low; - } + +} // end anonymous namespace + +// Return true iff R is covered by Ranges. +static bool IsInRanges(const IntRange &R, + const std::vector<IntRange> &Ranges) { + // Note: Ranges must be sorted, non-overlapping and non-adjacent. + + // Find the first range whose High field is >= R.High, + // then check if the Low field is <= R.Low. If so, we + // have a Range that covers R. + auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), R, + [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); + return I != Ranges.end() && I->Low <= R.Low; +} + +namespace { /// Replace all SwitchInst instructions with chained branch instructions. class LowerSwitch : public FunctionPass { public: - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + LowerSwitch() : FunctionPass(ID) { initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); } @@ -68,8 +87,9 @@ namespace { : Low(low), High(high), BB(bb) {} }; - typedef std::vector<CaseRange> CaseVector; - typedef std::vector<CaseRange>::iterator CaseItr; + using CaseVector = std::vector<CaseRange>; + using CaseItr = std::vector<CaseRange>::iterator; + private: void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList); @@ -86,22 +106,24 @@ namespace { /// The comparison function for sorting the switch case values in the vector. /// WARNING: Case ranges should be disjoint! struct CaseCmp { - bool operator () (const LowerSwitch::CaseRange& C1, - const LowerSwitch::CaseRange& C2) { - + bool operator()(const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); return CI1->getValue().slt(CI2->getValue()); } }; -} + +} // end anonymous namespace char LowerSwitch::ID = 0; -INITIALIZE_PASS(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) // Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; + +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + // createLowerSwitchPass - Interface to this file... 
FunctionPass *llvm::createLowerSwitchPass() { return new LowerSwitch(); @@ -136,6 +158,7 @@ bool LowerSwitch::runOnFunction(Function &F) { static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) LLVM_ATTRIBUTE_USED; + static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) { O << "["; @@ -186,7 +209,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, } // Remove incoming values in the reverse order to prevent invalidating // *successive* index. - for (unsigned III : reverse(Indices)) + for (unsigned III : llvm::reverse(Indices)) PN->removeIncomingValue(III); } } @@ -294,8 +317,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, /// value, so the jump to the "default" branch is warranted. BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* OrigBlock, - BasicBlock* Default) -{ + BasicBlock* Default) { Function* F = OrigBlock->getParent(); BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); @@ -442,7 +464,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, unsigned MaxPop = 0; BasicBlock *PopSucc = nullptr; - IntRange R = { INT64_MIN, INT64_MAX }; + IntRange R = {std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::max()}; UnreachableRanges.push_back(R); for (const auto &I : Cases) { int64_t Low = I.Low->getSExtValue(); @@ -457,8 +480,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, assert(Low > LastRange.Low); LastRange.High = Low - 1; } - if (High != INT64_MAX) { - IntRange R = { High + 1, INT64_MAX }; + if (High != std::numeric_limits<int64_t>::max()) { + IntRange R = { High + 1, std::numeric_limits<int64_t>::max() }; UnreachableRanges.push_back(R); } @@ -487,8 +510,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, assert(MaxPop > 0 && PopSucc); Default = PopSucc; Cases.erase( - remove_if(Cases, - [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), + llvm::remove_if( + Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), Cases.end()); // If there are no cases left, just branch. 
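(Editor's example, illustrative and not part of the patch: the
numeric_limits hunks above sit in processSwitchInst's handling of an
unreachable default. Given

  switch i32 %v, label %unreachable [ i32 0, label %a
                                      i32 1, label %b
                                      i32 2, label %b ]

%b is the most popular successor, so the pass promotes %b to be the new
default, erases the two cases that target it via remove_if, and records the
uncovered intervals {INT64_MIN..-1} and {3..INT64_MAX} in UnreachableRanges,
starting from the full-range seed and splitting it as cases are scanned.)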
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index b659a2e4463f..29f289b62da0 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -15,12 +15,17 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include <vector> + using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -33,7 +38,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; - while (1) { + while (true) { Allocas.clear(); // Find allocas that are safe to promote, by looking at all instructions in @@ -65,15 +70,17 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { } namespace { + struct PromoteLegacyPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + PromoteLegacyPass() : FunctionPass(ID) { initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry()); } // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. - // bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -89,10 +96,12 @@ struct PromoteLegacyPass : public FunctionPass { AU.addRequired<DominatorTreeWrapperPass>(); AU.setPreservesCFG(); } - }; -} // end of anonymous namespace +}; + +} // end anonymous namespace char PromoteLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " "Register", false, false) @@ -102,7 +111,6 @@ INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", false, false) // createPromoteMemoryToRegister - Provide an entry point to create this pass. -// FunctionPass *llvm::createPromoteMemoryToRegisterPass() { return new PromoteLegacyPass(); } diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 9f2ad540c83d..0f7bd76c03ca 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -15,16 +15,30 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" + using namespace llvm; +static const char *const metaNames[] = { + // See http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" +}; + namespace { // This PRNG is from the ISO C spec. 
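For reference, the "PRNG from the ISO C spec" mentioned in the comment above is the example rand()/srand() from the C standard: a bare linear congruential generator. Quality is irrelevant for a renamer; what matters is identical behavior on every host, so renaming is reproducible. A standalone sketch (the name-plus-counter scheme here is this sketch's own way of keeping results unique, not a claim about Renamer::newName):

#include <cstdio>

struct PRNG {
  unsigned long Next = 1;
  void srand(unsigned Seed) { Next = Seed; }
  unsigned rand() {
    // The ISO C standard's example generator.
    Next = Next * 1103515245 + 12345;
    return (Next / 65536) % 32768;
  }
};

// A few of the metasyntactic names used by the pass.
static const char *const MetaNames[] = {"foo", "bar", "baz", "quux"};

int main() {
  PRNG Prng;
  Prng.srand(42);
  for (int I = 0; I != 4; ++I) // same four names on every run and platform
    std::printf("%s%d\n", MetaNames[Prng.rand() % 4], I);
}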
It is intentionally simple and
@@ -43,12 +57,6 @@ namespace {
     }
   };

-  static const char *const metaNames[] = {
-    // See http://en.wikipedia.org/wiki/Metasyntactic_variable
-    "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
-    "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
-  };
-
   struct Renamer {
     Renamer(unsigned int seed) {
       prng.srand(seed);
@@ -62,7 +70,9 @@ namespace {
   };

   struct MetaRenamer : public ModulePass {
-    static char ID; // Pass identification, replacement for typeid
+    // Pass identification, replacement for typeid
+    static char ID;
+
     MetaRenamer() : ModulePass(ID) {
       initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
     }
@@ -123,7 +133,11 @@ namespace {
           TLI.getLibFunc(F, Tmp))
         continue;

-      F.setName(renamer.newName());
+      // Leave @main alone. The output of -metarenamer might be passed to
+      // lli for execution and the latter needs a main entry point.
+      if (Name != "main")
+        F.setName(renamer.newName());
+
       runOnFunction(F);
     }
     return true;
@@ -144,14 +158,17 @@ namespace {
       return true;
     }
   };
-}
+
+} // end anonymous namespace

 char MetaRenamer::ID = 0;
+
 INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
                       "Assign new names to everything", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
                     "Assign new names to everything", false, false)
+
 //===----------------------------------------------------------------------===//
 //
 // MetaRenamer - Rename everything with metasyntactic names.
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 2ef3d6336ae2..ba4b7f3cc263 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -243,7 +243,7 @@ std::string llvm::getUniqueModuleId(Module *M) {
   bool ExportsSymbols = false;
   auto AddGlobal = [&](GlobalValue &GV) {
     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
-        !GV.hasExternalLinkage())
+        !GV.hasExternalLinkage() || GV.hasComdat())
       return;
     ExportsSymbols = true;
     Md5.update(GV.getName());
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index d4cdaede6b86..d47be6ea566b 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -49,9 +49,10 @@ INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
 static cl::opt<bool> VerifyPredicateInfo(
     "verify-predicateinfo", cl::init(false), cl::Hidden,
     cl::desc("Verify PredicateInfo in legacy printer pass."));
-namespace {
 DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
-              "Controls which variables are renamed with predicateinfo")
+              "Controls which variables are renamed with predicateinfo");
+
+namespace {
 // Given a predicate info that is a type of branching terminator, get the
 // branching block.
 const BasicBlock *getBranchBlock(const PredicateBase *PB) {
@@ -610,7 +611,12 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
     }
     convertUsesToDFSOrdered(Op, OrderedUses);
-    std::sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+    // Here we require a stable sort because we do not bother to try to
+    // assign an order to the operands the uses represent. Thus, two
+    // uses in the same instruction do not have a strict sort order
+    // currently and will be considered equal. We could get rid of the
+    // stable sort by creating one if we wanted.
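The std::stable_sort call that the comment above guards follows next. The difference it buys, in a standalone C++ sketch: when the comparator treats two distinct elements as equal, std::sort may order them differently between runs or standard-library implementations, while std::stable_sort pins them to insertion order, keeping the pass deterministic.

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  struct Use { int DFSIn; const char *Name; };
  // "a"/"c" and "b"/"d" compare equal under the key-only comparator.
  std::vector<Use> Uses = {{2, "a"}, {1, "b"}, {2, "c"}, {1, "d"}};
  std::stable_sort(Uses.begin(), Uses.end(),
                   [](const Use &L, const Use &R) { return L.DFSIn < R.DFSIn; });
  for (const Use &U : Uses)
    std::printf("%d %s\n", U.DFSIn, U.Name); // always: 1 b, 1 d, 2 a, 2 c
}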
+ std::stable_sort(OrderedUses.begin(), OrderedUses.end(), Compare); SmallVector<ValueDFS, 8> RenameStack; // For each use, sorted into dfs order, push values and replaces uses with // top of stack, which will represent the reaching def. diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index cdba982e6641..fcd3bd08482a 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -21,25 +21,38 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -103,7 +116,7 @@ struct AllocaInfo { bool OnlyUsedInOneBlock; Value *AllocaPointerVal; - DbgDeclareInst *DbgDeclare; + TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares; void clear() { DefiningBlocks.clear(); @@ -112,7 +125,7 @@ struct AllocaInfo { OnlyBlock = nullptr; OnlyUsedInOneBlock = true; AllocaPointerVal = nullptr; - DbgDeclare = nullptr; + DbgDeclares.clear(); } /// Scan the uses of the specified alloca, filling in the AllocaInfo used @@ -147,27 +160,21 @@ struct AllocaInfo { } } - DbgDeclare = FindAllocaDbgDeclare(AI); + DbgDeclares = FindDbgAddrUses(AI); } }; // Data package used by RenamePass() class RenamePassData { public: - typedef std::vector<Value *> ValVector; + using ValVector = std::vector<Value *>; + + RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V) + : BB(B), Pred(P), Values(std::move(V)) {} - RenamePassData() : BB(nullptr), Pred(nullptr), Values() {} - RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) - : BB(B), Pred(P), Values(V) {} BasicBlock *BB; BasicBlock *Pred; ValVector Values; - - void swap(RenamePassData &RHS) { - std::swap(BB, RHS.BB); - std::swap(Pred, RHS.Pred); - Values.swap(RHS.Values); - } }; /// \brief This assigns and keeps a per-bb relative ordering of load/store @@ -223,12 +230,15 @@ public: struct PromoteMem2Reg { /// The alloca instructions being promoted. std::vector<AllocaInst *> Allocas; + DominatorTree &DT; DIBuilder DIB; + /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; const SimplifyQuery SQ; + /// Reverse mapping of Allocas. 
DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -252,10 +262,9 @@ struct PromoteMem2Reg { /// For each alloca, we keep track of the dbg.declare intrinsic that /// describes it, if any, so that we can convert it to a dbg.value /// intrinsic if the alloca gets promoted. - SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares; + SmallVector<TinyPtrVector<DbgInfoIntrinsic *>, 8> AllocaDbgDeclares; /// The set of basic blocks the renamer has already visited. - /// SmallPtrSet<BasicBlock *, 16> Visited; /// Contains a stable numbering of basic blocks to avoid non-determinstic @@ -298,7 +307,7 @@ private: bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; -} // end of anonymous namespace +} // end anonymous namespace /// Given a LoadInst LI this adds assume(LI != null) after it. static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { @@ -345,8 +354,8 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI, DominatorTree &DT, - AssumptionCache *AC) { + LargeBlockInfo &LBI, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); @@ -380,7 +389,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, Info.UsingBlocks.push_back(StoreBB); continue; } - } else if (LI->getParent() != StoreBB && !DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to @@ -402,7 +410,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // that information when we erase this Load. So we preserve // it with an assume. if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) + !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); @@ -416,11 +424,11 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Record debuginfo for the store and remove the declaration's // debuginfo. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { + for (DbgInfoIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); - DDI->eraseFromParent(); - LBI.deleteValue(DDI); + ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); + DII->eraseFromParent(); + LBI.deleteValue(DII); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); @@ -449,6 +457,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, /// } static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, + const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC) { // The trickiest case to handle is when we have large blocks. Because of this, @@ -457,7 +466,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // make it efficient to get the index of various operations in the block. // Walk the use-def list of the alloca, getting the locations of all stores. 
- typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; + using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>; StoresByIndexTy StoresByIndex; for (User *U : AI->users()) @@ -497,7 +506,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // information when we erase it. So we preserve it with an assume. Value *ReplVal = std::prev(I)->second->getOperand(0); if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) + !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); @@ -511,9 +520,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { + for (DbgInfoIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + ConvertDebugDeclareToDebugValue(DII, SI, DIB); } SI->eraseFromParent(); LBI.deleteValue(SI); @@ -523,9 +532,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DDI->eraseFromParent(); - LBI.deleteValue(DDI); + for (DbgInfoIntrinsic *DII : Info.DbgDeclares) { + DII->eraseFromParent(); + LBI.deleteValue(DII); } ++NumLocalPromoted; @@ -567,7 +576,7 @@ void PromoteMem2Reg::run() { // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; @@ -578,7 +587,7 @@ void PromoteMem2Reg::run() { // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock && - promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) { + promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -593,8 +602,8 @@ void PromoteMem2Reg::run() { } // Remember the dbg.declare intrinsic describing this alloca, if any. - if (Info.DbgDeclare) - AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; + if (!Info.DbgDeclares.empty()) + AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -604,7 +613,6 @@ void PromoteMem2Reg::run() { // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. - // Unique the set of defining blocks for efficient lookup. 
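The heart of promoteSingleBlockAlloca above: record (instruction index, stored value) for every store, sorted by index, then answer "what does a load at index L see?" with a lower_bound. A self-contained sketch of that query over plain pairs (the real code compares pairs with llvm::less_first and returns undef when no store precedes the load):

#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

// Returns the value of the last store strictly before LoadIdx, or nullptr
// if the load would read the alloca before any write (i.e. undef).
static const int *valueAtLoad(
    const std::vector<std::pair<unsigned, int>> &StoresByIndex,
    unsigned LoadIdx) {
  auto I = std::lower_bound(
      StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, 0),
      [](const std::pair<unsigned, int> &A, const std::pair<unsigned, int> &B) {
        return A.first < B.first;
      });
  if (I == StoresByIndex.begin())
    return nullptr;
  return &std::prev(I)->second;
}

int main() {
  std::vector<std::pair<unsigned, int>> Stores = {{3, 7}, {9, 42}};
  assert(valueAtLoad(Stores, 1) == nullptr); // load before the first store
  assert(*valueAtLoad(Stores, 5) == 7);
  assert(*valueAtLoad(Stores, 12) == 42);
}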
SmallPtrSet<BasicBlock *, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); @@ -629,8 +637,8 @@ void PromoteMem2Reg::run() { }); unsigned CurrentVersion = 0; - for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i) - QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion); + for (BasicBlock *BB : PHIBlocks) + QueuePhiNode(BB, AllocaNum, CurrentVersion); } if (Allocas.empty()) @@ -641,19 +649,16 @@ void PromoteMem2Reg::run() { // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. - // RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary - // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); do { - RenamePassData RPD; - RPD.swap(RenamePassWorkList.back()); + RenamePassData RPD = std::move(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); @@ -663,9 +668,7 @@ void PromoteMem2Reg::run() { Visited.clear(); // Remove the allocas themselves from the function. - for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { - Instruction *A = Allocas[i]; - + for (Instruction *A : Allocas) { // If there are any uses of the alloca instructions left, they must be in // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. @@ -675,9 +678,9 @@ void PromoteMem2Reg::run() { } // Remove alloca's dbg.declare instrinsics from the function. - for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) - if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) - DDI->eraseFromParent(); + for (auto &Declares : AllocaDbgDeclares) + for (auto *DII : Declares) + DII->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can @@ -714,7 +717,6 @@ void PromoteMem2Reg::run() { // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. - // for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); @@ -762,8 +764,8 @@ void PromoteMem2Reg::run() { while ((SomePHI = dyn_cast<PHINode>(BBI++)) && SomePHI->getNumIncomingValues() == NumBadPreds) { Value *UndefVal = UndefValue::get(SomePHI->getType()); - for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) - SomePHI->addIncoming(UndefVal, Preds[pred]); + for (BasicBlock *Pred : Preds) + SomePHI->addIncoming(UndefVal, Pred); } } @@ -779,7 +781,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks( AllocaInst *AI, AllocaInfo &Info, const SmallPtrSetImpl<BasicBlock *> &DefBlocks, SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) { - // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. 
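The rename driver above avoids recursion (deep CFGs would overflow the call stack) by keeping an explicit worklist, and the patch's RenamePassData move constructor lets entries be moved out of the worklist instead of swapped. The control skeleton, as a standalone sketch over a toy CFG:

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

struct Block { int Id; std::vector<Block *> Succs; };

static void renameAll(Block *Entry) {
  struct WorkItem { Block *BB; std::vector<int> Values; };
  std::set<Block *> Visited;
  std::vector<WorkItem> Worklist;
  Worklist.push_back({Entry, {0}});
  do {
    // Move out of the back slot, then pop: no copy of Values is made.
    WorkItem Item = std::move(Worklist.back());
    Worklist.pop_back();
    if (!Visited.insert(Item.BB).second)
      continue; // already renamed along another path
    std::printf("visit block %d\n", Item.BB->Id);
    for (Block *S : Item.BB->Succs)
      Worklist.push_back({S, Item.Values});
  } while (!Worklist.empty());
}

int main() {
  Block C{2, {}}, B{1, {&C}}, A{0, {&B, &C}};
  renameAll(&A);
}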
@@ -834,9 +835,7 @@ void PromoteMem2Reg::ComputeLiveInBlocks( // Since the value is live into BB, it is either defined in a predecessor or // live into it to. Add the preds to the worklist unless they are a // defining block. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - + for (BasicBlock *P : predecessors(BB)) { // The value is not live into a predecessor if it defines the value. if (DefBlocks.count(P)) continue; @@ -906,8 +905,8 @@ NextIteration: // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; - if (DbgDeclareInst *DDI = AllocaDbgDeclares[AllocaNo]) - ConvertDebugDeclareToDebugValue(DDI, APN, DIB); + for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[AllocaNo]) + ConvertDebugDeclareToDebugValue(DII, APN, DIB); // Get the next phi node. ++PNI; @@ -943,7 +942,7 @@ NextIteration: // that information when we erase this Load. So we preserve // it with an assume. if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(V, LI, &DT)) + !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); // Anything using the load now uses the current value. @@ -963,8 +962,8 @@ NextIteration: // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DII, SI, DIB); BB->getInstList().erase(SI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 6ccf54e49dd3..e4b20b0faa15 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/BasicBlock.h" @@ -39,12 +38,13 @@ using namespace llvm; #define DEBUG_TYPE "ssaupdater" -typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; +using AvailableValsTy = DenseMap<BasicBlock *, Value *>; + static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode *> *NewPHI) : InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { @@ -72,7 +72,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { } static bool IsEquivalentPHI(PHINode *PHI, - SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) { + SmallDenseMap<BasicBlock *, Value *, 8> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) return false; @@ -100,7 +100,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we have the hard case. Get the live-in values for each // predecessor. - SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; + SmallVector<std::pair<BasicBlock *, Value *>, 8> PredValues; Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it @@ -145,8 +145,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. 
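ComputeLiveInBlocks, shown ending above, is a plain backward reachability walk: start from the blocks that use the value and flood through predecessors, stopping at blocks that define it. A standalone sketch of that core (the real code additionally handles blocks that both define and use the value):

#include <cstdio>
#include <set>
#include <vector>

struct Block { int Id; std::vector<Block *> Preds; };

static std::set<Block *> liveInBlocks(const std::vector<Block *> &UseBlocks,
                                      const std::set<Block *> &DefBlocks) {
  std::set<Block *> LiveIn;
  std::vector<Block *> Worklist(UseBlocks.begin(), UseBlocks.end());
  while (!Worklist.empty()) {
    Block *BB = Worklist.back();
    Worklist.pop_back();
    if (!LiveIn.insert(BB).second)
      continue;
    for (Block *P : BB->Preds)
      if (!DefBlocks.count(P)) // a def kills the upward exposure
        Worklist.push_back(P);
  }
  return LiveIn;
}

int main() {
  Block Entry{0, {}}, Def{1, {&Entry}}, Mid{2, {&Def}}, Use{3, {&Mid}};
  auto Live = liveInBlocks({&Use}, {&Def});
  std::printf("%zu live-in blocks\n", Live.size()); // Use and Mid
}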
if (isa<PHINode>(BB->begin())) { - SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(), - PredValues.end()); + SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(), + PredValues.end()); PHINode *SomePHI; for (BasicBlock::iterator It = BB->begin(); (SomePHI = dyn_cast<PHINode>(It)); ++It) { @@ -218,11 +218,11 @@ namespace llvm { template<> class SSAUpdaterTraits<SSAUpdater> { public: - typedef BasicBlock BlkT; - typedef Value *ValT; - typedef PHINode PhiT; + using BlkT = BasicBlock; + using ValT = Value *; + using PhiT = PHINode; + using BlkSucc_iterator = succ_iterator; - typedef succ_iterator BlkSucc_iterator; static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } @@ -253,7 +253,7 @@ public: /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds /// vector, set Info->NumPreds, and allocate space in Info->Preds. static void FindPredecessorBlocks(BasicBlock *BB, - SmallVectorImpl<BasicBlock*> *Preds) { + SmallVectorImpl<BasicBlock *> *Preds) { // We can get our predecessor info by walking the pred_iterator list, // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. @@ -293,7 +293,6 @@ public: } /// ValueIsPHI - Check if a value is a PHI. - /// static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { return dyn_cast<PHINode>(Val); } @@ -333,7 +332,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { //===----------------------------------------------------------------------===// LoadAndStorePromoter:: -LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, +LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; @@ -349,11 +348,11 @@ LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, } void LoadAndStorePromoter:: -run(const SmallVectorImpl<Instruction*> &Insts) const { +run(const SmallVectorImpl<Instruction *> &Insts) const { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. - DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock; + DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock; for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); @@ -361,12 +360,12 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be determinstic. - SmallVector<LoadInst*, 32> LiveInLoads; - DenseMap<Value*, Value*> ReplacedLoads; + SmallVector<LoadInst *, 32> LiveInLoads; + DenseMap<Value *, Value *> ReplacedLoads; for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); - TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; + TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; // If this block has already been processed, ignore this repeat use. 
if (BlockUses.empty()) continue; @@ -489,7 +488,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { bool LoadAndStorePromoter::isInstInList(Instruction *I, - const SmallVectorImpl<Instruction*> &Insts) + const SmallVectorImpl<Instruction *> &Insts) const { return is_contained(Insts, I); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 8784b9702141..f02f80cc1b78 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,12 +22,14 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -35,8 +37,8 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -53,6 +55,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -73,6 +76,7 @@ #include <iterator> #include <map> #include <set> +#include <tuple> #include <utility> #include <vector> @@ -141,12 +145,13 @@ namespace { // The first field contains the value that the switch produces when a certain // case group is selected, and the second field is a vector containing the // cases composing the case group. -typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2> - SwitchCaseResultVectorTy; +using SwitchCaseResultVectorTy = + SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>; + // The first field contains the phi node that generates a result of the switch // and the second field contains the value generated for a certain case in the // switch for that PHI. -typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy; +using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; /// ValueEqualityComparisonCase - Represents a case of a switch. struct ValueEqualityComparisonCase { @@ -167,11 +172,9 @@ struct ValueEqualityComparisonCase { class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout &DL; - unsigned BonusInstThreshold; - AssumptionCache *AC; SmallPtrSetImpl<BasicBlock *> *LoopHeaders; - // See comments in SimplifyCFGOpt::SimplifySwitch. 
- bool LateSimplifyCFG; + const SimplifyCFGOptions &Options; + Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases( TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -194,11 +197,9 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - unsigned BonusInstThreshold, AssumptionCache *AC, SmallPtrSetImpl<BasicBlock *> *LoopHeaders, - bool LateSimplifyCFG) - : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), - LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {} + const SimplifyCFGOptions &Opts) + : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {} bool run(BasicBlock *BB); }; @@ -438,18 +439,24 @@ namespace { /// fail. struct ConstantComparesGatherer { const DataLayout &DL; - Value *CompValue; /// Value found for the switch comparison - Value *Extra; /// Extra clause to be checked before the switch - SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch - unsigned UsedICmps; /// Number of comparisons matched in the and/or chain + + /// Value found for the switch comparison + Value *CompValue = nullptr; + + /// Extra clause to be checked before the switch + Value *Extra = nullptr; + + /// Set of integers to match in switch + SmallVector<ConstantInt *, 8> Vals; + + /// Number of comparisons matched in the and/or chain + unsigned UsedICmps = 0; /// Construct and compute the result for the comparison instruction Cond - ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) - : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) { + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { gather(Cond); } - /// Prevent copy ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; ConstantComparesGatherer & operator=(const ConstantComparesGatherer &) = delete; @@ -487,7 +494,6 @@ private: // (x & ~2^z) == y --> x == y || x == y|2^z // This undoes a transformation done by instcombine to fuse 2 compares. if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { - // It's a little bit hard to see why the following transformations are // correct. Here is a CVC3 program to verify them for 64-bit values: @@ -770,6 +776,31 @@ static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, return false; } +// Set branch weights on SwitchInst. This sets the metadata if there is at +// least one non-zero weight. +static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) { + // Check that there is at least one non-zero weight. Otherwise, pass + // nullptr to setMetadata which will erase the existing metadata. + MDNode *N = nullptr; + if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; })) + N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights); + SI->setMetadata(LLVMContext::MD_prof, N); +} + +// Similar to the above, but for branch and select instructions that take +// exactly 2 weights. +static void setBranchWeights(Instruction *I, uint32_t TrueWeight, + uint32_t FalseWeight) { + assert(isa<BranchInst>(I) || isa<SelectInst>(I)); + // Check that there is at least one non-zero weight. Otherwise, pass + // nullptr to setMetadata which will erase the existing metadata. 
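The two setBranchWeights helpers introduced next centralize one pattern: all-zero weights carry no information, so rather than attaching a degenerate !prof node the helper clears the metadata (setMetadata with nullptr erases). A standalone analogue of the decision, using std::optional (C++17) to stand in for "metadata or none":

#include <cstdint>
#include <optional>
#include <vector>

using Weights = std::vector<uint32_t>;

static std::optional<Weights> makeBranchWeights(const Weights &W) {
  for (uint32_t X : W)
    if (X != 0)
      return W;        // at least one non-zero weight: keep the profile data
  return std::nullopt; // all zero: caller should erase existing metadata
}

int main() {
  return makeBranchWeights({0, 0}).has_value() ? 1 : 0; // exits 0: no payload
}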
+ MDNode *N = nullptr; + if (TrueWeight || FalseWeight) + N = MDBuilder(I->getParent()->getContext()) + .createBranchWeights(TrueWeight, FalseWeight); + I->setMetadata(LLVMContext::MD_prof, N); +} + /// If TI is known to be a terminator instruction and its block is known to /// only have a single predecessor block, check to see if that predecessor is /// also a value comparison with the same value, and if that comparison @@ -859,9 +890,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( } } if (HasWeight && Weights.size() >= 2) - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getParent()->getContext()) - .createBranchWeights(Weights)); + setBranchWeights(SI, Weights); DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -1166,9 +1195,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - NewSI->setMetadata( - LLVMContext::MD_prof, - MDBuilder(BB->getContext()).createBranchWeights(MDWeights)); + setBranchWeights(NewSI, MDWeights); } EraseTerminatorInstAndDCECond(PTI); @@ -1279,9 +1306,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // I1 and I2 are being combined into a single instruction. Its debug // location is the merged locations of the original instructions. - if (!isa<CallInst>(I1)) - I1->setDebugLoc( - DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc())); + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); I2->eraseFromParent(); Changed = true; @@ -1535,20 +1560,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - // The debug location for the "common" instruction is the merged locations of - // all the commoned instructions. We start with the original location of the - // "common" instruction and iteratively merge each location in the loop below. - const DILocation *Loc = I0->getDebugLoc(); - // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) if (I != I0) { - Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); + // The debug location for the "common" instruction is the merged locations + // of all the commoned instructions. We start with the original location + // of the "common" instruction and iteratively merge each location in the + // loop below. + // This is an N-way merge, which will be inefficient if I0 is a CallInst. + // However, as N-way merge for CallInst is rare, so we use simplified API + // instead of using complex API for N-way merge. + I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); combineMetadataForCSE(I0, I); I0->andIRFlags(I); } - if (!isa<CallInst>(I0)) - I0->setDebugLoc(Loc); if (!isa<StoreInst>(I0)) { // canSinkLastInstruction checked that all instructions were used by @@ -1582,9 +1607,9 @@ namespace { ArrayRef<BasicBlock*> Blocks; SmallVector<Instruction*,4> Insts; bool Fail; + public: - LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : - Blocks(Blocks) { + LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) { reset(); } @@ -1608,7 +1633,7 @@ namespace { return !Fail; } - void operator -- () { + void operator--() { if (Fail) return; for (auto *&Inst : Insts) { @@ -1916,6 +1941,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // - All of their uses are in CondBB. 
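The LockstepReverseIterator defined above walks several blocks' instruction lists backwards in unison for sink-from-the-end matching. Its shape, reduced to plain vectors (a sketch of the iteration protocol only; the real class also re-checks that the instructions still match):

#include <cstdio>
#include <vector>

struct LockstepReverse {
  const std::vector<std::vector<int>> &Lists;
  std::vector<size_t> Pos; // per-list index of the current element
  bool Fail = false;

  explicit LockstepReverse(const std::vector<std::vector<int>> &L) : Lists(L) {
    for (const auto &V : L) {
      if (V.empty()) { Fail = true; return; }
      Pos.push_back(V.size() - 1);
    }
  }
  bool isValid() const { return !Fail; }
  void operator--() {
    for (size_t &P : Pos) {
      if (P == 0) { Fail = true; return; } // any list exhausted: stop
      --P;
    }
  }
};

int main() {
  std::vector<std::vector<int>> Blocks = {{1, 2, 3}, {9, 3}};
  for (LockstepReverse It(Blocks); It.isValid(); --It)
    std::printf("%d %d\n", Blocks[0][It.Pos[0]], Blocks[1][It.Pos[1]]);
  // Prints "3 3" then "2 9", then stops: the second list ran out.
}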
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; + SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics; + unsigned SpeculationCost = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; @@ -1924,8 +1951,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, BBI != BBE; ++BBI) { Instruction *I = &*BBI; // Skip debug info. - if (isa<DbgInfoIntrinsic>(I)) + if (isa<DbgInfoIntrinsic>(I)) { + SpeculatedDbgIntrinsics.push_back(I); continue; + } // Only speculatively execute a single instruction (not counting the // terminator) for now. @@ -2030,11 +2059,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (Invert) std::swap(TrueV, FalseV); Value *S = Builder.CreateSelect( - BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); + BrCond, TrueV, FalseV, "spec.store.select", BI); SpeculatedStore->setOperand(0, S); - SpeculatedStore->setDebugLoc( - DILocation::getMergedLocation( - BI->getDebugLoc(), SpeculatedStore->getDebugLoc())); + SpeculatedStore->applyMergedLocation(BI->getDebugLoc(), + SpeculatedStore->getDebugLoc()); } // Metadata can be dependent on the condition we are hoisting above. @@ -2066,11 +2094,17 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (Invert) std::swap(TrueV, FalseV); Value *V = Builder.CreateSelect( - BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); + BrCond, TrueV, FalseV, "spec.select", BI); PN->setIncomingValue(OrigI, V); PN->setIncomingValue(ThenI, V); } + // Remove speculated dbg intrinsics. + // FIXME: Is it possible to do this in a more elegant way? Moving/merging the + // dbg value for the different flows and inserting it after the select. + for (Instruction *I : SpeculatedDbgIntrinsics) + I->eraseFromParent(); + ++NumSpeculations; return true; } @@ -2507,7 +2541,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { else { // For unconditional branch, check for a simple CFG pattern, where // BB has a single predecessor and BB's successor is also its predecessor's - // successor. If such pattern exisits, check for CSE between BB and its + // successor. If such pattern exists, check for CSE between BB and its // predecessor. 
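The store-speculation trick above ("spec.store.select") in source-level terms: a conditional store becomes an unconditional store of a selected value, so the then-block can be flattened away. Legal only when the location is known dereferenceable and writable on both paths; the pass checks that, this sketch does not:

#include <cassert>

static void conditionalStore(bool C, int &Slot, int V) {
  // Before: if (C) Slot = V;
  int SpecStoreSelect = C ? V : Slot; // the "spec.store.select" value
  Slot = SpecStoreSelect;             // the store now executes on both paths
}

int main() {
  int S = 1;
  conditionalStore(false, S, 9);
  assert(S == 1); // old value rewritten in place: no visible change
  conditionalStore(true, S, 9);
  assert(S == 9);
}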
if (BasicBlock *PB = BB->getSinglePredecessor()) if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator())) @@ -2725,9 +2759,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end()); - PBI->setMetadata( - LLVMContext::MD_prof, - MDBuilder(BI->getContext()).createBranchWeights(MDWeights)); + setBranchWeights(PBI, MDWeights[0], MDWeights[1]); } else PBI->setMetadata(LLVMContext::MD_prof, nullptr); } else { @@ -2860,7 +2892,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, - bool InvertPCond, bool InvertQCond) { + bool InvertPCond, bool InvertQCond, + const DataLayout &DL) { auto IsaBitcastOfPointerType = [](const Instruction &I) { return Operator::getOpcode(&I) == Instruction::BitCast && I.getType()->isPointerTy(); @@ -2887,7 +2920,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, else return false; } - return N <= PHINodeFoldingThreshold; + // The store we want to merge is counted in N, so add 1 to make sure + // we're counting the instructions that would be left. + return N <= (PHINodeFoldingThreshold + 1); }; if (!MergeCondStoresAggressively && @@ -2966,6 +3001,29 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, PStore->getAAMetadata(AAMD, /*Merge=*/false); PStore->getAAMetadata(AAMD, /*Merge=*/true); SI->setAAMetadata(AAMD); + unsigned PAlignment = PStore->getAlignment(); + unsigned QAlignment = QStore->getAlignment(); + unsigned TypeAlignment = + DL.getABITypeAlignment(SI->getValueOperand()->getType()); + unsigned MinAlignment; + unsigned MaxAlignment; + std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment); + // Choose the minimum alignment. If we could prove both stores execute, we + // could use biggest one. In this case, though, we only know that one of the + // stores executes. And we don't know it's safe to take the alignment from a + // store that doesn't execute. + if (MinAlignment != 0) { + // Choose the minimum of all non-zero alignments. + SI->setAlignment(MinAlignment); + } else if (MaxAlignment != 0) { + // Choose the minimal alignment between the non-zero alignment and the ABI + // default alignment for the type of the stored value. + SI->setAlignment(std::min(MaxAlignment, TypeAlignment)); + } else { + // If both alignments are zero, use ABI default alignment for the type of + // the stored value. + SI->setAlignment(TypeAlignment); + } QStore->eraseFromParent(); PStore->eraseFromParent(); @@ -2973,7 +3031,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, return true; } -static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { +static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, + const DataLayout &DL) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. But it may @@ -3001,7 +3060,6 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { // We model triangles as a type of diamond with a nullptr "true" block. // Triangles are canonicalized so that the fallthrough edge is represented by // a true condition, as in the diagram above. 
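The alignment logic added to mergeConditionalStoreToAddress above, as a standalone function with 0 meaning "alignment unknown". Since only one of the two original stores executes, the safe choice is the minimum of whatever is known, bounded by the type's ABI alignment when in doubt:

#include <algorithm>
#include <cassert>
#include <tuple>

static unsigned mergedAlignment(unsigned A, unsigned B, unsigned ABIAlign) {
  unsigned MinA, MaxA;
  std::tie(MinA, MaxA) = std::minmax(A, B);
  if (MinA != 0)
    return MinA;                     // both known: take the smaller one
  if (MaxA != 0)
    return std::min(MaxA, ABIAlign); // one known: clamp by the ABI default
  return ABIAlign;                   // neither known: ABI default
}

int main() {
  assert(mergedAlignment(8, 16, 4) == 8);
  assert(mergedAlignment(0, 16, 4) == 4);
  assert(mergedAlignment(0, 2, 4) == 2);
  assert(mergedAlignment(0, 0, 4) == 4);
}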
- // BasicBlock *PTB = PBI->getSuccessor(0); BasicBlock *PFB = PBI->getSuccessor(1); BasicBlock *QTB = QBI->getSuccessor(0); @@ -3076,7 +3134,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond); + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); return Changed; } @@ -3141,7 +3199,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) return true; // If this is a conditional branch in an empty block, and if any @@ -3270,9 +3328,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // Halve the weights if any of them cannot fit in an uint32_t FitWeights(NewWeights); - PBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(BI->getContext()) - .createBranchWeights(NewWeights[0], NewWeights[1])); + setBranchWeights(PBI, NewWeights[0], NewWeights[1]); } // OtherDest may have phi nodes. If so, add an entry from PBI's @@ -3310,9 +3366,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, FitWeights(NewWeights); - NV->setMetadata(LLVMContext::MD_prof, - MDBuilder(BI->getContext()) - .createBranchWeights(NewWeights[0], NewWeights[1])); + setBranchWeights(NV, NewWeights[0], NewWeights[1]); } } } @@ -3367,9 +3421,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); if (TrueWeight != FalseWeight) - NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(OldTerm->getContext()) - .createBranchWeights(TrueWeight, FalseWeight)); + setBranchWeights(NewBI, TrueWeight, FalseWeight); } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this @@ -3464,10 +3516,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. -static bool TryToSimplifyUncondBranchWithICmpInIt( +static bool tryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, - const TargetTransformInfo &TTI, unsigned BonusInstThreshold, - AssumptionCache *AC) { + const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -3502,7 +3553,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -3518,7 +3569,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. 
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -3556,9 +3607,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( Weights.push_back(Weights[0]); SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - SI->setMetadata( - LLVMContext::MD_prof, - MDBuilder(SI->getContext()).createBranchWeights(MDWeights)); + setBranchWeights(SI, MDWeights); } } SI->addCase(Cst, NewBB); @@ -4285,10 +4334,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { TrueWeight /= 2; FalseWeight /= 2; } - NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getContext()) - .createBranchWeights((uint32_t)TrueWeight, - (uint32_t)FalseWeight)); + setBranchWeights(NewBI, TrueWeight, FalseWeight); } } @@ -4316,7 +4362,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. -static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, +static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); @@ -4385,9 +4431,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, } if (HasWeight && Weights.size() >= 2) { SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getParent()->getContext()) - .createBranchWeights(MDWeights)); + setBranchWeights(SI, MDWeights); } return !DeadCases.empty(); @@ -4429,38 +4473,59 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, /// Try to forward the condition of a switch instruction to a phi node /// dominated by the switch, if that would mean that some of the destination -/// blocks of the switch can be folded away. -/// Returns true if a change is made. +/// blocks of the switch can be folded away. Return true if a change is made. static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { - typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap; - ForwardingNodesMap ForwardingNodes; + using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>; - for (auto Case : SI->cases()) { + ForwardingNodesMap ForwardingNodes; + BasicBlock *SwitchBlock = SI->getParent(); + bool Changed = false; + for (auto &Case : SI->cases()) { ConstantInt *CaseValue = Case.getCaseValue(); BasicBlock *CaseDest = Case.getCaseSuccessor(); - int PhiIndex; - PHINode *PHI = - FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex); - if (!PHI) - continue; + // Replace phi operands in successor blocks that are using the constant case + // value rather than the switch condition variable: + // switchbb: + // switch i32 %x, label %default [ + // i32 17, label %succ + // ... + // succ: + // %r = phi i32 ... [ 17, %switchbb ] ... + // --> + // %r = phi i32 ... [ %x, %switchbb ] ... + + for (Instruction &InstInCaseDest : *CaseDest) { + auto *Phi = dyn_cast<PHINode>(&InstInCaseDest); + if (!Phi) break; + + // This only works if there is exactly 1 incoming edge from the switch to + // a phi. If there is >1, that means multiple cases of the switch map to 1 + // value in the phi, and that phi value is not the switch condition. 
Thus, + // this transform would not make sense (the phi would be invalid because + // a phi can't have different incoming values from the same block). + int SwitchBBIdx = Phi->getBasicBlockIndex(SwitchBlock); + if (Phi->getIncomingValue(SwitchBBIdx) == CaseValue && + count(Phi->blocks(), SwitchBlock) == 1) { + Phi->setIncomingValue(SwitchBBIdx, SI->getCondition()); + Changed = true; + } + } - ForwardingNodes[PHI].push_back(PhiIndex); + // Collect phi nodes that are indirectly using this switch's case constants. + int PhiIdx; + if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx)) + ForwardingNodes[Phi].push_back(PhiIdx); } - bool Changed = false; - - for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), - E = ForwardingNodes.end(); - I != E; ++I) { - PHINode *Phi = I->first; - SmallVectorImpl<int> &Indexes = I->second; - + for (auto &ForwardingNode : ForwardingNodes) { + PHINode *Phi = ForwardingNode.first; + SmallVectorImpl<int> &Indexes = ForwardingNode.second; if (Indexes.size() < 2) continue; - for (size_t I = 0, E = Indexes.size(); I != E; ++I) - Phi->setIncomingValue(Indexes[I], SI->getCondition()); + for (int Index : Indexes) + Phi->setIncomingValue(Index, SI->getCondition()); Changed = true; } @@ -4743,8 +4808,8 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. -static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - AssumptionCache *AC, const DataLayout &DL, +static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; @@ -4816,18 +4881,18 @@ private: } Kind; // For SingleValueKind, this is the single value. - Constant *SingleValue; + Constant *SingleValue = nullptr; // For BitMapKind, this is the bitmap. - ConstantInt *BitMap; - IntegerType *BitMapElementTy; + ConstantInt *BitMap = nullptr; + IntegerType *BitMapElementTy = nullptr; // For LinearMapKind, these are the constants used to derive the value. - ConstantInt *LinearOffset; - ConstantInt *LinearMultiplier; + ConstantInt *LinearOffset = nullptr; + ConstantInt *LinearMultiplier = nullptr; // For ArrayKind, this is the array. 
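Back in eliminateDeadSwitchCases above: computeKnownBits proves some bits of the switch condition, and any case value that disagrees with a proven bit can never be selected. The test itself, in a standalone sketch with explicit known-zero/known-one masks:

#include <cassert>
#include <cstdint>

static bool caseIsDead(uint64_t CaseVal, uint64_t KnownZero,
                       uint64_t KnownOne) {
  return (CaseVal & KnownZero) != 0 ||  // case sets a bit proven zero
         ((~CaseVal) & KnownOne) != 0;  // case clears a bit proven one
}

int main() {
  // Suppose the condition is known to end in binary "10":
  // bit 1 is proven one, bit 0 is proven zero.
  uint64_t KnownOne = 0x2, KnownZero = 0x1;
  assert(caseIsDead(5, KnownZero, KnownOne));  // 0b101 sets bit 0
  assert(caseIsDead(8, KnownZero, KnownOne));  // 0b1000 clears bit 1
  assert(!caseIsDead(6, KnownZero, KnownOne)); // 0b110 is still feasible
}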
- GlobalVariable *Array; + GlobalVariable *Array = nullptr; }; } // end anonymous namespace @@ -4835,9 +4900,7 @@ private: SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) - : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), - LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); @@ -5083,7 +5146,6 @@ static void reuseTableCompare( User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) { - ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser); if (!CmpInst) return; @@ -5112,7 +5174,7 @@ static void reuseTableCompare( for (auto ValuePair : Values) { Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), ValuePair.second, CmpOp1, true); - if (!CaseConst || CaseConst == DefaultConst) + if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst)) return; assert((CaseConst == TrueConst || CaseConst == FalseConst) && "Expect true or false as compare result."); @@ -5151,8 +5213,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - // Only build lookup table when we have a target that supports it. - if (!TTI.shouldBuildLookupTables()) + Function *Fn = SI->getParent()->getParent(); + // Only build lookup table when we have a target that supports it or the + // attribute is not set. + if (!TTI.shouldBuildLookupTables() || + (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")) return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could @@ -5163,8 +5228,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // string and lookup indices into that. // Ignore switches with less than three cases. Lookup tables will not make - // them - // faster, so we don't analyze them. + // them faster, so we don't analyze them. if (SI->getNumCases() < 3) return false; @@ -5176,8 +5240,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, ConstantInt *MaxCaseVal = CI->getCaseValue(); BasicBlock *CommonDest = nullptr; - typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy; + + using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>; SmallDenseMap<PHINode *, ResultListTy> ResultLists; + SmallDenseMap<PHINode *, Constant *> DefaultResults; SmallDenseMap<PHINode *, Type *> ResultTypes; SmallVector<PHINode *, 4> PHIs; @@ -5190,7 +5256,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. - typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy; + using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, Results, DL, TTI)) @@ -5248,8 +5314,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Compute the table index value. 
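What SwitchToLookupTable ultimately emits, written by hand at the source level: a dense switch over small case values becomes a bounds check plus an array load (the ArrayKind of SwitchLookupTable; the single-value, bitmap, and linear-map kinds avoid the array when the results allow it):

#include <cstdio>

static int lookup(unsigned I) {
  static const int Table[4] = {10, 20, 30, 40}; // results for cases 0..3
  if (I < 4)
    return Table[I];
  return -1; // the switch's default result
}

int main() {
  std::printf("%d %d\n", lookup(2), lookup(9)); // 30 -1
}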
Builder.SetInsertPoint(SI); - Value *TableIndex = - Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); + Value *TableIndex; + if (MinCaseVal->isNullValue()) + TableIndex = SI->getCondition(); + else + TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); // Compute the maximum table size representable by the integer type we are // switching upon. @@ -5282,15 +5352,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Builder.SetInsertPoint(LookupBB); if (NeedMask) { - // Before doing the lookup we do the hole check. - // The LookupBB is therefore re-purposed to do the hole check - // and we create a new LookupBB. + // Before doing the lookup, we do the hole check. The LookupBB is therefore + // re-purposed to do the hole check, and we create a new LookupBB. BasicBlock *MaskBB = LookupBB; MaskBB->setName("switch.hole_check"); LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); - // Make the mask's bitwidth at least 8bit and a power-of-2 to avoid + // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid // unnecessary illegal types. uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); APInt MaskInt(TableSizePowOf2, 0); @@ -5320,7 +5389,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } if (!DefaultIsReachable || GeneratingCoveredLookupTable) { - // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later, + // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, // do not delete PHINodes here. SI->getDefaultDest()->removePredecessor(SI->getParent(), /*DontDeleteUselessPHIs=*/true); @@ -5333,7 +5402,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; - StringRef FuncName = SI->getParent()->getParent()->getName(); + StringRef FuncName = Fn->getName(); SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, FuncName); @@ -5391,14 +5460,14 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) { return NumCases * 100 >= Range * MinDensity; } -// Try and transform a switch that has "holes" in it to a contiguous sequence -// of cases. -// -// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be -// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. -// -// This converts a sparse switch into a dense switch which allows better -// lowering and could also allow transforming into a lookup table. +/// Try to transform a switch that has "holes" in it to a contiguous sequence +/// of cases. +/// +/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be +/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. +/// +/// This converts a sparse switch into a dense switch which allows better +/// lowering and could also allow transforming into a lookup table. static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { @@ -5427,7 +5496,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // First, transform the values such that they start at zero and ascend. int64_t Base = Values[0]; for (auto &V : Values) - V -= Base; + V -= (uint64_t)(Base); // Now we have signed numbers that have been shifted so that, given enough // precision, there are no negative values. 
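The "hole check" built above when NeedMask is set, reduced to plain integers: bit I of the mask is set iff case I has a real result, and a shift-and-test routes holes to the default destination before the table is ever read (the pass rounds the mask width up to a power of two of at least 8 bits; this sketch just uses uint64_t):

#include <cassert>
#include <cstdint>

static bool coveredByTable(uint64_t Mask, unsigned Index) {
  return ((Mask >> Index) & 1) != 0;
}

int main() {
  // Cases 0, 1 and 3 exist; case 2 is a hole filled with a dummy entry.
  uint64_t Mask = 0xB; // binary 1011
  assert(coveredByTable(Mask, 1));
  assert(!coveredByTable(Mask, 2)); // falls through to the default
}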
Since the rest of the transform @@ -5492,12 +5561,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -5507,33 +5576,34 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // Remove unreachable cases. - if (EliminateDeadSwitchCases(SI, AC, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (eliminateDeadSwitchCases(SI, Options.AC, DL)) + return simplifyCFG(BB, TTI, Options) | true; - if (SwitchToSelect(SI, Builder, AC, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (switchToSelect(SI, Builder, DL, TTI)) + return simplifyCFG(BB, TTI, Options) | true; - if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) + return simplifyCFG(BB, TTI, Options) | true; - // The conversion from switch to lookup tables results in difficult - // to analyze code and makes pruning branches much harder. - // This is a problem of the switch expression itself can still be - // restricted as a result of inlining or CVP. There only apply this - // transformation during late steps of the optimisation chain. - if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + // The conversion from switch to lookup tables results in difficult-to-analyze + // code and makes pruning branches much harder. This is a problem if the + // switch expression itself can still be restricted as a result of inlining or + // CVP. Therefore, only apply this transformation during late stages of the + // optimisation pipeline. 
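To make the lookup-table conversion concrete before the guarded call below, here is a minimal source-level sketch; the function names and constants are hypothetical, not code from this change.

  // Hypothetical input: every case yields a constant, so the switch can
  // become a bounds check plus a load from a constant table.
  int beforeLookup(unsigned X) {
    switch (X) {
    case 0: return 13;
    case 1: return 42;
    case 2: return 7;
    default: return 0;
    }
  }

  // Roughly the shape SwitchToLookupTable produces, expressed in source.
  int afterLookup(unsigned X) {
    static const int Table[3] = {13, 42, 7};
    return X < 3 ? Table[X] : 0;
  }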
+ if (Options.ConvertSwitchToLookupTable && + SwitchToLookupTable(SI, Builder, DL, TTI)) + return simplifyCFG(BB, TTI, Options) | true; if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5571,7 +5641,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } return Changed; } @@ -5613,8 +5683,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I); if (!LPad2 || !LPad2->isIdenticalTo(LPad)) continue; - for (++I; isa<DbgInfoIntrinsic>(I); ++I) { - } + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; BranchInst *BI2 = dyn_cast<BranchInst>(I); if (!BI2 || !BI2->isIdenticalTo(BI)) continue; @@ -5658,39 +5728,38 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, BasicBlock *BB = BI->getParent(); BasicBlock *Succ = BI->getSuccessor(0); - if (SinkCommon && SinkThenElseCodeToEnd(BI)) + if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block or its successor is a loop - // header (This is for early invocations before loop simplify and + // If LoopHeader is provided, check if the block or its successor is a loop + // header. (This is for early invocations before loop simplify and // vectorization to keep canonical loop forms for nested loops. These blocks // can be eliminated when the pass is invoked later in the back-end.) bool NeedCanonicalLoop = - !LateSimplifyCFG && + Options.NeedCanonicalLoop && (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - // If the only instruction in the block is a seteq/setne comparison - // against a constant, try to simplify the block. + // If the only instruction in the block is a seteq/setne comparison against a + // constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, - BonusInstThreshold, AC)) + tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, Options)) return true; } // See if we can merge an empty landing pad block with another which is // equivalent. if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { - for (++I; isa<DbgInfoIntrinsic>(I); ++I) { - } + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) return true; } @@ -5699,8 +5768,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. 
- if (FoldBranchToCommonDest(BI, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5725,7 +5794,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -5735,14 +5804,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } else if (&*I == cast<Instruction>(BI->getCondition())) { ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } } @@ -5758,9 +5827,9 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PBI && PBI->isConditional() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB); - bool CondIsFalse = PBI->getSuccessor(1) == BB; + bool CondIsTrue = PBI->getSuccessor(0) == BB; Optional<bool> Implication = isImpliedCondition( - PBI->getCondition(), BI->getCondition(), DL, CondIsFalse); + PBI->getCondition(), BI->getCondition(), DL, CondIsTrue); if (Implication) { // Turn this into a branch on constant. auto *OldCond = BI->getCondition(); @@ -5769,7 +5838,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { : ConstantInt::getFalse(BB->getContext()); BI->setCondition(CI); RecursivelyDeleteTriviallyDeadInstructions(OldCond); - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } } } @@ -5777,8 +5846,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) + return simplifyCFG(BB, TTI, Options) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -5787,7 +5856,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistThenElseCodeToIf(BI, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. 
@@ -5795,7 +5864,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -5804,30 +5873,30 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (FoldCondBranchOnPHI(BI, DL, Options.AC)) + return simplifyCFG(BB, TTI, Options) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // Look for diamond patterns. if (MergeCondStores) if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (mergeConditionalStores(PBI, BI, DL)) + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5936,7 +6005,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. - // if (MergeBlockIntoPredecessor(BB)) return true; @@ -5944,12 +6012,12 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // If there is a trivial two-entry PHI node in this basic block, and we can // eliminate it, do so now. 
- if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (auto *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) Changed |= FoldTwoEntryPHINode(PN, TTI, DL); Builder.SetInsertPoint(BB->getTerminator()); - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) { if (BI->isUnconditional()) { if (SimplifyUncondBranch(BI, Builder)) return true; @@ -5957,25 +6025,22 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (SimplifyCondBranch(BI, Builder)) return true; } - } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + } else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { if (SimplifyReturn(RI, Builder)) return true; - } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + } else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { if (SimplifyResume(RI, Builder)) return true; - } else if (CleanupReturnInst *RI = - dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + } else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { if (SimplifyCleanupReturn(RI)) return true; - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + } else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; - } else if (UnreachableInst *UI = - dyn_cast<UnreachableInst>(BB->getTerminator())) { + } else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) { if (SimplifyUnreachable(UI)) return true; - } else if (IndirectBrInst *IBI = - dyn_cast<IndirectBrInst>(BB->getTerminator())) { + } else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) { if (SimplifyIndirectBr(IBI)) return true; } @@ -5983,16 +6048,10 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { return Changed; } -/// This function is used to do simplification of a CFG. -/// For example, it adjusts branches to branches to eliminate the extra hop, -/// eliminates unreachable basic blocks, and does other "peephole" optimization -/// of the CFG. It returns true if a modification was made. 
-/// -bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, AssumptionCache *AC, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders, - bool LateSimplifyCFG) { - return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG) +bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + const SimplifyCFGOptions &Options, + SmallPtrSetImpl<BasicBlock *> *LoopHeaders) { + return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders, + Options) .run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 6d90e6b48358..ad1faea0a7ae 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -18,13 +18,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -35,10 +33,14 @@ using namespace llvm; STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumFoldedUser, "Number of IV users folded into a constant"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); STATISTIC( NumSimplifiedSDiv, "Number of IV signed division operations converted to unsigned division"); +STATISTIC( + NumSimplifiedSRem, + "Number of IV signed remainder operations converted to unsigned remainder"); STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); namespace { @@ -51,15 +53,17 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; - + SCEVExpander &Rewriter; SmallVectorImpl<WeakTrackingVH> &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) - : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { + LoopInfo *LI, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead), + Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -73,12 +77,17 @@ namespace { Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool replaceIVUserWithLoopInvariant(Instruction *UseInst); bool eliminateOverflowIntrinsic(CallInst *CI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); - void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, - bool IsSigned); + void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned); + void replaceRemWithNumerator(BinaryOperator *Rem); + void replaceRemWithNumeratorOrZero(BinaryOperator *Rem); + void replaceSRemWithURem(BinaryOperator *Rem); bool eliminateSDiv(BinaryOperator *SDiv); bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand); @@ -151,6 
+160,74 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) return IVSrc; } +bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, + Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands (in the specific context of the + // current loop) + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); + const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); + + ICmpInst::Predicate InvariantPredicate; + const SCEV *InvariantLHS, *InvariantRHS; + + auto *PN = dyn_cast<PHINode>(IVOperand); + if (!PN) + return false; + if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, + InvariantLHS, InvariantRHS)) + return false; + + // Rewrite the comparison to a loop invariant comparison if it can be done + // cheaply, where cheaply means "we don't need to emit any new + // instructions". + + SmallDenseMap<const SCEV*, Value*> CheapExpansions; + CheapExpansions[S] = ICmp->getOperand(IVOperIdx); + CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); + + // TODO: Support multiple entry loops? (We currently bail out of these in + // the IndVarSimplify pass) + if (auto *BB = L->getLoopPredecessor()) { + const int Idx = PN->getBasicBlockIndex(BB); + if (Idx >= 0) { + Value *Incoming = PN->getIncomingValue(Idx); + const SCEV *IncomingS = SE->getSCEV(Incoming); + CheapExpansions[IncomingS] = Incoming; + } + } + Value *NewLHS = CheapExpansions[InvariantLHS]; + Value *NewRHS = CheapExpansions[InvariantRHS]; + + if (!NewLHS) + if (auto *ConstLHS = dyn_cast<SCEVConstant>(InvariantLHS)) + NewLHS = ConstLHS->getValue(); + if (!NewRHS) + if (auto *ConstRHS = dyn_cast<SCEVConstant>(InvariantRHS)) + NewRHS = ConstRHS->getValue(); + + if (!NewLHS || !NewRHS) + // We could not find an existing value to replace either LHS or RHS. + // Generating new instructions has subtler tradeoffs, so avoid doing that + // for now. + return false; + + DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + ICmp->setPredicate(InvariantPredicate); + ICmp->setOperand(0, NewLHS); + ICmp->setOperand(1, NewRHS); + return true; +} + /// SimplifyIVUsers helper for eliminating useless /// comparisons against an induction variable. void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { @@ -164,17 +241,11 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { Pred = ICmpInst::getSwappedPredicate(Pred); } - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); - const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); - - // Simplify unnecessary loops away. + // Get the SCEVs for the ICmp operands (in the specific context of the + // current loop) const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - ICmpInst::Predicate InvariantPredicate; - const SCEV *InvariantLHS, *InvariantRHS; + const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); + const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); // If the condition is always true or always false, replace it with // a constant value. 
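To illustrate the always-true case mentioned at the end of the hunk above, a hedged sketch with assumed loop bounds:

  // SCEV knows I ranges over [0, 16), so the guard is provably true on
  // every iteration; eliminateIVComparison replaces it with a constant and
  // the branch folds away.
  int sumFirst16(const int *A) {
    int Sum = 0;
    for (unsigned I = 0; I != 16; ++I)
      if (I < 100) // always true for this IV
        Sum += A[I];
    return Sum;
  }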
@@ -186,85 +257,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); DeadInsts.emplace_back(ICmp); DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); - } else if (isa<PHINode>(IVOperand) && - SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, - InvariantLHS, InvariantRHS)) { - - // Rewrite the comparison to a loop invariant comparison if it can be done - // cheaply, where cheaply means "we don't need to emit any new - // instructions". - - Value *NewLHS = nullptr, *NewRHS = nullptr; - - if (S == InvariantLHS || X == InvariantLHS) - NewLHS = - ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx)); - - if (S == InvariantRHS || X == InvariantRHS) - NewRHS = - ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx)); - - auto *PN = cast<PHINode>(IVOperand); - for (unsigned i = 0, e = PN->getNumIncomingValues(); - i != e && (!NewLHS || !NewRHS); - ++i) { - - // If this is a value incoming from the backedge, then it cannot be a loop - // invariant value (since we know that IVOperand is an induction variable). - if (L->contains(PN->getIncomingBlock(i))) - continue; - - // NB! This following assert does not fundamentally have to be true, but - // it is true today given how SCEV analyzes induction variables. - // Specifically, today SCEV will *not* recognize %iv as an induction - // variable in the following case: - // - // define void @f(i32 %k) { - // entry: - // br i1 undef, label %r, label %l - // - // l: - // %k.inc.l = add i32 %k, 1 - // br label %loop - // - // r: - // %k.inc.r = add i32 %k, 1 - // br label %loop - // - // loop: - // %iv = phi i32 [ %k.inc.l, %l ], [ %k.inc.r, %r ], [ %iv.inc, %loop ] - // %iv.inc = add i32 %iv, 1 - // br label %loop - // } - // - // but if it starts to, at some point, then the assertion below will have - // to be changed to a runtime check. - - Value *Incoming = PN->getIncomingValue(i); - -#ifndef NDEBUG - if (auto *I = dyn_cast<Instruction>(Incoming)) - assert(DT->dominates(I, ICmp) && "Should be a unique loop dominating value!"); -#endif - - const SCEV *IncomingS = SE->getSCEV(Incoming); - - if (!NewLHS && IncomingS == InvariantLHS) - NewLHS = Incoming; - if (!NewRHS && IncomingS == InvariantRHS) - NewRHS = Incoming; - } - - if (!NewLHS || !NewRHS) - // We could not find an existing value to replace either LHS or RHS. - // Generating new instructions has subtler tradeoffs, so avoid doing that - // for now. - return; - - DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); - ICmp->setPredicate(InvariantPredicate); - ICmp->setOperand(0, NewLHS); - ICmp->setOperand(1, NewRHS); + } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { + // fallthrough to end of function } else if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { // If we were unable to make anything above, all we can is to canonicalize @@ -309,54 +303,90 @@ bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { return false; } -/// SimplifyIVUsers helper for eliminating useless -/// remainder operations operating on an induction variable. 
-void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, - Value *IVOperand, - bool IsSigned) { +// i %s n -> i %u n if i >= 0 and n >= 0 +void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { + auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); + auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D, + Rem->getName() + ".urem", Rem); + Rem->replaceAllUsesWith(URem); + DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); + ++NumSimplifiedSRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +// i % n --> i if i is in [0,n). +void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) { + Rem->replaceAllUsesWith(Rem->getOperand(0)); + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). +void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) { + auto *T = Rem->getType(); + auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D); + SelectInst *Sel = + SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem); + Rem->replaceAllUsesWith(Sel); + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +/// SimplifyIVUsers helper for eliminating useless remainder operations +/// operating on an induction variable or replacing srem by urem. +void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned) { + auto *NValue = Rem->getOperand(0); + auto *DValue = Rem->getOperand(1); // We're only interested in the case where we know something about - // the numerator. - if (IVOperand != Rem->getOperand(0)) + // the numerator, unless it is a srem, because we want to replace srem by urem + // in general. + bool UsedAsNumerator = IVOperand == NValue; + if (!UsedAsNumerator && !IsSigned) return; - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(Rem->getOperand(0)); - const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + const SCEV *N = SE->getSCEV(NValue); // Simplify unnecessary loops away. const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // i % n --> i if i is in [0,n). - if ((!IsSigned || SE->isKnownNonNegative(S)) && - SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - S, X)) - Rem->replaceAllUsesWith(Rem->getOperand(0)); - else { - // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). - const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType())); - if (IsSigned && !SE->isKnownNonNegative(LessOne)) - return; + N = SE->getSCEVAtScope(N, ICmpLoop); + + bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N); + + // Do not proceed if the Numerator may be negative + if (!IsNumeratorNonNegative) + return; - if (!SE->isKnownPredicate(IsSigned ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - LessOne, X)) + const SCEV *D = SE->getSCEV(DValue); + D = SE->getSCEVAtScope(D, ICmpLoop); + + if (UsedAsNumerator) { + auto LT = IsSigned ? 
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + if (SE->isKnownPredicate(LT, N, D)) { + replaceRemWithNumerator(Rem); return; + } - ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, - Rem->getOperand(0), Rem->getOperand(1)); - SelectInst *Sel = - SelectInst::Create(ICmp, - ConstantInt::get(Rem->getType(), 0), - Rem->getOperand(0), "tmp", Rem); - Rem->replaceAllUsesWith(Sel); + auto *T = Rem->getType(); + const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T)); + if (SE->isKnownPredicate(LT, NLessOne, D)) { + replaceRemWithNumeratorOrZero(Rem); + return; + } } - DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); - ++NumElimRem; - Changed = true; - DeadInsts.emplace_back(Rem); + // Try to replace SRem with URem, if both N and D are known non-negative. + // Since we have already checked N, we only need to check D now. + if (!IsSigned || !SE->isKnownNonNegative(D)) + return; + + replaceSRemWithURem(Rem); } bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { @@ -474,7 +504,7 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) { bool IsSRem = Bin->getOpcode() == Instruction::SRem; if (IsSRem || Bin->getOpcode() == Instruction::URem) { - eliminateIVRemainder(Bin, IVOperand, IsSRem); + simplifyIVRemainder(Bin, IVOperand, IsSRem); return true; } @@ -492,6 +522,40 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return false; } +static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { + if (auto *BB = L->getLoopPreheader()) + return BB->getTerminator(); + + return Hint; +} + +/// Replace the UseInst with a constant if possible. +bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + if (!SE->isLoopInvariant(S, L)) + return false; + + // Do not generate something ridiculous even if S is loop invariant. + if (Rewriter.isHighCostExpansion(S, L, I)) + return false; + + auto *IP = GetLoopInvariantInsertPosition(L, I); + auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); + + I->replaceAllUsesWith(Invariant); + DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I + << " with loop invariant: " << *S << '\n'); + ++NumFoldedUser; + Changed = true; + DeadInsts.emplace_back(I); + return true; +} + /// Eliminate any operation that SCEV can prove is an identity function. bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand) { @@ -627,7 +691,7 @@ bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, /// Add all uses of Def to the current IV's worklist. static void pushIVUsers( - Instruction *Def, + Instruction *Def, Loop *L, SmallPtrSet<Instruction*,16> &Simplified, SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { @@ -638,8 +702,19 @@ static void pushIVUsers( // Also ensure unique worklist users. // If Def is a LoopPhi, it may not be in the Simplified set, so check for // self edges first. - if (UI != Def && Simplified.insert(UI).second) - SimpleIVUsers.push_back(std::make_pair(UI, Def)); + if (UI == Def) + continue; + + // Only change the current Loop; do not change the other parts (e.g. other + // Loops). + if (!L->contains(UI)) + continue; + + // Do not push the same instruction more than once.
+ if (!Simplified.insert(UI).second) + continue; + + SimpleIVUsers.push_back(std::make_pair(UI, Def)); } } @@ -689,7 +764,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { // Push users of the current LoopPhi. In rare cases, pushIVUsers may be // called multiple times for the same LoopPhi. This is the proper thing to // do for loop header phis that use each other. - pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers); while (!SimpleIVUsers.empty()) { std::pair<Instruction*, Instruction*> UseOper = @@ -699,6 +774,11 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { // Bypass back edges to avoid extra work. if (UseInst == CurrIV) continue; + // Try to replace UseInst with a loop invariant before any other + // simplifications. + if (replaceIVUserWithLoopInvariant(UseInst)) + continue; + Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { assert(N <= Simplified.size() && "runaway iteration"); @@ -712,7 +792,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { continue; if (eliminateIVUser(UseOper.first, IVOperand)) { - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); continue; } @@ -722,7 +802,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); } } @@ -732,7 +812,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { continue; } if (isSimpleIVUser(UseOper.first, L, SE)) { - pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers); } } } @@ -745,8 +825,9 @@ void IVVisitor::anchor() { } /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead, - IVVisitor *V) { - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); + SCEVExpander &Rewriter, IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter, + Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } @@ -755,9 +836,13 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// loop. This does not actually change or add IVs. 
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) { + SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { - Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead, Rewriter); } return Changed; } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 2ea15f65cef9..f3d4f2ef38d7 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -20,7 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 77c0a41929ac..03a1d55ddc30 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -18,10 +18,11 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -484,10 +485,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize); uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize); if (LenTrue && LenFalse) { - Function *Caller = CI->getParent()->getParent(); - emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller, - SI->getDebugLoc(), - "folded strlen(select) to select of constants"); + ORE.emit([&]() { + return OptimizationRemark("instcombine", "simplify-libcalls", CI) + << "folded strlen(select) to select of constants"; + }); return B.CreateSelect(SI->getCondition(), ConstantInt::get(CI->getType(), LenTrue - 1), ConstantInt::get(CI->getType(), LenFalse - 1)); @@ -509,6 +510,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { Module &M = *CI->getParent()->getParent()->getParent(); unsigned WCharSize = TLI->getWCharSize(M) * 8; + // We cannot perform this optimization without wchar_size metadata. + if (WCharSize == 0) + return nullptr; return optimizeStringLength(CI, B, WCharSize); } @@ -753,29 +757,44 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { } // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 + // TODO: The case where both inputs are constants does not need to be limited + // to legal integers or equality comparison. See block below this. 
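For the memcmp fold sketched in the comment above, a small self-contained example; the types and the length are assumptions chosen to match an 8-byte legal integer:

  #include <cstdint>
  #include <cstring>

  // The zero-equality memcmp on aligned 8-byte data ...
  bool eqMemcmp(const std::uint64_t *A, const std::uint64_t *B) {
    return std::memcmp(A, B, 8) == 0;
  }

  // ... is folded to one wide load-and-compare, the form built below.
  bool eqWide(const std::uint64_t *A, const std::uint64_t *B) {
    return *A == *B;
  }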
if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { - IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType); - if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment && - getKnownAlignment(RHS, DL, CI) >= PrefAlignment) { - - Type *LHSPtrTy = - IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); - Type *RHSPtrTy = - IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - - Value *LHSV = - B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv"); - Value *RHSV = - B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); + // First, see if we can fold either argument to a constant. + Value *LHSV = nullptr; + if (auto *LHSC = dyn_cast<Constant>(LHS)) { + LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo()); + LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL); + } + Value *RHSV = nullptr; + if (auto *RHSC = dyn_cast<Constant>(RHS)) { + RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo()); + RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL); + } + // Don't generate unaligned loads. If either source is constant data, + // alignment doesn't matter for that source because there is no load. + if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) && + (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) { + if (!LHSV) { + Type *LHSPtrTy = + IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); + LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); + } + if (!RHSV) { + Type *RHSPtrTy = + IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); + RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); + } return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); } } - // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const). + // TODO: This is limited to i8 arrays. StringRef LHSStr, RHSStr; if (getConstantStringInfo(LHS, LHSStr) && getConstantStringInfo(RHS, RHSStr)) { @@ -1014,6 +1033,35 @@ static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { return B.CreateFPExt(V, B.getDoubleTy()); } +// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z))) +Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) { + if (!CI->isFast()) + return nullptr; + + // Propagate fast-math flags from the existing call to new instructions. 
+ IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + Value *Real, *Imag; + if (CI->getNumArgOperands() == 1) { + Value *Op = CI->getArgOperand(0); + assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!"); + Real = B.CreateExtractValue(Op, 0, "real"); + Imag = B.CreateExtractValue(Op, 1, "imag"); + } else { + assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!"); + Real = CI->getArgOperand(0); + Imag = CI->getArgOperand(1); + } + + Value *RealReal = B.CreateFMul(Real, Real); + Value *ImagImag = B.CreateFMul(Imag, Imag); + + Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt, + CI->getType()); + return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"); +} + Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1055,6 +1103,51 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { return InnerChain[Exp]; } +/// Use square root in place of pow(x, +/-0.5). +Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { + // TODO: There is some subset of 'fast' under which these transforms should + // be allowed. + if (!Pow->isFast()) + return nullptr; + + const APFloat *Arg1C; + if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C))) + return nullptr; + if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5)) + return nullptr; + + // Fast-math flags from the pow() are propagated to all replacement ops. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(Pow->getFastMathFlags()); + Type *Ty = Pow->getType(); + Value *Sqrt; + if (Pow->hasFnAttr(Attribute::ReadNone)) { + // We know that errno is never set, so replace with an intrinsic: + // pow(x, 0.5) --> llvm.sqrt(x) + // llvm.pow(x, 0.5) --> llvm.sqrt(x) + auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty); + Sqrt = B.CreateCall(F, Pow->getArgOperand(0)); + } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, + LibFunc_sqrtl)) { + // Errno could be set, so we must use a sqrt libcall. + // TODO: We also should check that the target can in fact lower the sqrt + // libcall. We currently have no way to ask this question, so we ask + // whether the target has a sqrt libcall which is not exactly the same. + Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0), + TLI->getName(LibFunc_sqrt), B, + Pow->getCalledFunction()->getAttributes()); + } else { + // We can't replace with an intrinsic or a libcall. + return nullptr; + } + + // If this is pow(x, -0.5), get the reciprocal. + if (Arg1C->isExactlyValue(-0.5)) + Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt); + + return Sqrt; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1092,7 +1185,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). 
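The overflow hazard in the comment above is easy to reproduce; a standalone check using the same example values (x = 1000, y = 0.001):

  #include <cmath>
  #include <cstdio>

  // exp(1000) overflows double to +inf, so pow(exp(x), y) stays +inf, while
  // the rewritten exp(x * y) is exp(1). The two disagree, which is why the
  // transform is restricted to fast-math.
  int main() {
    double X = 1000.0, Y = 0.001;
    std::printf("pow(exp(x), y) = %g\n", std::pow(std::exp(X), Y)); // inf
    std::printf("exp(x * y)     = %g\n", std::exp(X * Y));          // ~2.71828
  }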
auto *OpC = dyn_cast<CallInst>(Op1); - if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + if (OpC && OpC->isFast() && CI->isFast()) { LibFunc Func; Function *OpCCallee = OpC->getCalledFunction(); if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && @@ -1105,6 +1198,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { } } + if (Value *Sqrt = replacePowWithSqrt(CI, B)) + return Sqrt; + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); if (!Op2C) return Ret; @@ -1112,42 +1208,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); - if (Op2C->isExactlyValue(-0.5) && - hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) { - // If -ffast-math: - // pow(x, -0.5) -> 1.0 / sqrt(x) - if (CI->hasUnsafeAlgebra()) { - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - // TODO: If the pow call is an intrinsic, we should lower to the sqrt - // intrinsic, so we match errno semantics. We also should check that the - // target can in fact lower the sqrt intrinsic -- we currently have no way - // to ask this question other than asking whether the target has a sqrt - // libcall, which is a sufficient but not necessary condition. - Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B, - Callee->getAttributes()); - - return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip"); - } - } - + // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). if (Op2C->isExactlyValue(0.5) && hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { - - // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (CI->hasUnsafeAlgebra()) { - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - // TODO: As above, we should lower to the sqrt intrinsic if the pow is an - // intrinsic, to match errno semantics. - return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B, - Callee->getAttributes()); - } - // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. @@ -1169,15 +1233,21 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return Sel; } - if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + // Propagate fast-math-flags from the call to any created instructions. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + // pow(x, 1.0) --> x + if (Op2C->isExactlyValue(1.0)) return Op1; - if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + // pow(x, 2.0) --> x * x + if (Op2C->isExactlyValue(2.0)) return B.CreateFMul(Op1, Op1, "pow2"); - if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + // pow(x, -1.0) --> 1.0 / x + if (Op2C->isExactlyValue(-1.0)) return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); // In -ffast-math, generate repeated fmul instead of generating pow(x, n). - if (CI->hasUnsafeAlgebra()) { + if (CI->isFast()) { APFloat V = abs(Op2C->getValueAPF()); // We limit to a max of 7 fmul(s). Thus max exponent is 32. // This transformation applies to integer exponents only. @@ -1185,10 +1255,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { !V.isInteger()) return nullptr; - // Propagate fast math flags. 
- IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - // We will memoize intermediate products of the Addition Chain. Value *InnerChain[33] = {nullptr}; InnerChain[1] = Op1; @@ -1196,8 +1262,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // We cannot readily convert a non-double type (like float) to a double. // So we first convert V to something which could be converted to double. - bool ignored; - V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored); + bool Ignored; + V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); Value *FMul = getPow(InnerChain, V.convertToDouble(), B); // For negative exponents simply compute the reciprocal. @@ -1265,9 +1331,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; - if (CI->hasUnsafeAlgebra()) { - // Unsafe algebra sets all fast-math-flags to true. - FMF.setUnsafeAlgebra(); + if (CI->isFast()) { + // If the call is 'fast', then anything we create here will also be 'fast'. + FMF.setFast(); } else { // At a minimum, no-nans-fp-math must be true. if (!CI->hasNoNaNs()) @@ -1298,13 +1364,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { if (UnsafeFPShrink && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->hasUnsafeAlgebra()) + if (!CI->isFast()) return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); - // The earlier call must also be unsafe in order to do these transforms. - if (!OpC || !OpC->hasUnsafeAlgebra()) + // The earlier call must also be 'fast' in order to do these transforms. + if (!OpC || !OpC->isFast()) return Ret; // log(pow(x,y)) -> y*log(x) @@ -1314,7 +1380,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; - FMF.setUnsafeAlgebra(); + FMF.setFast(); B.setFastMathFlags(FMF); LibFunc Func; @@ -1346,11 +1412,11 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->hasUnsafeAlgebra()) + if (!CI->isFast()) return Ret; Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); - if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + if (!I || I->getOpcode() != Instruction::FMul || !I->isFast()) return Ret; // We're looking for a repeated factor in a multiplication tree, @@ -1372,8 +1438,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Value *OtherMul0, *OtherMul1; if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1 && - cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) { // Matched: sqrt((x * x) * z) RepeatOp = OtherMul0; OtherOp = Op1; @@ -1418,8 +1483,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { if (!OpC) return Ret; - // Both calls must allow unsafe optimizations in order to remove them. - if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + // Both calls must be 'fast' in order to remove them. 
+ if (!CI->isFast() || !OpC->isFast()) return Ret; // tan(atan(x)) -> x @@ -2043,13 +2108,107 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return nullptr; } +Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, + LibFunc Func, + IRBuilder<> &Builder) { + // Don't optimize calls that require strict floating point semantics. + if (CI->isStrictFP()) + return nullptr; + + switch (Func) { + case LibFunc_cosf: + case LibFunc_cos: + case LibFunc_cosl: + return optimizeCos(CI, Builder); + case LibFunc_sinpif: + case LibFunc_sinpi: + case LibFunc_cospif: + case LibFunc_cospi: + return optimizeSinCosPi(CI, Builder); + case LibFunc_powf: + case LibFunc_pow: + case LibFunc_powl: + return optimizePow(CI, Builder); + case LibFunc_exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + return optimizeExp2(CI, Builder); + case LibFunc_fabsf: + case LibFunc_fabs: + case LibFunc_fabsl: + return replaceUnaryCall(CI, Builder, Intrinsic::fabs); + case LibFunc_sqrtf: + case LibFunc_sqrt: + case LibFunc_sqrtl: + return optimizeSqrt(CI, Builder); + case LibFunc_log: + case LibFunc_log10: + case LibFunc_log1p: + case LibFunc_log2: + case LibFunc_logb: + return optimizeLog(CI, Builder); + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + return optimizeTan(CI, Builder); + case LibFunc_ceil: + return replaceUnaryCall(CI, Builder, Intrinsic::ceil); + case LibFunc_floor: + return replaceUnaryCall(CI, Builder, Intrinsic::floor); + case LibFunc_round: + return replaceUnaryCall(CI, Builder, Intrinsic::round); + case LibFunc_nearbyint: + return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); + case LibFunc_rint: + return replaceUnaryCall(CI, Builder, Intrinsic::rint); + case LibFunc_trunc: + return replaceUnaryCall(CI, Builder, Intrinsic::trunc); + case LibFunc_acos: + case LibFunc_acosh: + case LibFunc_asin: + case LibFunc_asinh: + case LibFunc_atan: + case LibFunc_atanh: + case LibFunc_cbrt: + case LibFunc_cosh: + case LibFunc_exp: + case LibFunc_exp10: + case LibFunc_expm1: + case LibFunc_sin: + case LibFunc_sinh: + case LibFunc_tanh: + if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName())) + return optimizeUnaryDoubleFP(CI, Builder, true); + return nullptr; + case LibFunc_copysign: + if (hasFloatVersion(CI->getCalledFunction()->getName())) + return optimizeBinaryDoubleFP(CI, Builder); + return nullptr; + case LibFunc_fminf: + case LibFunc_fmin: + case LibFunc_fminl: + case LibFunc_fmaxf: + case LibFunc_fmax: + case LibFunc_fmaxl: + return optimizeFMinFMax(CI, Builder); + case LibFunc_cabs: + case LibFunc_cabsf: + case LibFunc_cabsl: + return optimizeCAbs(CI, Builder); + default: + return nullptr; + } +} + Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + // TODO: Split out the code below that operates on FP calls so that + // we can allow all non-FP calls with the StrictFP attribute to be + // optimized. if (CI->isNoBuiltin()) return nullptr; LibFunc Func; Function *Callee = CI->getCalledFunction(); - StringRef FuncName = Callee->getName(); SmallVector<OperandBundleDef, 2> OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -2057,15 +2216,19 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { bool isCallingConvC = isCallingConvCCompatible(CI); // Command-line parameter overrides instruction attribute. + // This can't be moved to optimizeFloatingPointLibCall() because it may be + // used by the intrinsic optimizations.
if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; - else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra()) + else if (isa<FPMathOperator>(CI) && CI->isFast()) UnsafeFPShrink = true; // First, check for intrinsics. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { if (!isCallingConvC) return nullptr; + // The FP intrinsics have corresponding constrained versions so we don't + // need to check for the StrictFP attribute here. switch (II->getIntrinsicID()) { case Intrinsic::pow: return optimizePow(CI, Builder); @@ -2106,32 +2269,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return nullptr; if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) return V; + if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder)) + return V; switch (Func) { - case LibFunc_cosf: - case LibFunc_cos: - case LibFunc_cosl: - return optimizeCos(CI, Builder); - case LibFunc_sinpif: - case LibFunc_sinpi: - case LibFunc_cospif: - case LibFunc_cospi: - return optimizeSinCosPi(CI, Builder); - case LibFunc_powf: - case LibFunc_pow: - case LibFunc_powl: - return optimizePow(CI, Builder); - case LibFunc_exp2l: - case LibFunc_exp2: - case LibFunc_exp2f: - return optimizeExp2(CI, Builder); - case LibFunc_fabsf: - case LibFunc_fabs: - case LibFunc_fabsl: - return replaceUnaryCall(CI, Builder, Intrinsic::fabs); - case LibFunc_sqrtf: - case LibFunc_sqrt: - case LibFunc_sqrtl: - return optimizeSqrt(CI, Builder); case LibFunc_ffs: case LibFunc_ffsl: case LibFunc_ffsll: @@ -2160,18 +2300,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeFWrite(CI, Builder); case LibFunc_fputs: return optimizeFPuts(CI, Builder); - case LibFunc_log: - case LibFunc_log10: - case LibFunc_log1p: - case LibFunc_log2: - case LibFunc_logb: - return optimizeLog(CI, Builder); case LibFunc_puts: return optimizePuts(CI, Builder); - case LibFunc_tan: - case LibFunc_tanf: - case LibFunc_tanl: - return optimizeTan(CI, Builder); case LibFunc_perror: return optimizeErrorReporting(CI, Builder); case LibFunc_vfprintf: @@ -2179,46 +2309,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeErrorReporting(CI, Builder, 0); case LibFunc_fputc: return optimizeErrorReporting(CI, Builder, 1); - case LibFunc_ceil: - return replaceUnaryCall(CI, Builder, Intrinsic::ceil); - case LibFunc_floor: - return replaceUnaryCall(CI, Builder, Intrinsic::floor); - case LibFunc_round: - return replaceUnaryCall(CI, Builder, Intrinsic::round); - case LibFunc_nearbyint: - return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); - case LibFunc_rint: - return replaceUnaryCall(CI, Builder, Intrinsic::rint); - case LibFunc_trunc: - return replaceUnaryCall(CI, Builder, Intrinsic::trunc); - case LibFunc_acos: - case LibFunc_acosh: - case LibFunc_asin: - case LibFunc_asinh: - case LibFunc_atan: - case LibFunc_atanh: - case LibFunc_cbrt: - case LibFunc_cosh: - case LibFunc_exp: - case LibFunc_exp10: - case LibFunc_expm1: - case LibFunc_sin: - case LibFunc_sinh: - case LibFunc_tanh: - if (UnsafeFPShrink && hasFloatVersion(FuncName)) - return optimizeUnaryDoubleFP(CI, Builder, true); - return nullptr; - case LibFunc_copysign: - if (hasFloatVersion(FuncName)) - return optimizeBinaryDoubleFP(CI, Builder); - return nullptr; - case LibFunc_fminf: - case LibFunc_fmin: - case LibFunc_fminl: - case LibFunc_fmaxf: - case LibFunc_fmax: - case LibFunc_fmaxl: - return optimizeFMinFMax(CI, Builder); default: return nullptr; } @@ -2228,9 +2318,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst 
*CI) { LibCallSimplifier::LibCallSimplifier( const DataLayout &DL, const TargetLibraryInfo *TLI, + OptimizationRemarkEmitter &ORE, function_ref<void(Instruction *, Value *)> Replacer) - : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), - Replacer(Replacer) {} + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), + UnsafeFPShrink(false), Replacer(Replacer) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // Indirect through the replacer used in this instance. diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp index e9a368f4faa4..968eb0208f43 100644 --- a/lib/Transforms/Utils/SplitModule.cpp +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -13,32 +13,51 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "split-module" - #include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <memory> #include <queue> +#include <utility> +#include <vector> using namespace llvm; +#define DEBUG_TYPE "split-module" + namespace { -typedef EquivalenceClasses<const GlobalValue *> ClusterMapType; -typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType; -typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType; -} + +using ClusterMapType = EquivalenceClasses<const GlobalValue *>; +using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>; +using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>; + +} // end anonymous namespace static void addNonConstUser(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const User *U) { @@ -125,9 +144,9 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); }; - std::for_each(M->begin(), M->end(), recordGVSet); - std::for_each(M->global_begin(), M->global_end(), recordGVSet); - std::for_each(M->alias_begin(), M->alias_end(), recordGVSet); + llvm::for_each(M->functions(), recordGVSet); + llvm::for_each(M->globals(), recordGVSet); + llvm::for_each(M->aliases(), recordGVSet); // Assigned all GVs to merged clusters while balancing number of objects in // each. 
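The change just above moves from iterator-pair std::for_each to range-based llvm::for_each over the module's functions, globals, and aliases. A minimal sketch of that style, using a hypothetical helper that is not part of the patch:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/Module.h"

  // Walks every function, global variable, and alias in a module with the
  // same range-based llvm::for_each calls adopted by findPartitions.
  static unsigned countGlobalValues(const llvm::Module &M) {
    unsigned N = 0;
    auto Count = [&N](const llvm::GlobalValue &) { ++N; };
    llvm::for_each(M.functions(), Count);
    llvm::for_each(M.globals(), Count);
    llvm::for_each(M.aliases(), Count);
    return N;
  }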
@@ -147,7 +166,8 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
   for (unsigned i = 0; i < N; ++i)
     BalancinQueue.push(std::make_pair(i, 0));
 
-  typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
+  using SortType = std::pair<unsigned, ClusterMapType::iterator>;
+
   SmallVector<SortType, 64> Sets;
   SmallPtrSet<const GlobalValue *, 32> Visited;
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 20107553665f..3640541e63cc 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -1,4 +1,4 @@
-//===- SymbolRewriter.cpp - Symbol Rewriter ---------------------*- C++ -*-===//
+//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -57,25 +57,41 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "symbol-rewriter"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+#include <vector>
 
 using namespace llvm;
 using namespace SymbolRewriter;
 
+#define DEBUG_TYPE "symbol-rewriter"
+
 static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
                                              cl::desc("Symbol Rewrite Map"),
-                                             cl::value_desc("filename"));
+                                             cl::value_desc("filename"),
+                                             cl::Hidden);
 
 static void rewriteComdat(Module &M, GlobalObject *GO,
                           const std::string &Source,
@@ -92,8 +108,9 @@ static void rewriteComdat(Module &M, GlobalObject *GO,
 }
 
 namespace {
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const>
+          ValueType *(Module::*Get)(StringRef) const>
 class ExplicitRewriteDescriptor : public RewriteDescriptor {
 public:
   const std::string Source;
@@ -110,8 +127,10 @@ public:
   }
 };
 
+} // end anonymous namespace
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const>
+          ValueType *(Module::*Get)(StringRef) const>
 bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
   bool Changed = false;
   if (ValueType *S = (M.*Get)(Source)) {
@@ -128,10 +147,12 @@ bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
   return Changed;
 }
 
+namespace {
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const,
+          ValueType *(Module::*Get)(StringRef) const,
           iterator_range<typename iplist<ValueType>::iterator>
-          (llvm::Module::*Iterator)()>
+          (Module::*Iterator)()>
 class PatternRewriteDescriptor : public RewriteDescriptor {
 public:
   const std::string Pattern;
@@ -147,10 +168,12 @@ public:
   }
 };
 
+} // end anonymous namespace
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const,
+          ValueType *(Module::*Get)(StringRef) const,
           iterator_range<typename iplist<ValueType>::iterator>
-          (llvm::Module::*Iterator)()>
+          (Module::*Iterator)()>
 bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
 performOnModule(Module &M) {
   bool Changed = false;
@@ -178,55 +201,52 @@ performOnModule(Module &M) {
   return Changed;
 }
 
+namespace {
+
 /// Represents a rewrite for an explicitly named (function) symbol. Both the
 /// source function name and target function name of the transformation are
 /// explicitly spelt out.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function,
-                                  llvm::Function, &llvm::Module::getFunction>
-    ExplicitRewriteFunctionDescriptor;
+using ExplicitRewriteFunctionDescriptor =
+    ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+                              &Module::getFunction>;
 
 /// Represents a rewrite for an explicitly named (global variable) symbol. Both
 /// the source variable name and target variable name are spelt out. This
 /// applies only to module level variables.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
-                                  llvm::GlobalVariable,
-                                  &llvm::Module::getGlobalVariable>
-    ExplicitRewriteGlobalVariableDescriptor;
+using ExplicitRewriteGlobalVariableDescriptor =
+    ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+                              GlobalVariable, &Module::getGlobalVariable>;
 
 /// Represents a rewrite for an explicitly named global alias. Both the source
 /// and target name are explicitly spelt out.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
-                                  llvm::GlobalAlias,
-                                  &llvm::Module::getNamedAlias>
-    ExplicitRewriteNamedAliasDescriptor;
+using ExplicitRewriteNamedAliasDescriptor =
+    ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+                              &Module::getNamedAlias>;
 
 /// Represents a rewrite for a regular expression based pattern for functions.
 /// A pattern for the function name is provided and a transformation for that
 /// pattern to determine the target function name create the rewrite rule.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::Function,
-                                 llvm::Function, &llvm::Module::getFunction,
-                                 &llvm::Module::functions>
-    PatternRewriteFunctionDescriptor;
+using PatternRewriteFunctionDescriptor =
+    PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+                             &Module::getFunction, &Module::functions>;
 
 /// Represents a rewrite for a global variable based upon a matching pattern.
 /// Each global variable matching the provided pattern will be transformed as
 /// described in the transformation pattern for the target. Applies only to
 /// module level variables.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
-                                 llvm::GlobalVariable,
-                                 &llvm::Module::getGlobalVariable,
-                                 &llvm::Module::globals>
-    PatternRewriteGlobalVariableDescriptor;
+using PatternRewriteGlobalVariableDescriptor =
+    PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+                             GlobalVariable, &Module::getGlobalVariable,
+                             &Module::globals>;
 
 /// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
 /// aliases which match a given pattern. The provided transformation will be
 /// applied to each of the matching names.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
-                                 llvm::GlobalAlias,
-                                 &llvm::Module::getNamedAlias,
-                                 &llvm::Module::aliases>
-    PatternRewriteNamedAliasDescriptor;
-} // namespace
+using PatternRewriteNamedAliasDescriptor =
+    PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+                             &Module::getNamedAlias, &Module::aliases>;
+
+} // end anonymous namespace
 
 bool RewriteMapParser::parse(const std::string &MapFile,
                              RewriteDescriptorList *DL) {
@@ -497,6 +517,7 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
 }
 
 namespace {
+
 class RewriteSymbolsLegacyPass : public ModulePass {
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -510,9 +531,11 @@ private:
   RewriteSymbolPass Impl;
 };
 
+} // end anonymous namespace
+
 char RewriteSymbolsLegacyPass::ID = 0;
 
-RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID), Impl() {
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
   initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
 }
 
@@ -523,9 +546,7 @@ RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
 bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
   return Impl.runImpl(M);
 }
-}
 
-namespace llvm {
 PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
   if (!runImpl(M))
     return PreservedAnalyses::all();
@@ -550,7 +571,6 @@ void RewriteSymbolPass::loadAndParseMapFiles() {
   for (const auto &MapFile : MapFiles)
     Parser.parse(MapFile, &Descriptors);
 }
-}
 
 INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols",
                 "Rewrite Symbols", false, false)
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 9385f825523c..ed444e4cf43c 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -15,7 +15,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 930972924c3c..8c9ecbc3503e 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,17 +13,36 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
+
 using namespace llvm;
 
 // Out of line method to get vtable etc for class.
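The descriptor aliases in SymbolRewriter.cpp above are converted from typedef to C++11 alias declarations. The using form reads left to right, with the new name first, and unlike typedef it also extends to alias templates. A small self-contained sketch of both points; all names below are illustrative, none come from the LLVM tree:

#include <map>
#include <string>
#include <vector>

// Old style: the alias name trails the full declarator.
typedef std::map<std::string, unsigned> OldCountMap;

// New style: the alias name leads, mirroring the descriptor aliases above.
using CountMap = std::map<std::string, unsigned>;

// Alias templates are only expressible with "using"; typedef has no
// equivalent spelling for this.
template <typename T>
using StringKeyed = std::map<std::string, T>;

int main() {
  CountMap Counts;
  Counts["rewrites"] = 3;
  StringKeyed<std::vector<int>> Table;
  Table["ids"] = {1, 2, 3};
  return (Counts.size() == 1 && Table.size() == 1) ? 0 : 1;
}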
@@ -85,7 +104,6 @@ struct MappingContext {
       : VM(&VM), Materializer(Materializer) {}
 };
 
-class MDNodeMapper;
 class Mapper {
   friend class MDNodeMapper;
 
@@ -175,7 +193,7 @@ class MDNodeMapper {
   /// Data about a node in \a UniquedGraph.
   struct Data {
     bool HasChanged = false;
-    unsigned ID = ~0u;
+    unsigned ID = std::numeric_limits<unsigned>::max();
     TempMDNode Placeholder;
   };
 
@@ -316,7 +334,7 @@ private:
   void remapOperands(MDNode &N, OperandMapper mapOperand);
 };
 
-} // end namespace
+} // end anonymous namespace
 
 Value *Mapper::mapValue(const Value *V) {
   ValueToValueMapTy::iterator I = getVM().find(V);
@@ -579,6 +597,7 @@ void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
 }
 
 namespace {
+
 /// An entry in the worklist for the post-order traversal.
 struct POTWorklistEntry {
   MDNode *N;              ///< Current node.
@@ -590,7 +609,8 @@ struct POTWorklistEntry {
 
   POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {}
 };
-} // end namespace
+
+} // end anonymous namespace
 
 bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) {
   assert(G.Info.empty() && "Expected a fresh traversal");
@@ -653,7 +673,7 @@ void MDNodeMapper::UniquedGraph::propagateChanges() {
       if (D.HasChanged)
         continue;
 
-      if (none_of(N->operands(), [&](const Metadata *Op) {
+      if (llvm::none_of(N->operands(), [&](const Metadata *Op) {
             auto Where = Info.find(Op);
             return Where != Info.end() && Where->second.HasChanged;
           }))
@@ -752,10 +772,11 @@ struct MapMetadataDisabler {
   MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
     VM.disableMapMetadata();
   }
+
   ~MapMetadataDisabler() { VM.enableMapMetadata(); }
 };
 
-} // end namespace
+} // end anonymous namespace
 
 Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
   // If the value already exists in the map, use it.
@@ -1037,11 +1058,13 @@ public:
   explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) {
     assert(!M.hasWorkToDo() && "Expected to be flushed");
   }
+
   ~FlushingMapper() { M.flush(); }
+
   Mapper *operator->() const { return &M; }
 };
 
-} // end namespace
+} // end anonymous namespace
 
 ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
                          ValueMapTypeRemapper *TypeMapper,
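MapMetadataDisabler and FlushingMapper in the ValueMapper.cpp hunks above are small RAII guards: the constructor flips a piece of mapper state, and the destructor restores or flushes it on every path out of the scope, including early returns and exceptions. A minimal sketch of the same pattern under hypothetical names, not the LLVM classes themselves:

#include <cassert>

// Stand-in for the state a guard manipulates; ValueToValueMapTy plays
// this role for the real MapMetadataDisabler.
struct MappingState {
  bool MetadataEnabled = true;
};

class MetadataDisabler {
  MappingState &S;

public:
  explicit MetadataDisabler(MappingState &S) : S(S) {
    S.MetadataEnabled = false; // disable on entry to the scope
  }
  ~MetadataDisabler() { S.MetadataEnabled = true; } // restore on any exit
};

int main() {
  MappingState S;
  {
    MetadataDisabler D(S);
    assert(!S.MetadataEnabled);
    // ... work that must not map metadata ...
  } // destructor runs here, even on early return
  assert(S.MetadataEnabled);
  return 0;
}

Pairing the disable/enable calls in a constructor/destructor is what lets the real code take fast paths that return early without leaking the disabled state.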