author     Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
commit     044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree       1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Transforms/Utils
parent     eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--  lib/Transforms/Utils/ASanStackFrameLayout.cpp    |   9
-rw-r--r--  lib/Transforms/Utils/AddDiscriminators.cpp       |  39
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp         |  56
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp      | 228
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp      |  87
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt              |   3
-rw-r--r--  lib/Transforms/Utils/CallPromotionUtils.cpp      | 328
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp           |   2
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp             |   1
-rw-r--r--  lib/Transforms/Utils/CmpInstAnalysis.cpp         | 108
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp           | 259
-rw-r--r--  lib/Transforms/Utils/CtorUtils.cpp               |   1
-rw-r--r--  lib/Transforms/Utils/EntryExitInstrumenter.cpp   | 163
-rw-r--r--  lib/Transforms/Utils/Evaluator.cpp               |  32
-rw-r--r--  lib/Transforms/Utils/FlattenCFG.cpp              |  30
-rw-r--r--  lib/Transforms/Utils/FunctionComparator.cpp      |  45
-rw-r--r--  lib/Transforms/Utils/FunctionImportUtils.cpp     |  51
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp          | 105
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp                   |   7
-rw-r--r--  lib/Transforms/Utils/Local.cpp                   | 258
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp            |   2
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp              | 137
-rw-r--r--  lib/Transforms/Utils/LoopUnrollPeel.cpp          |  28
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp       | 129
-rw-r--r--  lib/Transforms/Utils/LoopUtils.cpp               | 371
-rw-r--r--  lib/Transforms/Utils/LowerMemIntrinsics.cpp      | 114
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp             |  87
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp                 |  22
-rw-r--r--  lib/Transforms/Utils/MetaRenamer.cpp             |  35
-rw-r--r--  lib/Transforms/Utils/ModuleUtils.cpp             |   2
-rw-r--r--  lib/Transforms/Utils/PredicateInfo.cpp           |  12
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 125
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp              |  39
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp             | 479
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp          | 375
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp    |   2
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp        | 401
-rw-r--r--  lib/Transforms/Utils/SplitModule.cpp             |  46
-rw-r--r--  lib/Transforms/Utils/SymbolRewriter.cpp          | 104
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp  |   1
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp             |  39
41 files changed, 2875 insertions, 1487 deletions
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index df9d5da9e26e..364878dc588d 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -36,9 +36,11 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
static const size_t kMinAlignment = 16;
+// We want to add a full redzone after every variable.
// The larger the variable Size, the larger the redzone.
// The resulting frame size is a multiple of Alignment.
-static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
+static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
+ size_t Alignment) {
size_t Res = 0;
if (Size <= 4) Res = 16;
else if (Size <= 16) Res = 32;
@@ -46,7 +48,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
else if (Size <= 512) Res = Size + 64;
else if (Size <= 4096) Res = Size + 128;
else Res = Size + 256;
- return alignTo(Res, Alignment);
+ return alignTo(std::max(Res, 2 * Granularity), Alignment);
}
ASanStackFrameLayout
@@ -80,7 +82,8 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
assert(Size > 0);
size_t NextAlignment = IsLast ? Granularity
: std::max(Granularity, Vars[i + 1].Alignment);
- size_t SizeWithRedzone = VarAndRedzoneSize(Size, NextAlignment);
+ size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity,
+ NextAlignment);
Vars[i].Offset = Offset;
Offset += SizeWithRedzone;
}
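
The effect of these hunks is that a variable's redzone can no longer shrink below two shadow granules, however small the variable. A standalone sketch of the new computation (the Size <= 128 case is reconstructed from the elided context between the hunks; alignTo is a stand-in for llvm::alignTo rounding up to a power-of-two multiple):

    #include <algorithm>
    #include <cstddef>

    // Stand-in for llvm::alignTo: round Size up to a multiple of Alignment
    // (Alignment is assumed to be a power of two).
    static size_t alignTo(size_t Size, size_t Alignment) {
      return (Size + Alignment - 1) & ~(Alignment - 1);
    }

    // Mirrors the patched VarAndRedzoneSize: the result never drops below
    // two shadow granules, so every variable keeps a full redzone after it.
    static size_t varAndRedzoneSize(size_t Size, size_t Granularity,
                                    size_t Alignment) {
      size_t Res = 0;
      if (Size <= 4) Res = 16;
      else if (Size <= 16) Res = 32;
      else if (Size <= 128) Res = Size + 32;
      else if (Size <= 512) Res = Size + 64;
      else if (Size <= 4096) Res = Size + 128;
      else Res = Size + 256;
      return alignTo(std::max(Res, 2 * Granularity), Alignment);
    }

For example, with Granularity 64 and Alignment 64, a 4-byte variable now gets varAndRedzoneSize(4, 64, 64) == 128, where the old code returned alignTo(16, 64) == 64.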
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 4c9746b8c691..0f0668f24db5 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -50,31 +50,45 @@
//
// For more details about DWARF discriminators, please visit
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "add-discriminators"
+// Command line option to disable discriminator generation even in the
+// presence of debug information. This is only needed when debugging
+// debug info generation issues.
+static cl::opt<bool> NoDiscriminators(
+ "no-discriminators", cl::init(false),
+ cl::desc("Disable generation of discriminator information."));
+
namespace {
+
// The legacy pass of AddDiscriminators.
struct AddDiscriminatorsLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
+
AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -85,18 +99,12 @@ struct AddDiscriminatorsLegacyPass : public FunctionPass {
} // end anonymous namespace
char AddDiscriminatorsLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
"Add DWARF path discriminators", false, false)
INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
"Add DWARF path discriminators", false, false)
-// Command line option to disable discriminator generation even in the
-// presence of debug information. This is only needed when debugging
-// debug info generation issues.
-static cl::opt<bool> NoDiscriminators(
- "no-discriminators", cl::init(false),
- cl::desc("Disable generation of discriminator information."));
-
// Create the legacy AddDiscriminatorsPass.
FunctionPass *llvm::createAddDiscriminatorsPass() {
return new AddDiscriminatorsLegacyPass();
@@ -166,11 +174,11 @@ static bool addDiscriminators(Function &F) {
bool Changed = false;
- typedef std::pair<StringRef, unsigned> Location;
- typedef DenseSet<const BasicBlock *> BBSet;
- typedef DenseMap<Location, BBSet> LocationBBMap;
- typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
- typedef DenseSet<Location> LocationSet;
+ using Location = std::pair<StringRef, unsigned>;
+ using BBSet = DenseSet<const BasicBlock *>;
+ using LocationBBMap = DenseMap<Location, BBSet>;
+ using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
+ using LocationSet = DenseSet<Location>;
LocationBBMap LBM;
LocationDiscriminatorMap LDM;
@@ -242,6 +250,7 @@ static bool addDiscriminators(Function &F) {
bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
return addDiscriminators(F);
}
+
PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (!addDiscriminators(F))
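
The typedef-to-using conversion earlier in this file's diff is behavior-preserving; both spellings introduce the same alias, but the using form also scales to alias templates, which typedef cannot express. A minimal sketch of the difference:

    #include <map>
    #include <string>
    #include <utility>

    // Equivalent alias declarations; the using form reads left to right.
    typedef std::pair<std::string, unsigned> LocationT;
    using Location = std::pair<std::string, unsigned>;

    // Only 'using' supports a templated alias.
    template <typename V> using LocationMap = std::map<Location, V>;
    LocationMap<unsigned> Discriminators; // == std::map<Location, unsigned>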
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 3d5cbfc93f2e..606bd8baccaa 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1,4 +1,4 @@
-//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -13,22 +13,36 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
void llvm::DeleteDeadBlock(BasicBlock *BB) {
@@ -130,8 +144,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
}
// Begin by getting rid of unneeded PHIs.
- if (isa<PHINode>(BB->front()))
+ SmallVector<Value *, 4> IncomingValues;
+ if (isa<PHINode>(BB->front())) {
+ for (auto &I : *BB)
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ if (PN->getIncomingValue(0) != PN)
+ IncomingValues.push_back(PN->getIncomingValue(0));
+ } else
+ break;
FoldSingleEntryPHINodes(BB, MemDep);
+ }
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -143,6 +165,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Move all definitions in the successor to the predecessor...
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ // Eliminate duplicate dbg.values describing the entry PHI node post-splice.
+ for (auto *Incoming : IncomingValues) {
+ if (isa<Instruction>(Incoming)) {
+ SmallVector<DbgValueInst *, 2> DbgValues;
+ SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2>
+ DbgValueSet;
+ llvm::findDbgValues(DbgValues, Incoming);
+ for (auto &DVI : DbgValues) {
+ auto R = DbgValueSet.insert({DVI->getVariable(), DVI->getExpression()});
+ if (!R.second)
+ DVI->eraseFromParent();
+ }
+ }
+ }
+
// Inherit predecessors name if it exists.
if (!PredBB->hasName())
PredBB->takeName(BB);
@@ -454,7 +491,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
// account for the newly created predecessor.
- if (Preds.size() == 0) {
+ if (Preds.empty()) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -675,7 +712,6 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
}
-
Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse) {
PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
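
The new code in MergeBlockIntoPredecessor records the PHI's incoming values before folding and, after the splice, keeps only the first dbg.value per (variable, expression) pair for each of those values. Reduced to its essentials, the cleanup is a set-insert loop (hypothetical standalone types, for illustration only):

    #include <set>
    #include <utility>
    #include <vector>

    struct DbgValueRecord {
      void *Variable;   // stands in for DILocalVariable *
      void *Expression; // stands in for DIExpression *
      bool Erased = false;
    };

    // Erase every record after the first that describes the same
    // (variable, expression) pair, the shape of the cleanup above.
    void dedupDbgValues(std::vector<DbgValueRecord> &Records) {
      std::set<std::pair<void *, void *>> Seen;
      for (DbgValueRecord &R : Records)
        if (!Seen.insert({R.Variable, R.Expression}).second)
          R.Erased = true; // DVI->eraseFromParent() in the real code
    }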
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 175cbd2ce0df..3653c307619b 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -16,9 +16,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
@@ -28,6 +30,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
#define DEBUG_TYPE "break-crit-edges"
@@ -198,59 +202,23 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (!DT && !LI)
return NewBB;
- // Now update analysis information. Since the only predecessor of NewBB is
- // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
- // anything, as there are other successors of DestBB. However, if all other
- // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
- // loop header) then NewBB dominates DestBB.
- SmallVector<BasicBlock*, 8> OtherPreds;
-
- // If there is a PHI in the block, loop over predecessors with it, which is
- // faster than iterating pred_begin/end.
- if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingBlock(i) != NewBB)
- OtherPreds.push_back(PN->getIncomingBlock(i));
- } else {
- for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
- I != E; ++I) {
- BasicBlock *P = *I;
- if (P != NewBB)
- OtherPreds.push_back(P);
- }
- }
-
- bool NewBBDominatesDestBB = true;
-
- // Should we update DominatorTree information?
if (DT) {
- DomTreeNode *TINode = DT->getNode(TIBB);
-
- // The new block is not the immediate dominator for any other nodes, but
- // TINode is the immediate dominator for the new node.
+ // Update the DominatorTree.
+ // ---> NewBB -----\
+ // / V
+ // TIBB -------\\------> DestBB
//
- if (TINode) { // Don't break unreachable code!
- DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
- DomTreeNode *DestBBNode = nullptr;
-
- // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
- if (!OtherPreds.empty()) {
- DestBBNode = DT->getNode(DestBB);
- while (!OtherPreds.empty() && NewBBDominatesDestBB) {
- if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
- NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
- OtherPreds.pop_back();
- }
- OtherPreds.clear();
- }
-
- // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
- // doesn't dominate anything.
- if (NewBBDominatesDestBB) {
- if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
- DT->changeImmediateDominator(DestBBNode, NewBBNode);
- }
- }
+ // First, inform the DT about the new path from TIBB to DestBB via NewBB,
+ // then delete the old edge from TIBB to DestBB. By doing this in that order
+ // DestBB stays reachable in the DT the whole time and its subtree doesn't
+ // get disconnected.
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
+ Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
+ if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
+ Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
+
+ DT->applyUpdates(Updates);
}
// Update LoopInfo if it is around.
@@ -326,3 +294,159 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
return NewBB;
}
+
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+ // If the block doesn't have any PHIs, we don't care about it, since there's
+ // no point in splitting it.
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (!PN)
+ return nullptr;
+
+ // Verify we have exactly one IBR predecessor.
+ // Conservatively bail out if one of the other predecessors is not a "regular"
+ // terminator (that is, not a switch or a br).
+ BasicBlock *IBB = nullptr;
+ for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
+ BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ switch (PredTerm->getOpcode()) {
+ case Instruction::IndirectBr:
+ if (IBB)
+ return nullptr;
+ IBB = PredBB;
+ break;
+ case Instruction::Br:
+ case Instruction::Switch:
+ OtherPreds.push_back(PredBB);
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ return IBB;
+}
+
+bool llvm::SplitIndirectBrCriticalEdges(Function &F,
+ BranchProbabilityInfo *BPI,
+ BlockFrequencyInfo *BFI) {
+ // Check whether the function has any indirectbrs, and collect which blocks
+ // they may jump to. Since most functions don't have indirect branches,
+ // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+ SmallSetVector<BasicBlock *, 16> Targets;
+ for (auto &BB : F) {
+ auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+ if (!IBI)
+ continue;
+
+ for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+ Targets.insert(IBI->getSuccessor(Succ));
+ }
+
+ if (Targets.empty())
+ return false;
+
+ bool ShouldUpdateAnalysis = BPI && BFI;
+ bool Changed = false;
+ for (BasicBlock *Target : Targets) {
+ SmallVector<BasicBlock *, 16> OtherPreds;
+ BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+ // If we did not find an indirectbr, or the indirectbr is the only
+ // incoming edge, this isn't the kind of edge we're looking for.
+ if (!IBRPred || OtherPreds.empty())
+ continue;
+
+ // Don't even think about ehpads/landingpads.
+ Instruction *FirstNonPHI = Target->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+ continue;
+
+ BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+ if (ShouldUpdateAnalysis) {
+ // Copy the BFI/BPI from Target to BodyBlock.
+ for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
+ I < E; ++I)
+ BPI->setEdgeProbability(BodyBlock, I,
+ BPI->getEdgeProbability(Target, I));
+ BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
+ }
+ // It's possible Target was its own successor through an indirectbr.
+ // In this case, the indirectbr now comes from BodyBlock.
+ if (IBRPred == Target)
+ IBRPred = BodyBlock;
+
+ // At this point Target only has PHIs, and BodyBlock has the rest of the
+ // block's body. Create a copy of Target that will be used by the "direct"
+ // preds.
+ ValueToValueMapTy VMap;
+ BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+ BlockFrequency BlockFreqForDirectSucc;
+ for (BasicBlock *Pred : OtherPreds) {
+ // If the target is a loop to itself, then the terminator of the split
+ // block (BodyBlock) needs to be updated.
+ BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
+ Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ if (ShouldUpdateAnalysis)
+ BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
+ BPI->getEdgeProbability(Src, DirectSucc);
+ }
+ if (ShouldUpdateAnalysis) {
+ BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
+ BlockFrequency NewBlockFreqForTarget =
+ BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
+ BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
+ BPI->eraseBlock(Target);
+ }
+
+ // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
+ // they are clones, so the number of PHIs are the same.
+ // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+ // (b) Leave that as the only edge in the "Indirect" PHI.
+ // (c) Merge the two in the body block.
+ BasicBlock::iterator Indirect = Target->begin(),
+ End = Target->getFirstNonPHI()->getIterator();
+ BasicBlock::iterator Direct = DirectSucc->begin();
+ BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+ assert(&*End == Target->getTerminator() &&
+ "Block was expected to only contain PHIs");
+
+ while (Indirect != End) {
+ PHINode *DirPHI = cast<PHINode>(Direct);
+ PHINode *IndPHI = cast<PHINode>(Indirect);
+
+ // Now, clean up - the direct block shouldn't get the indirect value,
+ // and vice versa.
+ DirPHI->removeIncomingValue(IBRPred);
+ Direct++;
+
+ // Advance the pointer here, to avoid invalidation issues when the old
+ // PHI is erased.
+ Indirect++;
+
+ PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+ NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+ IBRPred);
+
+ // Create a PHI in the body block, to merge the direct and indirect
+ // predecessors.
+ PHINode *MergePHI =
+ PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ MergePHI->addIncoming(NewIndPHI, Target);
+ MergePHI->addIncoming(DirPHI, DirectSucc);
+
+ IndPHI->replaceAllUsesWith(MergePHI);
+ IndPHI->eraseFromParent();
+ }
+
+ Changed = true;
+ }
+
+ return Changed;
+}
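
The dominator-tree maintenance in SplitCriticalEdge is now expressed as a batch of incremental updates rather than a hand-rolled immediate-dominator fixup; inserting the two new edges before deleting the old one keeps DestBB reachable throughout. The calling pattern in isolation (a sketch against the DominatorTree API used above):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Dominators.h"

    // Apply the CFG changes from splitting TIBB->DestBB through NewBB in one
    // batch; the updater orders and deduplicates the individual updates.
    void updateDomTreeForSplitEdge(llvm::DominatorTree &DT,
                                   llvm::BasicBlock *TIBB,
                                   llvm::BasicBlock *NewBB,
                                   llvm::BasicBlock *DestBB,
                                   bool TIBBStillBranchesToDestBB) {
      llvm::SmallVector<llvm::DominatorTree::UpdateType, 3> Updates;
      Updates.push_back({llvm::DominatorTree::Insert, TIBB, NewBB});
      Updates.push_back({llvm::DominatorTree::Insert, NewBB, DestBB});
      if (!TIBBStillBranchesToDestBB)
        Updates.push_back({llvm::DominatorTree::Delete, TIBB, DestBB});
      DT.applyUpdates(Updates);
    }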
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 83ec7f55d1af..f711b192f604 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -1,4 +1,4 @@
-//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,27 +17,32 @@
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "bypass-slow-division"
namespace {
- struct DivOpInfo {
- bool SignedOp;
- Value *Dividend;
- Value *Divisor;
-
- DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
- : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
- };
struct QuotRemPair {
Value *Quotient;
@@ -55,38 +60,11 @@ namespace {
Value *Quotient = nullptr;
Value *Remainder = nullptr;
};
-}
-
-namespace llvm {
- template<>
- struct DenseMapInfo<DivOpInfo> {
- static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
- return Val1.SignedOp == Val2.SignedOp &&
- Val1.Dividend == Val2.Dividend &&
- Val1.Divisor == Val2.Divisor;
- }
-
- static DivOpInfo getEmptyKey() {
- return DivOpInfo(false, nullptr, nullptr);
- }
- static DivOpInfo getTombstoneKey() {
- return DivOpInfo(true, nullptr, nullptr);
- }
+using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
+using BypassWidthsTy = DenseMap<unsigned, unsigned>;
+using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
- static unsigned getHashValue(const DivOpInfo &Val) {
- return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
- reinterpret_cast<uintptr_t>(Val.Divisor)) ^
- (unsigned)Val.SignedOp;
- }
- };
-
- typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
- typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
- typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
-}
-
-namespace {
enum ValueRange {
/// Operand definitely fits into BypassType. No runtime checks are needed.
VALRNG_KNOWN_SHORT,
@@ -116,17 +94,21 @@ class FastDivInsertionTask {
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
SlowDivOrRem->getOpcode() == Instruction::SRem;
}
+
bool isDivisionOp() {
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
SlowDivOrRem->getOpcode() == Instruction::UDiv;
}
+
Type *getSlowType() { return SlowDivOrRem->getType(); }
public:
FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+
Value *getReplacement(DivCacheTy &Cache);
};
-} // anonymous namespace
+
+} // end anonymous namespace
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
const BypassWidthsTy &BypassWidths) {
@@ -175,7 +157,7 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
// Then, look for a value in Cache.
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
- DivOpInfo Key(isSignedOp(), Dividend, Divisor);
+ DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
auto CacheI = Cache.find(Key);
if (CacheI == Cache.end()) {
@@ -225,7 +207,7 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
}
- case Instruction::PHI: {
+ case Instruction::PHI:
// Stop IR traversal in case of a crazy input code. This limits recursion
// depth.
if (Visited.size() >= 16)
@@ -241,7 +223,6 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
isa<UndefValue>(V);
});
- }
default:
return false;
}
@@ -371,11 +352,6 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
Value *Dividend = SlowDivOrRem->getOperand(0);
Value *Divisor = SlowDivOrRem->getOperand(1);
- if (isa<ConstantInt>(Divisor)) {
- // Keep division by a constant for DAGCombiner.
- return None;
- }
-
VisitedSetTy SetL;
ValueRange DividendRange = getValueRange(Dividend, SetL);
if (DividendRange == VALRNG_LIKELY_LONG)
@@ -391,7 +367,9 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
if (DividendShort && DivisorShort) {
// If both operands are known to be short then just replace the long
- // division with a short one in-place.
+ // division with a short one in-place. Since we're not introducing control
+ // flow in this case, narrowing the division is always a win, even if the
+ // divisor is a constant (and will later get replaced by a multiplication).
IRBuilder<> Builder(SlowDivOrRem);
Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
@@ -401,7 +379,16 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
return QuotRemPair(ExtDiv, ExtRem);
- } else if (DividendShort && !isSignedOp()) {
+ }
+
+ if (isa<ConstantInt>(Divisor)) {
+ // If the divisor is a constant, DAGCombiner will convert the division
+ // into a multiplication by a magic constant. It isn't clear if it is worth
+ // introducing control flow to get a narrower multiply.
+ return None;
+ }
+
+ if (DividendShort && !isSignedOp()) {
// If the division is unsigned and Dividend is known to be short, then
// either
// 1) Divisor is less or equal to Dividend, and the result can be computed
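
At runtime, the transformation this file performs amounts to guarding a wide division with a cheap operand-width test. Written out in plain C++, the code the pass effectively emits for a 64-bit udiv with a 32-bit bypass width looks like this (a sketch of the transformation's effect, not of the pass itself):

    #include <cstdint>

    uint64_t bypassedUDiv(uint64_t Dividend, uint64_t Divisor) {
      // One cheap test: do both operands fit in the bypass width?
      if (((Dividend | Divisor) >> 32) == 0)
        return uint32_t(Dividend) / uint32_t(Divisor); // fast short divide
      return Dividend / Divisor;                       // slow long divide
    }

This also explains the reordering above: when both operands are provably short, the narrowed divide replaces the long one in place with no new control flow, so it pays off even for constant divisors; the constant-divisor bailout only applies once a runtime branch would be needed.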
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 83bc05d0311c..972e47f9270a 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,12 +5,13 @@ add_llvm_library(LLVMTransformUtils
BreakCriticalEdges.cpp
BuildLibCalls.cpp
BypassSlowDivision.cpp
+ CallPromotionUtils.cpp
CloneFunction.cpp
CloneModule.cpp
- CmpInstAnalysis.cpp
CodeExtractor.cpp
CtorUtils.cpp
DemoteRegToStack.cpp
+ EntryExitInstrumenter.cpp
EscapeEnumerator.cpp
Evaluator.cpp
FlattenCFG.cpp
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
new file mode 100644
index 000000000000..eb3139ce4293
--- /dev/null
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -0,0 +1,328 @@
+//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities useful for promoting indirect call sites to
+// direct call sites.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "call-promotion-utils"
+
+/// Fix-up phi nodes in an invoke instruction's normal destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now either be coming from the original block or the "else" block.
+static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+ BasicBlock *ElseBlock,
+ Instruction *NewInst) {
+ for (auto &I : *Invoke->getNormalDest()) {
+ auto *Phi = dyn_cast<PHINode>(&I);
+ if (!Phi)
+ break;
+ int Idx = Phi->getBasicBlockIndex(OrigBlock);
+ if (Idx == -1)
+ continue;
+ Value *V = Phi->getIncomingValue(Idx);
+ if (dyn_cast<Instruction>(V) == Invoke) {
+ Phi->setIncomingBlock(Idx, ElseBlock);
+ Phi->addIncoming(NewInst, OrigBlock);
+ continue;
+ }
+ Phi->addIncoming(V, ElseBlock);
+ }
+}
+
+/// Fix-up phi nodes in an invoke instruction's unwind destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now be coming from either the "then" block or the "else" block.
+static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+ BasicBlock *ThenBlock,
+ BasicBlock *ElseBlock) {
+ for (auto &I : *Invoke->getUnwindDest()) {
+ auto *Phi = dyn_cast<PHINode>(&I);
+ if (!Phi)
+ break;
+ int Idx = Phi->getBasicBlockIndex(OrigBlock);
+ if (Idx == -1)
+ continue;
+ auto *V = Phi->getIncomingValue(Idx);
+ Phi->setIncomingBlock(Idx, ThenBlock);
+ Phi->addIncoming(V, ElseBlock);
+ }
+}
+
+/// Check whether a phi node in the given block takes the returned value of a
+/// call or invoke instruction as one of its incoming values.
+static bool getRetPhiNode(Instruction *Inst, BasicBlock *Block) {
+ BasicBlock *FromBlock = Inst->getParent();
+ for (auto &I : *Block) {
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+ int Idx = PHI->getBasicBlockIndex(FromBlock);
+ if (Idx == -1)
+ continue;
+ auto *V = PHI->getIncomingValue(Idx);
+ if (V == Inst)
+ return true;
+ }
+ return false;
+}
+
+/// Create a phi node for the returned value of a call or invoke instruction.
+///
+/// After versioning a call or invoke instruction that returns a value, we have
+/// to merge the value of the original and new instructions. We do this by
+/// creating a phi node and replacing uses of the original instruction with this
+/// phi node.
+static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst) {
+
+ if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
+ return;
+
+ BasicBlock *RetValBB = NewInst->getParent();
+ if (auto *Invoke = dyn_cast<InvokeInst>(NewInst))
+ RetValBB = Invoke->getNormalDest();
+ BasicBlock *PhiBB = RetValBB->getSingleSuccessor();
+
+ if (getRetPhiNode(OrigInst, PhiBB))
+ return;
+
+ IRBuilder<> Builder(&PhiBB->front());
+ PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
+ SmallVector<User *, 16> UsersToUpdate;
+ for (User *U : OrigInst->users())
+ UsersToUpdate.push_back(U);
+ for (User *U : UsersToUpdate)
+ U->replaceUsesOfWith(OrigInst, Phi);
+ Phi->addIncoming(OrigInst, OrigInst->getParent());
+ Phi->addIncoming(NewInst, RetValBB);
+}
+
+/// Cast a call or invoke instruction to the given type.
+///
+/// When promoting a call site, the return type of the call site might not match
+/// that of the callee. If this is the case, we have to cast the returned value
+/// to the correct type. The location of the cast depends on whether we have a
+/// call or an invoke instruction.
+Instruction *createRetBitCast(CallSite CS, Type *RetTy) {
+
+ // Save the users of the calling instruction. These uses will be changed to
+ // use the bitcast after we create it.
+ SmallVector<User *, 16> UsersToUpdate;
+ for (User *U : CS.getInstruction()->users())
+ UsersToUpdate.push_back(U);
+
+ // Determine an appropriate location to create the bitcast for the return
+ // value. The location depends on whether we have a call or an invoke.
+ Instruction *InsertBefore = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
+ InsertBefore = &*Invoke->getNormalDest()->getFirstInsertionPt();
+ else
+ InsertBefore = &*std::next(CS.getInstruction()->getIterator());
+
+ // Bitcast the return value to the correct type.
+ auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
+ RetTy, "", InsertBefore);
+
+ // Replace all the original uses of the calling instruction with the bitcast.
+ for (User *U : UsersToUpdate)
+ U->replaceUsesOfWith(CS.getInstruction(), Cast);
+
+ return Cast;
+}
+
+/// Predicate and clone the given call site.
+///
+/// This function creates an if-then-else structure at the location of the call
+/// site. The "if" condition compares the call site's called value to the given
+/// callee. The original call site is moved into the "else" block, and a clone
+/// of the call site is placed in the "then" block. The cloned instruction is
+/// returned.
+static Instruction *versionCallSite(CallSite CS, Value *Callee,
+ MDNode *BranchWeights,
+ BasicBlock *&ThenBlock,
+ BasicBlock *&ElseBlock,
+ BasicBlock *&MergeBlock) {
+
+ IRBuilder<> Builder(CS.getInstruction());
+ Instruction *OrigInst = CS.getInstruction();
+
+ // Create the compare. The called value and callee must have the same type to
+ // be compared.
+ auto *LHS =
+ Builder.CreateBitCast(CS.getCalledValue(), Builder.getInt8PtrTy());
+ auto *RHS = Builder.CreateBitCast(Callee, Builder.getInt8PtrTy());
+ auto *Cond = Builder.CreateICmpEQ(LHS, RHS);
+
+ // Create an if-then-else structure. The original instruction is moved into
+ // the "else" block, and a clone of the original instruction is placed in the
+ // "then" block.
+ TerminatorInst *ThenTerm = nullptr;
+ TerminatorInst *ElseTerm = nullptr;
+ SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
+ BranchWeights);
+ ThenBlock = ThenTerm->getParent();
+ ElseBlock = ElseTerm->getParent();
+ MergeBlock = OrigInst->getParent();
+
+ ThenBlock->setName("if.true.direct_targ");
+ ElseBlock->setName("if.false.orig_indirect");
+ MergeBlock->setName("if.end.icp");
+
+ Instruction *NewInst = OrigInst->clone();
+ OrigInst->moveBefore(ElseTerm);
+ NewInst->insertBefore(ThenTerm);
+
+ // If the original call site is an invoke instruction, we have extra work to
+ // do since invoke instructions are terminating.
+ if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
+ auto *NewInvoke = cast<InvokeInst>(NewInst);
+
+ // Invoke instructions are terminating, so we don't need the terminator
+ // instructions that were just created.
+ ThenTerm->eraseFromParent();
+ ElseTerm->eraseFromParent();
+
+ // Branch from the "merge" block to the original normal destination.
+ Builder.SetInsertPoint(MergeBlock);
+ Builder.CreateBr(OrigInvoke->getNormalDest());
+
+ // Now set the normal destination of the new invoke instruction to be the
+ // "merge" block.
+ NewInvoke->setNormalDest(MergeBlock);
+ }
+
+ return NewInst;
+}
+
+bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
+ const char **FailureReason) {
+ assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+ // Check the return type. The callee's return value type must be bitcast
+ // compatible with the call site's type.
+ Type *CallRetTy = CS.getInstruction()->getType();
+ Type *FuncRetTy = Callee->getReturnType();
+ if (CallRetTy != FuncRetTy)
+ if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) {
+ if (FailureReason)
+ *FailureReason = "Return type mismatch";
+ return false;
+ }
+
+ // The number of formal arguments of the callee.
+ unsigned NumParams = Callee->getFunctionType()->getNumParams();
+
+ // Check the number of arguments. The callee and call site must agree on the
+ // number of arguments.
+ if (CS.arg_size() != NumParams && !Callee->isVarArg()) {
+ if (FailureReason)
+ *FailureReason = "The number of arguments mismatch";
+ return false;
+ }
+
+ // Check the argument types. The callee's formal argument types must be
+ // bitcast compatible with the corresponding actual argument types of the call
+ // site.
+ for (unsigned I = 0; I < NumParams; ++I) {
+ Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
+ Type *ActualTy = CS.getArgument(I)->getType();
+ if (FormalTy == ActualTy)
+ continue;
+ if (!CastInst::isBitCastable(ActualTy, FormalTy)) {
+ if (FailureReason)
+ *FailureReason = "Argument type mismatch";
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void promoteCall(CallSite CS, Function *Callee, Instruction *&Cast) {
+ assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+ // Set the called function of the call site to be the given callee.
+ CS.setCalledFunction(Callee);
+
+ // Since the call site will no longer be direct, we must clear metadata that
+ // is only appropriate for indirect calls. This includes !prof and !callees
+ // metadata.
+ CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr);
+ CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
+
+ // If the function type of the call site matches that of the callee, no
+ // additional work is required.
+ if (CS.getFunctionType() == Callee->getFunctionType())
+ return;
+
+ // Save the return types of the call site and callee.
+ Type *CallSiteRetTy = CS.getInstruction()->getType();
+ Type *CalleeRetTy = Callee->getReturnType();
+
+ // Change the function type of the call site to match that of the callee.
+ CS.mutateFunctionType(Callee->getFunctionType());
+
+ // Inspect the arguments of the call site. If an argument's type doesn't
+ // match the corresponding formal argument's type in the callee, bitcast it
+ // to the correct type.
+ for (Use &U : CS.args()) {
+ unsigned ArgNo = CS.getArgumentNo(&U);
+ Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo);
+ Type *ActualTy = U.get()->getType();
+ if (FormalTy != ActualTy) {
+ auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "",
+ CS.getInstruction());
+ CS.setArgument(ArgNo, Cast);
+ }
+ }
+
+ // If the return type of the call site doesn't match that of the callee, cast
+ // the returned value to the appropriate type.
+ if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
+ Cast = createRetBitCast(CS, CallSiteRetTy);
+}
+
+Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
+ MDNode *BranchWeights) {
+
+ // Version the indirect call site. If the called value is equal to the given
+ // callee, 'NewInst' will be executed, otherwise the original call site will
+ // be executed.
+ BasicBlock *ThenBlock, *ElseBlock, *MergeBlock;
+ Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights, ThenBlock,
+ ElseBlock, MergeBlock);
+
+ // Promote 'NewInst' so that it directly calls the desired function.
+ Instruction *Cast = NewInst;
+ promoteCall(CallSite(NewInst), Callee, Cast);
+
+ // If the original call site is an invoke instruction, we have to fix-up phi
+ // nodes in the invoke's normal and unwind destinations.
+ if (auto *OrigInvoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ fixupPHINodeForNormalDest(OrigInvoke, MergeBlock, ElseBlock, Cast);
+ fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
+ }
+
+ // Create a phi node for the returned value of the call site.
+ createRetPHINode(CS.getInstruction(), Cast ? Cast : NewInst);
+
+ // Return the new direct call.
+ return NewInst;
+}
+
+#undef DEBUG_TYPE
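
The intended entry points of the new file are isLegalToPromote followed by promoteCallWithIfThenElse. A hedged usage sketch, where CS is an indirect call site and DirectCallee the target suggested by profile data (BranchWeights may be null; tryPromote is a hypothetical wrapper):

    #include "llvm/IR/CallSite.h"
    #include "llvm/Transforms/Utils/CallPromotionUtils.h"

    using namespace llvm;

    // Version CS against a likely callee if the types are bitcast-compatible;
    // returns the new direct call, or nullptr with Reason set on failure.
    Instruction *tryPromote(CallSite CS, Function *DirectCallee,
                            MDNode *BranchWeights) {
      const char *Reason = nullptr;
      if (!isLegalToPromote(CS, DirectCallee, &Reason))
        return nullptr; // e.g. "Return type mismatch"
      // Emits: if (callee == DirectCallee) call direct; else call indirect.
      return promoteCallWithIfThenElse(CS, DirectCallee, BranchWeights);
    }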
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 9c4e13903ed7..3b19ba1b50f2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -747,7 +747,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
- Loop *NewLoop = new Loop();
+ Loop *NewLoop = LI->AllocateLoop();
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
else
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index e5392b53050d..8fee10854229 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/Core.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
deleted file mode 100644
index d9294c499309..000000000000
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file holds routines to help analyse compare instructions
-// and fold them into constants or other compare instructions
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-
-using namespace llvm;
-
-unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
- ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
- : ICI->getPredicate();
- switch (Pred) {
- // False -> 0
- case ICmpInst::ICMP_UGT: return 1; // 001
- case ICmpInst::ICMP_SGT: return 1; // 001
- case ICmpInst::ICMP_EQ: return 2; // 010
- case ICmpInst::ICMP_UGE: return 3; // 011
- case ICmpInst::ICMP_SGE: return 3; // 011
- case ICmpInst::ICMP_ULT: return 4; // 100
- case ICmpInst::ICMP_SLT: return 4; // 100
- case ICmpInst::ICMP_NE: return 5; // 101
- case ICmpInst::ICMP_ULE: return 6; // 110
- case ICmpInst::ICMP_SLE: return 6; // 110
- // True -> 7
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- }
-}
-
-Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- CmpInst::Predicate &NewICmpPred) {
- switch (Code) {
- default: llvm_unreachable("Illegal ICmp code!");
- case 0: // False.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
- case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
- case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
- case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
- case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
- case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
- case 7: // True.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
- }
- return nullptr;
-}
-
-bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
- (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
- (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
-}
-
-bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
- Value *&X, Value *&Y, Value *&Z) {
- ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
- if (!C)
- return false;
-
- switch (I->getPredicate()) {
- default:
- return false;
- case ICmpInst::ICMP_SLT:
- // X < 0 is equivalent to (X & SignMask) != 0.
- if (!C->isZero())
- return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
- Pred = ICmpInst::ICMP_NE;
- break;
- case ICmpInst::ICMP_SGT:
- // X > -1 is equivalent to (X & SignMask) == 0.
- if (!C->isMinusOne())
- return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
- Pred = ICmpInst::ICMP_EQ;
- break;
- case ICmpInst::ICMP_ULT:
- // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
- if (!C->getValue().isPowerOf2())
- return false;
- Y = ConstantInt::get(I->getContext(), -C->getValue());
- Pred = ICmpInst::ICMP_EQ;
- break;
- case ICmpInst::ICMP_UGT:
- // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
- if (!(C->getValue() + 1).isPowerOf2())
- return false;
- Y = ConstantInt::get(I->getContext(), ~C->getValue());
- Pred = ICmpInst::ICMP_NE;
- break;
- }
-
- X = I->getOperand(0);
- Z = ConstantInt::getNullValue(C->getType());
- return true;
-}
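
The removed decomposeBitTestICmp cases rest on simple mask identities: an unsigned compare against a power of two is really a mask test. A compile-time check of the ICMP_ULT and ICMP_UGT rows (n == 4 chosen for illustration):

    #include <cstdint>

    // X <u 2^n    <=>  (X & ~(2^n - 1)) == 0
    // X >u 2^n-1  <=>  (X & ~(2^n - 1)) != 0
    constexpr bool ult16(uint32_t X) { return (X & ~15u) == 0; }
    constexpr bool ugt15(uint32_t X) { return (X & ~15u) != 0; }

    static_assert(ult16(15) && !ult16(16), "X <u 16 is a mask test");
    static_assert(ugt15(16) && !ugt15(15), "X >u 15 is a mask test");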
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 1189714dfab1..7a404241cb14 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,34 +14,57 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <map>
#include <set>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "code-extractor"
@@ -55,7 +78,8 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
cl::desc("Aggregate arguments to code-extracted functions"));
/// \brief Test whether a block is valid for extraction.
-bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
+bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB,
+ bool AllowVarArgs) {
// Landing pads must be in the function where they were inserted for cleanup.
if (BB.isEHPad())
return false;
@@ -87,14 +111,19 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
}
}
- // Don't hoist code containing allocas, invokes, or vastarts.
+ // Don't hoist code containing allocas or invokes. If explicitly requested,
+ // allow vastart.
for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
if (isa<AllocaInst>(I) || isa<InvokeInst>(I))
return false;
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::vastart)
- return false;
+ if (F->getIntrinsicID() == Intrinsic::vastart) {
+ if (AllowVarArgs)
+ continue;
+ else
+ return false;
+ }
}
return true;
@@ -102,21 +131,21 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
/// \brief Build a set of blocks to extract if the input blocks are viable.
static SetVector<BasicBlock *>
-buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
+ bool AllowVarArgs) {
assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
SetVector<BasicBlock *> Result;
// Loop over the blocks, adding them to our set-vector, and aborting with an
// empty set if we encounter invalid blocks.
for (BasicBlock *BB : BBs) {
-
// If this block is dead, don't process it.
if (DT && !DT->isReachableFromEntry(BB))
continue;
if (!Result.insert(BB))
llvm_unreachable("Repeated basic blocks in extraction input");
- if (!CodeExtractor::isBlockValidForExtraction(*BB)) {
+ if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) {
Result.clear();
return Result;
}
@@ -138,16 +167,18 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
+ BranchProbabilityInfo *BPI, bool AllowVarArgs)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {}
+ BPI(BPI), AllowVarArgs(AllowVarArgs),
+ Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)),
- NumExitBlocks(~0U) {}
+ BPI(BPI), AllowVarArgs(false),
+ Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
+ /* AllowVarArgs */ false)) {}
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
@@ -202,7 +233,6 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
if (Blocks.count(&BB))
continue;
for (Instruction &II : BB) {
-
if (isa<DbgInfoIntrinsic>(II))
continue;
@@ -287,7 +317,9 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
CommonExitBlock->getFirstNonPHI()->getIterator());
- for (auto *Pred : predecessors(CommonExitBlock)) {
+ for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock);
+ PI != PE;) {
+ BasicBlock *Pred = *PI++;
if (Blocks.count(Pred))
continue;
Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
@@ -373,7 +405,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
// Follow the bitcast.
Instruction *MarkerAddr = nullptr;
for (User *U : AI->users()) {
-
if (U->stripInBoundsConstantOffsets() == AI) {
SinkLifeStart = false;
HoistLifeEnd = false;
@@ -407,7 +438,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
const ValueSet &SinkCands) const {
-
for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
@@ -457,7 +487,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// containing PHI nodes merging values from outside of the region, and a
// second that contains all of the code for the block and merges back any
// incoming values from inside of the region.
- BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);
+ BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);
// We only want to code extract the second block now, and it becomes the new
// header of the region.
@@ -525,7 +555,6 @@ void CodeExtractor::splitReturnBlocks() {
/// constructFunction - make a function based on inputs and outputs, as follows:
/// f(in0, ..., inN, out0, ..., outN)
-///
Function *CodeExtractor::constructFunction(const ValueSet &inputs,
const ValueSet &outputs,
BasicBlock *header,
@@ -544,7 +573,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
default: RetTy = Type::getInt16Ty(header->getContext()); break;
}
- std::vector<Type*> paramTy;
+ std::vector<Type *> paramTy;
// Add the types of the input values to the function's argument list
for (Value *value : inputs) {
@@ -575,7 +604,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
paramTy.push_back(PointerType::getUnqual(StructTy));
}
FunctionType *funcType =
- FunctionType::get(RetTy, paramTy, false);
+ FunctionType::get(RetTy, paramTy,
+ AllowVarArgs && oldFunction->isVarArg());
// Create the new function
Function *newFunction = Function::Create(funcType,
@@ -620,7 +650,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
} else
RewriteVal = &*AI++;
- std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
+ std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
for (User *use : Users)
if (Instruction *inst = dyn_cast<Instruction>(use))
if (Blocks.count(inst->getParent()))
@@ -639,7 +669,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Rewrite branches to basic blocks outside of the loop to new dummy blocks
// within the new function. This must be done before we lose track of which
// blocks were originally in the code region.
- std::vector<User*> Users(header->user_begin(), header->user_end());
+ std::vector<User *> Users(header->user_begin(), header->user_end());
for (unsigned i = 0, e = Users.size(); i != e; ++i)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
@@ -651,19 +681,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
return newFunction;
}
-/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
-/// that uses the value within the basic block, and return the predecessor
-/// block associated with that use, or return 0 if none is found.
-static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
- for (Use &U : Used->uses()) {
- PHINode *P = dyn_cast<PHINode>(U.getUser());
- if (P && P->getParent() == BB)
- return P->getIncomingBlock(U);
- }
-
- return nullptr;
-}
-
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
@@ -672,7 +689,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
ValueSet &inputs, ValueSet &outputs) {
// Emit a call to the new function, passing in: *pointer to struct (if
// aggregating parameters), or plain inputs and allocated memory for outputs
- std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+ std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
Module *M = newFunction->getParent();
LLVMContext &Context = M->getContext();
@@ -702,7 +719,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
StructType *StructArgTy = nullptr;
AllocaInst *Struct = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- std::vector<Type*> ArgTypes;
+ std::vector<Type *> ArgTypes;
for (ValueSet::iterator v = StructValues.begin(),
ve = StructValues.end(); v != ve; ++v)
ArgTypes.push_back((*v)->getType());
@@ -729,6 +746,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Emit the call to the function
CallInst *call = CallInst::Create(newFunction, params,
NumExitBlocks > 1 ? "targetBlock" : "");
+ // Add debug location to the new call, if the original function has debug
+ // info. In that case, the terminator of the entry block of the extracted
+ // function contains the first debug location of the extracted function,
+ // set in extractCodeRegion.
+ if (codeReplacer->getParent()->getSubprogram()) {
+ if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
+ call->setDebugLoc(DL);
+ }
codeReplacer->getInstList().push_back(call);
Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
@@ -736,7 +761,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
if (!AggregateArgs)
std::advance(OutputArgBegin, inputs.size());
- // Reload the outputs passed in by reference
+ // Reload the outputs passed in by reference.
+ Function::arg_iterator OAI = OutputArgBegin;
for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
Value *Output = nullptr;
if (AggregateArgs) {
@@ -753,12 +779,40 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
Reloads.push_back(load);
codeReplacer->getInstList().push_back(load);
- std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end());
+ std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
for (unsigned u = 0, e = Users.size(); u != e; ++u) {
Instruction *inst = cast<Instruction>(Users[u]);
if (!Blocks.count(inst->getParent()))
inst->replaceUsesOfWith(outputs[i], load);
}
+
+ // Store to argument right after the definition of output value.
+ auto *OutI = dyn_cast<Instruction>(outputs[i]);
+ if (!OutI)
+ continue;
+ // Find proper insertion point.
+ Instruction *InsertPt = OutI->getNextNode();
+    // Assume that no non-PHI instructions are interleaved among the PHIs.
+ if (isa<PHINode>(InsertPt))
+ InsertPt = InsertPt->getParent()->getFirstNonPHI();
+
+ assert(OAI != newFunction->arg_end() &&
+ "Number of output arguments should match "
+ "the amount of defined values");
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt);
+ new StoreInst(outputs[i], GEP, InsertPt);
+ // Since there should be only one struct argument aggregating
+ // all the output values, we shouldn't increment OAI, which always
+ // points to the struct argument, in this case.
+ } else {
+ new StoreInst(outputs[i], &*OAI, InsertPt);
+ ++OAI;
+ }
}
// Now we can emit a switch statement using the call as a value.
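For concreteness, a minimal sketch of the callee-side store this hunk introduces, isolated as a helper (the helper name is hypothetical; it mirrors the AggregateArgs branch above):

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Store one output value through the aggregate struct argument: GEP to field
// FieldNo of *StructArg, then store immediately after the value's definition.
static void storeOutputToStruct(Value *Output, Argument *StructArg,
                                StructType *StructArgTy, unsigned FieldNo,
                                Instruction *InsertPt) {
  LLVMContext &C = InsertPt->getContext();
  Value *Idx[] = {Constant::getNullValue(Type::getInt32Ty(C)),
                  ConstantInt::get(Type::getInt32Ty(C), FieldNo)};
  GetElementPtrInst *GEP = GetElementPtrInst::Create(
      StructArgTy, StructArg, Idx, "gep_" + Output->getName(), InsertPt);
  new StoreInst(Output, GEP, InsertPt);
}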
@@ -771,7 +825,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// over all of the blocks in the extracted region, updating any terminator
// instructions in the to-be-extracted region that branch to blocks that are
// not in the region to be extracted.
- std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+ std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
unsigned switchVal = 0;
for (BasicBlock *Block : Blocks) {
@@ -801,75 +855,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
break;
}
- ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+ ReturnInst::Create(Context, brVal, NewTarget);
// Update the switch instruction.
TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
SuccNum),
OldTarget);
-
- // Restore values just before we exit
- Function::arg_iterator OAI = OutputArgBegin;
- for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
- // For an invoke, the normal destination is the only one that is
- // dominated by the result of the invocation
- BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
-
- bool DominatesDef = true;
-
- BasicBlock *NormalDest = nullptr;
- if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out]))
- NormalDest = Invoke->getNormalDest();
-
- if (NormalDest) {
- DefBlock = NormalDest;
-
- // Make sure we are looking at the original successor block, not
- // at a newly inserted exit block, which won't be in the dominator
- // info.
- for (const auto &I : ExitBlockMap)
- if (DefBlock == I.second) {
- DefBlock = I.first;
- break;
- }
-
- // In the extract block case, if the block we are extracting ends
- // with an invoke instruction, make sure that we don't emit a
- // store of the invoke value for the unwind block.
- if (!DT && DefBlock != OldTarget)
- DominatesDef = false;
- }
-
- if (DT) {
- DominatesDef = DT->dominates(DefBlock, OldTarget);
-
- // If the output value is used by a phi in the target block,
- // then we need to test for dominance of the phi's predecessor
- // instead. Unfortunately, this a little complicated since we
- // have already rewritten uses of the value to uses of the reload.
- BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
- OldTarget);
- if (pred && DT && DT->dominates(DefBlock, pred))
- DominatesDef = true;
- }
-
- if (DominatesDef) {
- if (AggregateArgs) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
- FirstOut+out);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(),
- NTRet);
- new StoreInst(outputs[out], GEP, NTRet);
- } else {
- new StoreInst(outputs[out], &*OAI, NTRet);
- }
- }
- // Advance output iterator even if we don't emit a store
- if (!AggregateArgs) ++OAI;
- }
}
// rewrite the original branch instruction with this new target
@@ -940,8 +931,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
BasicBlock *CodeReplacer,
DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
BranchProbabilityInfo *BPI) {
- typedef BlockFrequencyInfoImplBase::Distribution Distribution;
- typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
+ using Distribution = BlockFrequencyInfoImplBase::Distribution;
+ using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
// Update the branch weights for the exit block.
TerminatorInst *TI = CodeReplacer->getTerminator();
@@ -985,12 +976,31 @@ Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
- ValueSet inputs, outputs, SinkingCands, HoistingCands;
- BasicBlock *CommonExit = nullptr;
-
// Assumption: this is a single-entry code region, and the header is the first
// block in the region.
BasicBlock *header = *Blocks.begin();
+ Function *oldFunction = header->getParent();
+
+ // For functions with varargs, check that varargs handling is only done in the
+  // outlined function, i.e. vastart and vaend are only used in outlined blocks.
+ if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
+ auto containsVarArgIntrinsic = [](Instruction &I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (const Function *F = CI->getCalledFunction())
+ return F->getIntrinsicID() == Intrinsic::vastart ||
+ F->getIntrinsicID() == Intrinsic::vaend;
+ return false;
+ };
+
+ for (auto &BB : *oldFunction) {
+ if (Blocks.count(&BB))
+ continue;
+ if (llvm::any_of(BB, containsVarArgIntrinsic))
+ return nullptr;
+ }
+ }
+ ValueSet inputs, outputs, SinkingCands, HoistingCands;
+ BasicBlock *CommonExit = nullptr;
// Calculate the entry frequency of the new function before we change the root
// block.
@@ -1012,8 +1022,6 @@ Function *CodeExtractor::extractCodeRegion() {
// that the return is not in the region.
splitReturnBlocks();
- Function *oldFunction = header->getParent();
-
  // This takes the place of the original loop
BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
@@ -1023,7 +1031,22 @@ Function *CodeExtractor::extractCodeRegion() {
// head of the region, but the entry node of a function cannot have preds.
BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
"newFuncRoot");
- newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+ auto *BranchI = BranchInst::Create(header);
+ // If the original function has debug info, we have to add a debug location
+ // to the new branch instruction from the artificial entry block.
+ // We use the debug location of the first instruction in the extracted
+ // blocks, as there is no other equivalent line in the source code.
+ if (oldFunction->getSubprogram()) {
+ any_of(Blocks, [&BranchI](const BasicBlock *BB) {
+ return any_of(*BB, [&BranchI](const Instruction &I) {
+ if (!I.getDebugLoc())
+ return false;
+ BranchI->setDebugLoc(I.getDebugLoc());
+ return true;
+ });
+ });
+ }
+ newFuncRoot->getInstList().push_back(BranchI);
findAllocas(SinkingCands, HoistingCands, CommonExit);
assert(HoistingCands.empty() || CommonExit);
@@ -1044,7 +1067,7 @@ Function *CodeExtractor::extractCodeRegion() {
}
// Calculate the exit blocks for the extracted region and the total exit
- // weights for each of those blocks.
+ // weights for each of those blocks.
DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
SmallPtrSet<BasicBlock *, 1> ExitBlocks;
for (BasicBlock *Block : Blocks) {
@@ -1097,8 +1120,8 @@ Function *CodeExtractor::extractCodeRegion() {
// Look at all successors of the codeReplacer block. If any of these blocks
// had PHI nodes in them, we need to update the "from" block to be the code
// replacer, not the original block in the extracted region.
- std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
- succ_end(codeReplacer));
+ std::vector<BasicBlock *> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
for (unsigned i = 0, e = Succs.size(); i != e; ++i)
for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
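For reference, a minimal caller-side sketch of driving the extractor with the vararg support added here (hypothetical helper; assumes the CodeExtractor constructor at this revision, which carries an AllowVarArgs flag):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

// Outline a single-entry region into a fresh function. With AllowVarArgs set,
// extraction also succeeds inside vararg functions, provided va_start/va_end
// are only used within the extracted blocks (checked in extractCodeRegion).
static Function *outlineRegion(ArrayRef<BasicBlock *> Region,
                               DominatorTree &DT) {
  CodeExtractor CE(Region, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                   /*BPI=*/nullptr, /*AllowVarArgs=*/true);
  return CE.isEligible() ? CE.extractCodeRegion() : nullptr;
}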
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
index 6642a97a29c2..82b67c293102 100644
--- a/lib/Transforms/Utils/CtorUtils.cpp
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -16,7 +16,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
new file mode 100644
index 000000000000..421663f82565
--- /dev/null
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -0,0 +1,163 @@
+//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+static void insertCall(Function &CurFn, StringRef Func,
+ Instruction *InsertionPt, DebugLoc DL) {
+ Module &M = *InsertionPt->getParent()->getParent()->getParent();
+ LLVMContext &C = InsertionPt->getParent()->getContext();
+
+ if (Func == "mcount" ||
+ Func == ".mcount" ||
+ Func == "\01__gnu_mcount_nc" ||
+ Func == "\01_mcount" ||
+ Func == "\01mcount" ||
+ Func == "__mcount" ||
+ Func == "_mcount" ||
+ Func == "__cyg_profile_func_enter_bare") {
+ Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
+ CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
+ Call->setDebugLoc(DL);
+ return;
+ }
+
+ if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
+ Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
+
+ Constant *Fn = M.getOrInsertFunction(
+ Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
+
+ Instruction *RetAddr = CallInst::Create(
+ Intrinsic::getDeclaration(&M, Intrinsic::returnaddress),
+ ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "",
+ InsertionPt);
+ RetAddr->setDebugLoc(DL);
+
+ Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)),
+ RetAddr};
+
+ CallInst *Call =
+ CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
+ Call->setDebugLoc(DL);
+ return;
+ }
+
+ // We only know how to call a fixed set of instrumentation functions, because
+ // they all expect different arguments, etc.
+ report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'");
+}
+
+static bool runOnFunction(Function &F, bool PostInlining) {
+ StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined"
+ : "instrument-function-entry";
+
+ StringRef ExitAttr = PostInlining ? "instrument-function-exit-inlined"
+ : "instrument-function-exit";
+
+ StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString();
+ StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString();
+
+ bool Changed = false;
+
+ // If the attribute is specified, insert instrumentation and then "consume"
+ // the attribute so that it's not inserted again if the pass should happen to
+ // run later for some reason.
+
+ if (!EntryFunc.empty()) {
+ DebugLoc DL;
+ if (auto SP = F.getSubprogram())
+ DL = DebugLoc::get(SP->getScopeLine(), 0, SP);
+
+ insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
+ Changed = true;
+ F.removeAttribute(AttributeList::FunctionIndex, EntryAttr);
+ }
+
+ if (!ExitFunc.empty()) {
+ for (BasicBlock &BB : F) {
+ TerminatorInst *T = BB.getTerminator();
+ DebugLoc DL;
+ if (DebugLoc TerminatorDL = T->getDebugLoc())
+ DL = TerminatorDL;
+ else if (auto SP = F.getSubprogram())
+ DL = DebugLoc::get(0, 0, SP);
+
+ if (isa<ReturnInst>(T)) {
+ insertCall(F, ExitFunc, T, DL);
+ Changed = true;
+ }
+ }
+ F.removeAttribute(AttributeList::FunctionIndex, ExitAttr);
+ }
+
+ return Changed;
+}
+
+namespace {
+struct EntryExitInstrumenter : public FunctionPass {
+ static char ID;
+ EntryExitInstrumenter() : FunctionPass(ID) {
+ initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+ bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); }
+};
+char EntryExitInstrumenter::ID = 0;
+
+struct PostInlineEntryExitInstrumenter : public FunctionPass {
+ static char ID;
+ PostInlineEntryExitInstrumenter() : FunctionPass(ID) {
+ initializePostInlineEntryExitInstrumenterPass(
+ *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+ bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); }
+};
+char PostInlineEntryExitInstrumenter::ID = 0;
+}
+
+INITIALIZE_PASS(
+ EntryExitInstrumenter, "ee-instrument",
+ "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)",
+ false, false)
+INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
+ "Instrument function entry/exit with calls to e.g. mcount() "
+ "(post inlining)",
+ false, false)
+
+FunctionPass *llvm::createEntryExitInstrumenterPass() {
+ return new EntryExitInstrumenter();
+}
+
+FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() {
+ return new PostInlineEntryExitInstrumenter();
+}
+
+PreservedAnalyses
+llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) {
+ runOnFunction(F, PostInlining);
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
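A minimal sketch of how a front end opts a function into the new pass (hypothetical helper; the attribute strings are the ones consumed above):

#include "llvm/IR/Function.h"
using namespace llvm;

// Request entry/exit instrumentation on F. The pass reads these string
// attributes, inserts the calls, then removes ("consumes") the attributes so
// a later rerun of the pass is a no-op.
static void requestInstrumentation(Function &F) {
  F.addFnAttr("instrument-function-entry-inlined", "__cyg_profile_func_enter");
  F.addFnAttr("instrument-function-exit-inlined", "__cyg_profile_func_exit");
}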
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index 1328f2f3ec01..3c5e299fae98 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -12,19 +12,33 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Evaluator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <iterator>
#define DEBUG_TYPE "evaluator"
@@ -193,7 +207,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
BasicBlock *&NextBB) {
// This is the main evaluation loop.
- while (1) {
+ while (true) {
Constant *InstResult = nullptr;
DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
@@ -318,7 +332,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
<< "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
-
if (!LI->isSimple()) {
DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
return false; // no volatile/atomic accesses.
@@ -344,9 +357,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false; // Cannot handle array allocs.
}
Type *Ty = AI->getAllocatedType();
- AllocaTmps.push_back(
- make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
- UndefValue::get(Ty), AI->getName()));
+ AllocaTmps.push_back(llvm::make_unique<GlobalVariable>(
+ Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty),
+ AI->getName()));
InstResult = AllocaTmps.back().get();
DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
@@ -420,6 +433,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
DEBUG(dbgs() << "Skipping assume intrinsic.\n");
++CurInst;
continue;
+ } else if (II->getIntrinsicID() == Intrinsic::sideeffect) {
+ DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");
+ ++CurInst;
+ continue;
}
DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
@@ -559,7 +576,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
BasicBlock::iterator CurInst = CurBB->begin();
- while (1) {
+ while (true) {
BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
@@ -594,4 +611,3 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
CurBB = NextBB;
}
}
-
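For context, a minimal sketch of invoking the evaluator, in the way GlobalOpt folds static constructors (hypothetical helper; assumes Evaluator's public interface at this revision):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Evaluator.h"
using namespace llvm;

// Try to fold a call to F with constant arguments at compile time. Returns
// the computed result, or nullptr if some instruction defeats evaluation.
static Constant *tryEvaluate(Function *F,
                             const SmallVectorImpl<Constant *> &Args,
                             const TargetLibraryInfo *TLI) {
  Evaluator Eval(F->getParent()->getDataLayout(), TLI);
  Constant *RetVal = nullptr;
  return Eval.EvaluateFunction(F, RetVal, Args) ? RetVal : nullptr;
}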
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 435eff3bef47..5fdcc6d1d727 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -1,4 +1,4 @@
-//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===//
+//===- FlattenCFG.cpp - Code to perform CFG flattening --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,25 +14,37 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+
using namespace llvm;
#define DEBUG_TYPE "flattencfg"
namespace {
+
class FlattenCFGOpt {
AliasAnalysis *AA;
+
/// \brief Use parallel-and or parallel-or to generate conditions for
/// conditional branches.
bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
+
/// \brief If \param BB is the merge block of an if-region, attempt to merge
/// the if-region with an adjacent if-region upstream if two if-regions
/// contain identical instructions.
bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
+
/// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
/// are from two if-regions whose entry blocks are \p Head1 and \p
/// Head2. \returns true if \p Block1 and \p Block2 contain identical
@@ -43,9 +55,11 @@ class FlattenCFGOpt {
public:
FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+
bool run(BasicBlock *BB);
};
-}
+
+} // end anonymous namespace
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
@@ -120,7 +134,6 @@ public:
/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
/// as its predecessors.
-///
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
PHINode *PHI = dyn_cast<PHINode>(BB->begin());
if (PHI)
@@ -237,8 +250,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
// Do branch inversion.
BasicBlock *CurrBlock = LastCondBlock;
bool EverChanged = false;
- for (;CurrBlock != FirstCondBlock;
- CurrBlock = CurrBlock->getSinglePredecessor()) {
+ for (; CurrBlock != FirstCondBlock;
+ CurrBlock = CurrBlock->getSinglePredecessor()) {
BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
if (!CI)
@@ -309,7 +322,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
/// in the 2nd if-region to compare. \returns true if \param Block1 and \param
/// Block2 have identical instructions and do not have memory reference alias
/// with \param Head2.
-///
bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1,
BasicBlock *Block2) {
@@ -330,7 +342,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock::iterator iter2 = Block2->begin();
BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
- while (1) {
+ while (true) {
if (iter1 == end1) {
if (iter2 != end2)
return false;
@@ -384,7 +396,6 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
/// To:
/// if (a || b)
/// statement;
-///
bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock *IfTrue2, *IfFalse2;
Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
@@ -475,8 +486,7 @@ bool FlattenCFGOpt::run(BasicBlock *BB) {
/// FlattenCFG - This function is used to flatten a CFG. For
/// example, it uses parallel-and and parallel-or mode to collapse
-// if-conditions and merge if-regions with identical statements.
-///
+/// if-conditions and merge if-regions with identical statements.
bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
return FlattenCFGOpt(AA).run(BB);
}
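A minimal driver sketch (hypothetical helper; it restarts the walk after every successful flatten, since FlattenCFG may erase blocks and invalidate iterators):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Repeatedly flatten until a fixed point is reached.
static bool flattenFunction(Function &F, AliasAnalysis *AA) {
  bool EverChanged = false, LocalChange = true;
  while (LocalChange) {
    LocalChange = false;
    for (BasicBlock &BB : F)
      if (FlattenCFG(&BB, AA)) {
        LocalChange = true;
        break; // CFG changed; restart the block walk.
      }
    EverChanged |= LocalChange;
  }
  return EverChanged;
}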
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index 4a2be3a53176..bddcbd86e914 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -13,13 +13,41 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
using namespace llvm;
@@ -160,7 +188,6 @@ int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
/// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L,
const Constant *R) const {
-
Type *TyL = L->getType();
Type *TyR = R->getType();
@@ -226,8 +253,8 @@ int FunctionComparator::cmpConstants(const Constant *L,
if (!L->isNullValue() && R->isNullValue())
return -1;
- auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
- auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+ auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
if (GlobalValueL && GlobalValueR) {
return cmpGlobalValues(GlobalValueL, GlobalValueR);
}
@@ -401,10 +428,9 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
case Type::TokenTyID:
return 0;
- case Type::PointerTyID: {
+ case Type::PointerTyID:
assert(PTyL && PTyR && "Both types must be pointers here.");
return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
- }
case Type::StructTyID: {
StructType *STyL = cast<StructType>(TyL);
@@ -637,7 +663,6 @@ int FunctionComparator::cmpOperations(const Instruction *L,
// Read method declaration comments for more details.
int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
const GEPOperator *GEPR) const {
-
unsigned int ASL = GEPL->getPointerAddressSpace();
unsigned int ASR = GEPR->getPointerAddressSpace();
@@ -869,15 +894,19 @@ namespace {
// buffer.
class HashAccumulator64 {
uint64_t Hash;
+
public:
// Initialize to random constant, so the state isn't zero.
HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+
void add(uint64_t V) {
- Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
+ Hash = hashing::detail::hash_16_bytes(Hash, V);
}
+
// No finishing is required, because the entire hash value is used.
uint64_t getHash() { return Hash; }
};
+
} // end anonymous namespace
// A function hash is calculated by considering only the number of arguments and
@@ -919,5 +948,3 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
}
return H.getHash();
}
-
-
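A minimal sketch of how the hash is meant to be used (hypothetical helper): it is a cheap pre-filter in the spirit of MergeFunctions, so the expensive total-order comparison only runs on functions whose hashes collide.

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/FunctionComparator.h"
using namespace llvm;

// Equal hashes do not imply equal functions, but unequal hashes mean the
// full comparison can be skipped.
static bool worthComparing(Function &L, Function &R) {
  return FunctionComparator::functionHash(L) ==
         FunctionComparator::functionHash(R);
}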
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index a98d07237b47..6b5f593073b4 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -13,9 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
-#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
using namespace llvm;
/// Checks if we should import SGV as a definition, otherwise import as a
@@ -23,21 +21,15 @@ using namespace llvm;
bool FunctionImportGlobalProcessing::doImportAsDefinition(
const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
- // For alias, we tie the definition to the base object. Extract it and recurse
- if (auto *GA = dyn_cast<GlobalAlias>(SGV)) {
- if (GA->isInterposable())
- return false;
- const GlobalObject *GO = GA->getBaseObject();
- if (!GO->hasLinkOnceODRLinkage())
- return false;
- return FunctionImportGlobalProcessing::doImportAsDefinition(
- GO, GlobalsToImport);
- }
// Only import the globals requested for importing.
- if (GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
- return true;
- // Otherwise no.
- return false;
+ if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
+ return false;
+
+ assert(!isa<GlobalAlias>(SGV) &&
+ "Unexpected global alias in the import list.");
+
+ // Otherwise yes.
+ return true;
}
bool FunctionImportGlobalProcessing::doImportAsDefinition(
@@ -132,8 +124,10 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
return SGV->getLinkage();
switch (SGV->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::ExternalLinkage:
- // External defnitions are converted to available_externally
+ // External and linkonce definitions are converted to available_externally
// definitions upon import, so that they are available for inlining
// and/or optimization, but are turned into declarations later
// during the EliminateAvailableExternally pass.
@@ -150,12 +144,6 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// An imported available_externally declaration stays that way.
return SGV->getLinkage();
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::LinkOnceODRLinkage:
- // These both stay the same when importing the definition.
- // The ThinLTO pass will eventually force-import their definitions.
- return SGV->getLinkage();
-
case GlobalValue::WeakAnyLinkage:
// Can't import weak_any definitions correctly, or we might change the
// program semantics, since the linker will pick the first weak_any
@@ -213,6 +201,23 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
}
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
+
+ // Check the summaries to see if the symbol gets resolved to a known local
+ // definition.
+ if (GV.hasName()) {
+ ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID());
+ if (VI) {
+ // Need to check all summaries are local in case of hash collisions.
+ bool IsLocal = VI.getSummaryList().size() &&
+ llvm::all_of(VI.getSummaryList(),
+ [](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return Summary->isDSOLocal();
+ });
+ if (IsLocal)
+ GV.setDSOLocal(true);
+ }
+ }
+
bool DoPromote = false;
if (GV.hasLocalLinkage() &&
((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
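A minimal sketch of the locality check added above, factored as a standalone predicate (hypothetical helper; assumes the ValueInfo and GlobalValueSummary::isDSOLocal() interfaces used in this hunk):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include <memory>
using namespace llvm;

// GUIDs can collide, so every summary for the symbol must agree before the
// importing module may mark its copy dso_local.
static bool resolvesToLocalDefinition(const ModuleSummaryIndex &Index,
                                      const GlobalValue &GV) {
  ValueInfo VI = Index.getValueInfo(GV.getGUID());
  return VI && !VI.getSummaryList().empty() &&
         llvm::all_of(VI.getSummaryList(),
                      [](const std::unique_ptr<GlobalValueSummary> &S) {
                        return S->isDSOLocal();
                      });
}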
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 2a18c140c788..fedf6e100d6c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -12,11 +12,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -26,25 +30,46 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -62,28 +87,37 @@ bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
AAResults *CalleeAAR, bool InsertLifetime) {
return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
}
+
bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
AAResults *CalleeAAR, bool InsertLifetime) {
return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
}
namespace {
+
/// A class for recording information about inlining a landing pad.
class LandingPadInliningInfo {
- BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
- BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
- LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
- PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts.
+ /// Destination of the invoke's unwind.
+ BasicBlock *OuterResumeDest;
+
+ /// Destination for the callee's resume.
+ BasicBlock *InnerResumeDest = nullptr;
+
+ /// LandingPadInst associated with the invoke.
+ LandingPadInst *CallerLPad = nullptr;
+
+ /// PHI for EH values from landingpad insts.
+ PHINode *InnerEHValuesPHI = nullptr;
+
SmallVector<Value*, 8> UnwindDestPHIValues;
public:
LandingPadInliningInfo(InvokeInst *II)
- : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
- CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
+ : OuterResumeDest(II->getUnwindDest()) {
// If there are PHI nodes in the unwind destination block, we need to keep
// track of which values came into them from the invoke before removing
// the edge from this block.
- llvm::BasicBlock *InvokeBB = II->getParent();
+ BasicBlock *InvokeBB = II->getParent();
BasicBlock::iterator I = OuterResumeDest->begin();
for (; isa<PHINode>(I); ++I) {
// Save the value to use for this edge.
@@ -126,7 +160,8 @@ namespace {
}
}
};
-} // anonymous namespace
+
+} // end anonymous namespace
/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
@@ -189,7 +224,7 @@ static Value *getParentPad(Value *EHPad) {
return cast<CatchSwitchInst>(EHPad)->getParentPad();
}
-typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy;
+using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
@@ -617,7 +652,7 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
// track of which values came into them from the invoke before removing the
// edge from this block.
SmallVector<Value *, 8> UnwindDestPHIValues;
- llvm::BasicBlock *InvokeBB = II->getParent();
+ BasicBlock *InvokeBB = II->getParent();
for (Instruction &I : *UnwindDest) {
// Save the value to use for this edge.
PHINode *PHI = dyn_cast<PHINode>(&I);
@@ -1359,6 +1394,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
}
+
/// Update the block frequencies of the caller after a callee has been inlined.
///
/// Each block cloned into the caller has its block frequency scaled by the
@@ -1454,7 +1490,8 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime) {
+ AAResults *CalleeAAR, bool InsertLifetime,
+ Function *ForwardVarArgsTo) {
Instruction *TheCall = CS.getInstruction();
assert(TheCall->getParent() && TheCall->getFunction()
&& "Instruction not in function!");
@@ -1464,8 +1501,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
- CalledFunc->isDeclaration() || // call, or call to a vararg function!
- CalledFunc->getFunctionType()->isVarArg()) return false;
+ CalledFunc->isDeclaration() ||
+      (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or a vararg call we cannot forward!
+ return false;
// The inliner does not know how to inline through calls with operand bundles
// in general ...
@@ -1592,8 +1630,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
auto &DL = Caller->getParent()->getDataLayout();
- assert(CalledFunc->arg_size() == CS.arg_size() &&
- "No varargs calls can be inlined!");
+ assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) &&
+ "Varargs calls can only be inlined if the Varargs are forwarded!");
// Calculate the vector of arguments to pass into the function cloner, which
// matches up the formal to the actual argument values.
@@ -1772,9 +1810,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Move any dbg.declares describing the allocas into the entry basic block.
DIBuilder DIB(*Caller->getParent());
for (auto &AI : IFI.StaticAllocas)
- replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
+ replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0,
+ DIExpression::NoDeref);
}
+ SmallVector<Value*,4> VarArgsToForward;
+ for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
+ i < CS.getNumArgOperands(); i++)
+ VarArgsToForward.push_back(CS.getArgOperand(i));
+
bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
if (InlinedFunctionInfo.ContainsCalls) {
CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
@@ -1783,7 +1827,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
++BB) {
- for (Instruction &I : *BB) {
+ for (auto II = BB->begin(); II != BB->end();) {
+ Instruction &I = *II++;
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
@@ -1806,7 +1851,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// f -> g -> musttail f ==> f -> f
// f -> g -> tail f ==> f -> f
CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
- ChildTCK = std::min(CallSiteTailKind, ChildTCK);
+ if (ChildTCK != CallInst::TCK_NoTail)
+ ChildTCK = std::min(CallSiteTailKind, ChildTCK);
CI->setTailCallKind(ChildTCK);
InlinedMustTailCalls |= CI->isMustTailCall();
@@ -1814,6 +1860,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// 'nounwind'.
if (MarkNoUnwind)
CI->setDoesNotThrow();
+
+ if (ForwardVarArgsTo && !VarArgsToForward.empty() &&
+ CI->getCalledFunction() == ForwardVarArgsTo) {
+ SmallVector<Value*, 6> Params(CI->arg_operands());
+ Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
+ CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI);
+ Call->setDebugLoc(CI->getDebugLoc());
+ CI->replaceAllUsesWith(Call);
+ CI->eraseFromParent();
+ }
}
}
}
@@ -1848,8 +1904,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Check that array size doesn't saturate uint64_t and doesn't
// overflow when it's multiplied by type size.
- if (AllocaArraySize != ~0ULL &&
- UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ if (AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
+ std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
+ AllocaTypeSize) {
AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
AllocaArraySize * AllocaTypeSize);
}
@@ -1980,7 +2037,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// match the callee's return type, we also need to change the return type of
// the intrinsic.
if (Caller->getReturnType() == TheCall->getType()) {
- auto NewEnd = remove_if(Returns, [](ReturnInst *RI) {
+ auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) {
return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
});
Returns.erase(NewEnd, Returns.end());
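A minimal sketch of the extended entry point (hypothetical helper; the trailing parameter is the ForwardVarArgsTo function introduced in this patch):

#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// Inline a call to a vararg callee; the trailing variadic operands of CI are
// appended to every call to ForwardTo inside the inlined body.
static bool inlineAndForwardVarArgs(CallInst *CI, Function *ForwardTo) {
  InlineFunctionInfo IFI;
  return InlineFunction(CallSite(CI), IFI, /*CalleeAAR=*/nullptr,
                        /*InsertLifetime=*/true, ForwardTo);
}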
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 089f2b5f3b18..ae0e2bb6c280 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -56,9 +56,10 @@ static bool VerifyLoopLCSSA = true;
#else
static bool VerifyLoopLCSSA = false;
#endif
-static cl::opt<bool,true>
-VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
- cl::desc("Verify loop lcssa form (time consuming)"));
+static cl::opt<bool, true>
+ VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
+ cl::Hidden,
+ cl::desc("Verify loop lcssa form (time consuming)"));
/// Return true if the specified block is in the list.
static bool isExitBlock(BasicBlock *BB,
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 74610613001c..a1961eecb391 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1,4 +1,4 @@
-//===-- Local.cpp - Functions to perform local transformations ------------===//
+//===- Local.cpp - Functions to perform local transformations -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,42 +13,74 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <utility>
+
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -282,7 +314,6 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
return false;
}
-
//===----------------------------------------------------------------------===//
// Local dead code elimination.
//
@@ -541,7 +572,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
// Control Flow Graph Restructuring.
//
-
/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
/// method is called when we're about to delete Pred as a predecessor of BB. If
/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
@@ -578,12 +608,10 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
}
}
-
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
-///
void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
@@ -602,7 +630,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
if (DestBB->hasAddressTaken()) {
BlockAddress *BA = BlockAddress::get(DestBB);
Constant *Replacement =
- ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1);
BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
BA->getType()));
BA->destroyConstant();
@@ -621,9 +649,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
DestBB->moveAfter(PredBB);
if (DT) {
- BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
- DT->changeImmediateDominator(DestBB, PredBBIDom);
- DT->eraseNode(PredBB);
+    // For some irreducible CFGs we end up with forward-unreachable blocks, so
+    // check that getNode returns a valid node before updating the domtree.
+ if (DomTreeNode *DTN = DT->getNode(PredBB)) {
+ BasicBlock *PredBBIDom = DTN->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
}
// Nuke BB.
PredBB->eraseFromParent();
@@ -640,7 +672,6 @@ static bool CanMergeValues(Value *First, Value *Second) {
/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
-///
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
@@ -696,8 +727,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
return true;
}
-typedef SmallVector<BasicBlock *, 16> PredBlockVector;
-typedef DenseMap<BasicBlock *, Value *> IncomingValueMap;
+using PredBlockVector = SmallVector<BasicBlock *, 16>;
+using IncomingValueMap = DenseMap<BasicBlock *, Value *>;
/// \brief Determines the value to use as the phi node input for a block.
///
@@ -927,7 +958,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
/// nodes in this block. This doesn't try to be clever about PHI nodes
/// which differ only in the order of the incoming values, but instcombine
/// orders them so it usually won't matter.
-///
bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
@@ -937,9 +967,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
static PHINode *getEmptyKey() {
return DenseMapInfo<PHINode *>::getEmptyKey();
}
+
static PHINode *getTombstoneKey() {
return DenseMapInfo<PHINode *>::getTombstoneKey();
}
+
static unsigned getHashValue(PHINode *PN) {
// Compute a hash value on the operands. Instcombine will likely have
// sorted them, which helps expose duplicates, but we have to check all
@@ -948,6 +980,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
hash_combine_range(PN->value_op_begin(), PN->value_op_end()),
hash_combine_range(PN->block_begin(), PN->block_end())));
}
+
static bool isEqual(PHINode *LHS, PHINode *RHS) {
if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
RHS == getEmptyKey() || RHS == getTombstoneKey())
@@ -984,7 +1017,6 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// often possible though. If alignment is important, a more reliable approach
/// is to simply align all global variables and allocation instructions to
/// their preferred alignment from the beginning.
-///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
unsigned PrefAlign,
const DataLayout &DL) {
@@ -1068,12 +1100,11 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
  // Since we can't guarantee that the original dbg.declare intrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
- llvm::BasicBlock::InstListType::iterator PrevI(I);
+ BasicBlock::InstListType::iterator PrevI(I);
if (PrevI != I->getParent()->getInstList().begin()) {
--PrevI;
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
if (DVI->getValue() == I->getOperand(0) &&
- DVI->getOffset() == 0 &&
DVI->getVariable() == DIVar &&
DVI->getExpression() == DIExpr)
return true;
@@ -1092,7 +1123,6 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
findDbgValues(DbgValues, APN);
for (auto *DVI : DbgValues) {
assert(DVI->getValue() == APN);
- assert(DVI->getOffset() == 0);
if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
return true;
}
@@ -1100,12 +1130,13 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
}
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
-/// that has an associated llvm.dbg.decl intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder) {
- auto *DIVar = DDI->getVariable();
+ assert(DII->isAddressOfVariable());
+ auto *DIVar = DII->getVariable();
assert(DIVar && "Missing variable");
- auto *DIExpr = DDI->getExpression();
+ auto *DIExpr = DII->getExpression();
Value *DV = SI->getOperand(0);
// If an argument is zero extended then use argument directly. The ZExt
@@ -1116,7 +1147,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
if (ExtendedArg) {
- // If this DDI was already describing only a fragment of a variable, ensure
+ // If this DII was already describing only a fragment of a variable, ensure
// that fragment is appropriately narrowed here.
// But if a fragment wasn't used, describe the value as the original
// argument (rather than the zext or sext) so that it remains described even
@@ -1129,23 +1160,23 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
DIExpr->elements_end() - 3);
Ops.push_back(dwarf::DW_OP_LLVM_fragment);
Ops.push_back(FragmentOffset);
- const DataLayout &DL = DDI->getModule()->getDataLayout();
+ const DataLayout &DL = DII->getModule()->getDataLayout();
Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
DIExpr = Builder.createExpression(Ops);
}
DV = ExtendedArg;
}
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(),
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
SI);
}
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
-/// that has an associated llvm.dbg.decl intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder) {
- auto *DIVar = DDI->getVariable();
- auto *DIExpr = DDI->getExpression();
+ auto *DIVar = DII->getVariable();
+ auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
if (LdStHasDebugValue(DIVar, DIExpr, LI))
@@ -1156,16 +1187,16 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
// preferable to keep tracking both the loaded value and the original
// address in case the alloca can not be elided.
Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
- LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr);
+ LI, DIVar, DIExpr, DII->getDebugLoc(), (Instruction *)nullptr);
DbgValue->insertAfter(LI);
}
-/// Inserts a llvm.dbg.value intrinsic after a phi
-/// that has an associated llvm.dbg.decl intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
+/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
PHINode *APN, DIBuilder &Builder) {
- auto *DIVar = DDI->getVariable();
- auto *DIExpr = DDI->getExpression();
+ auto *DIVar = DII->getVariable();
+ auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
if (PhiHasDebugValue(DIVar, DIExpr, APN))
@@ -1178,7 +1209,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
// insertion point.
// FIXME: Insert dbg.value markers in the successors when appropriate.
if (InsertionPt != BB->end())
- Builder.insertDbgValueIntrinsic(APN, 0, DIVar, DIExpr, DDI->getDebugLoc(),
+ Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, DII->getDebugLoc(),
&*InsertionPt);
}
@@ -1222,7 +1253,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
- DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
+ DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(),
DDI->getExpression(), DDI->getDebugLoc(),
CI);
}
@@ -1233,16 +1264,25 @@ bool llvm::LowerDbgDeclare(Function &F) {
return true;
}
-/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
-/// alloca 'V', if any.
-DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
- if (auto *L = LocalAsMetadata::getIfExists(V))
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
- for (User *U : MDV->users())
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
- return DDI;
+/// Finds all intrinsics declaring local variables as living in the memory that
+/// 'V' points to. This may include a mix of dbg.declare and
+/// dbg.addr intrinsics.
+TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
+ auto *L = LocalAsMetadata::getIfExists(V);
+ if (!L)
+ return {};
+ auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L);
+ if (!MDV)
+ return {};
+
+ TinyPtrVector<DbgInfoIntrinsic *> Declares;
+ for (User *U : MDV->users()) {
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ if (DII->isAddressOfVariable())
+ Declares.push_back(DII);
+ }
- return nullptr;
+ return Declares;
}
void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
@@ -1253,29 +1293,40 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
+static void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
+ Value *V) {
+ if (auto *L = LocalAsMetadata::getIfExists(V))
+ if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgInfoIntrinsic *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ DbgUsers.push_back(DII);
+}
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
- bool Deref, int Offset) {
- DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address);
- if (!DDI)
- return false;
- DebugLoc Loc = DDI->getDebugLoc();
- auto *DIVar = DDI->getVariable();
- auto *DIExpr = DDI->getExpression();
- assert(DIVar && "Missing variable");
- DIExpr = DIExpression::prepend(DIExpr, Deref, Offset);
- // Insert llvm.dbg.declare immediately after the original alloca, and remove
- // old llvm.dbg.declare.
- Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
- DDI->eraseFromParent();
- return true;
+ bool DerefBefore, int Offset, bool DerefAfter) {
+ auto DbgAddrs = FindDbgAddrUses(Address);
+ for (DbgInfoIntrinsic *DII : DbgAddrs) {
+ DebugLoc Loc = DII->getDebugLoc();
+ auto *DIVar = DII->getVariable();
+ auto *DIExpr = DII->getExpression();
+ assert(DIVar && "Missing variable");
+ DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter);
+    // Insert llvm.dbg.declare immediately before InsertBefore, and remove old
+    // llvm.dbg.declare.
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
+ if (DII == InsertBefore)
+ InsertBefore = &*std::next(InsertBefore->getIterator());
+ DII->eraseFromParent();
+ }
+ return !DbgAddrs.empty();
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, bool Deref, int Offset) {
+ DIBuilder &Builder, bool DerefBefore,
+ int Offset, bool DerefAfter) {
return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
- Deref, Offset);
+ DerefBefore, Offset, DerefAfter);
}
static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
@@ -1302,8 +1353,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
DIExpr = Builder.createExpression(Ops);
}
- Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr,
- Loc, DVI);
+ Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
DVI->eraseFromParent();
}
@@ -1322,17 +1372,28 @@ void llvm::salvageDebugInfo(Instruction &I) {
SmallVector<DbgValueInst *, 1> DbgValues;
auto &M = *I.getModule();
- auto MDWrap = [&](Value *V) {
+ auto wrapMD = [&](Value *V) {
return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V));
};
- if (isa<BitCastInst>(&I)) {
- findDbgValues(DbgValues, &I);
- for (auto *DVI : DbgValues) {
- // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value
- // to use the cast's source.
- DVI->setOperand(0, MDWrap(I.getOperand(0)));
- DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) {
+ auto *DIExpr = DVI->getExpression();
+ DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
+ DIExpression::NoDeref,
+ DIExpression::WithStackValue);
+ DVI->setOperand(0, wrapMD(I.getOperand(0)));
+ DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ };
+
+ if (isa<BitCastInst>(&I) || isa<IntToPtrInst>(&I)) {
+ // Bitcasts are entirely irrelevant for debug info. Rewrite dbg.value,
+ // dbg.addr, and dbg.declare to use the cast's source.
+ SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ for (auto *DII : DbgUsers) {
+ DII->setOperand(0, wrapMD(I.getOperand(0)));
+ DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
}
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
findDbgValues(DbgValues, &I);
@@ -1343,27 +1404,27 @@ void llvm::salvageDebugInfo(Instruction &I) {
// Rewrite a constant GEP into a DIExpression. Since we are performing
// arithmetic to compute the variable's *value* in the DIExpression, we
// need to mark the expression with a DW_OP_stack_value.
- if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
- auto *DIExpr = DVI->getExpression();
- DIBuilder DIB(M, /*AllowUnresolved*/ false);
+ if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset))
// GEP offsets are i32 and thus always fit into an int64_t.
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref,
- Offset.getSExtValue(),
- DIExpression::WithStackValue);
- DVI->setOperand(0, MDWrap(I.getOperand(0)));
- DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
- DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
- }
+ applyOffset(DVI, Offset.getSExtValue());
}
+ } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
+ if (BI->getOpcode() == Instruction::Add)
+ if (auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1)))
+ if (ConstInt->getBitWidth() <= 64) {
+ APInt Offset = ConstInt->getValue();
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues)
+ applyOffset(DVI, Offset.getSExtValue());
+ }
} else if (isa<LoadInst>(&I)) {
findDbgValues(DbgValues, &I);
for (auto *DVI : DbgValues) {
// Rewrite the load into DW_OP_deref.
auto *DIExpr = DVI->getExpression();
- DIBuilder DIB(M, /*AllowUnresolved*/ false);
DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
- DVI->setOperand(0, MDWrap(I.getOperand(0)));
- DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
+ DVI->setOperand(0, wrapMD(I.getOperand(0)));
+ DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
}
}
@@ -1480,7 +1541,6 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
static bool markAliveBlocks(Function &F,
SmallPtrSetImpl<BasicBlock*> &Reachable) {
-
SmallVector<BasicBlock*, 128> Worklist;
BasicBlock *BB = &F.front();
Worklist.push_back(BB);
@@ -1586,13 +1646,16 @@ static bool markAliveBlocks(Function &F,
static CatchPadInst *getEmptyKey() {
return DenseMapInfo<CatchPadInst *>::getEmptyKey();
}
+
static CatchPadInst *getTombstoneKey() {
return DenseMapInfo<CatchPadInst *>::getTombstoneKey();
}
+
static unsigned getHashValue(CatchPadInst *CatchPad) {
return static_cast<unsigned>(hash_combine_range(
CatchPad->value_op_begin(), CatchPad->value_op_end()));
}
+
static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) {
if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
RHS == getEmptyKey() || RHS == getTombstoneKey())
@@ -1832,7 +1895,8 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
}
-bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
+bool llvm::callsGCLeafFunction(ImmutableCallSite CS,
+ const TargetLibraryInfo &TLI) {
// Check if the function is specifically marked as a gc leaf function.
if (CS.hasFnAttr("gc-leaf-function"))
return true;
@@ -1846,6 +1910,14 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
IID != Intrinsic::experimental_deoptimize;
}
+ // Lib calls can be materialized by some passes, and won't be
+ // marked as 'gc-leaf-function.' All available Libcalls are
+ // GC-leaf.
+ LibFunc LF;
+ if (TLI.getLibFunc(CS, LF)) {
+ return TLI.has(LF);
+ }
+
return false;
}
@@ -1893,6 +1965,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
}
namespace {
+
/// A potential constituent of a bitreverse or bswap expression. See
/// collectBitParts for a fuller explanation.
struct BitPart {
@@ -1902,12 +1975,14 @@ struct BitPart {
/// The Value that this is a bitreverse/bswap of.
Value *Provider;
+
/// The "provenance" of each bit. Provenance[A] = B means that bit A
/// in Provider becomes bit B in the result of this expression.
SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
enum { Unset = -1 };
};
+
} // end anonymous namespace
/// Analyze the specified subexpression and see if it is capable of providing
@@ -1933,7 +2008,6 @@ struct BitPart {
///
/// Because we pass around references into \c BPS, we must use a container that
/// does not invalidate internal references (std::map instead of DenseMap).
-///
static const Optional<BitPart> &
collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
std::map<Value *, Optional<BitPart>> &BPS) {
@@ -2069,8 +2143,6 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
return From == BitWidth - To - 1;
}
-/// Given an OR instruction, check to see if this is a bitreverse
-/// idiom. If so, insert the new intrinsic and return true.
bool llvm::recognizeBSwapOrBitReverseIdiom(
Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
SmallVectorImpl<Instruction *> &InsertedInsts) {
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index e21e34df8ded..f43af9772771 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -258,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
// Create the new outer loop.
- Loop *NewOuter = new Loop();
+ Loop *NewOuter = LI->AllocateLoop();
// Change the parent loop to use the outer loop as its child now.
if (Loop *Parent = L->getParentLoop())
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index f2527f89e83e..dc98a39adcc5 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -21,8 +21,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -68,9 +67,23 @@ static inline void remapInstruction(Instruction *I,
ValueToValueMapTy &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
+
+ // Unwrap arguments of dbg.value intrinsics.
+ bool Wrapped = false;
+ if (auto *V = dyn_cast<MetadataAsValue>(Op))
+ if (auto *Unwrapped = dyn_cast<ValueAsMetadata>(V->getMetadata())) {
+ Op = Unwrapped->getValue();
+ Wrapped = true;
+ }
+
+ auto wrap = [&](Value *V) {
+ auto &C = I->getContext();
+ return Wrapped ? MetadataAsValue::get(C, ValueAsMetadata::get(V)) : V;
+ };
+
ValueToValueMapTy::iterator It = VMap.find(Op);
if (It != VMap.end())
- I->setOperand(op, It->second);
+ I->setOperand(op, wrap(It->second));
}
if (PHINode *PN = dyn_cast<PHINode>(I)) {
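// The unwrap/rewrap above is the usual MetadataAsValue round-trip for
// values carried by dbg.value operands; in isolation it is (sketch, with
// Ctx and V as stand-in names):
//
//   MetadataAsValue *Wrapped =
//       MetadataAsValue::get(Ctx, ValueAsMetadata::get(V));
//   Value *Unwrapped =
//       cast<ValueAsMetadata>(Wrapped->getMetadata())->getValue();
//   assert(Unwrapped == V && "round-trip should be lossless");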
@@ -200,7 +213,7 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
assert(OriginalBB == OldLoop->getHeader() &&
"Header should be first in RPO");
- NewLoop = new Loop();
+ NewLoop = LI->AllocateLoop();
Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
if (NewLoopParent)
@@ -255,8 +268,7 @@ static bool isEpilogProfitable(Loop *L) {
return false;
}
-/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
-/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
@@ -285,37 +297,36 @@ static bool isEpilogProfitable(Loop *L) {
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
-/// If we want to perform PGO-based loop peeling, PeelCount is set to the
+/// If we want to perform PGO-based loop peeling, PeelCount is set to the
/// number of iterations we want to peel off.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
- bool AllowRuntime, bool AllowExpensiveTripCount,
- bool PreserveCondBr, bool PreserveOnlyFirst,
- unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
- bool PreserveLCSSA) {
+LoopUnrollResult llvm::UnrollLoop(
+ Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
+ bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
+ unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
BasicBlock *LatchBlock = L->getLoopLatch();
if (!LatchBlock) {
DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
// Loops with indirectbr cannot be cloned.
if (!L->isSafeToClone()) {
DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
// The current loop unroll pass can only unroll loops with a single latch
@@ -329,7 +340,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
" Can't unroll; loop not terminated by a conditional branch.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
@@ -339,14 +350,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
" exiting the loop can be unrolled\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
" Won't unroll loop: address of header block is taken.\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
if (TripCount != 0)
@@ -362,7 +373,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// Don't enter the unroll code if there is nothing to do.
if (TripCount == 0 && Count < 2 && PeelCount == 0) {
DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
assert(Count > 0);
@@ -395,8 +406,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"Did not expect runtime trip-count unrolling "
"and peeling for the same loop");
- if (PeelCount)
- peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+ if (PeelCount) {
+ bool Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+
+ // Successful peeling may result in a change in the loop preheader/trip
+ // counts. If we later unroll the loop, we want these to be updated.
+ if (Peeled) {
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ assert(ExitingBlock && "Loop without exiting block?");
+ Preheader = L->getLoopPreheader();
+ TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ }
+ }
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
@@ -418,15 +440,15 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- EpilogProfitability, LI, SE, DT,
- PreserveLCSSA)) {
+ EpilogProfitability, UnrollRemainder, LI, SE,
+ DT, AC, PreserveLCSSA)) {
if (Force)
RuntimeTripCount = false;
else {
DEBUG(
dbgs() << "Wont unroll; remainder loop could not be generated"
"when assuming runtime trip count\n");
- return false;
+ return LoopUnrollResult::Unmodified;
}
}
@@ -450,36 +472,53 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// Report the unrolling decision.
if (CompletelyUnroll) {
DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << TripCount << "!\n");
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
- L->getHeader())
- << "completely unrolled loop with "
- << NV("UnrollCount", TripCount) << " iterations");
+ << " with trip count " << TripCount << "!\n");
+ if (ORE)
+ ORE->emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+ L->getHeader())
+ << "completely unrolled loop with "
+ << NV("UnrollCount", TripCount) << " iterations";
+ });
} else if (PeelCount) {
DEBUG(dbgs() << "PEELING loop %" << Header->getName()
<< " with iteration count " << PeelCount << "!\n");
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
- L->getHeader())
- << " peeled loop by " << NV("PeelCount", PeelCount)
- << " iterations");
+ if (ORE)
+ ORE->emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
+ L->getHeader())
+ << " peeled loop by " << NV("PeelCount", PeelCount)
+ << " iterations";
+ });
} else {
- OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
- L->getHeader());
- Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count);
+ auto DiagBuilder = [&]() {
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ return Diag << "unrolled loop by a factor of "
+ << NV("UnrollCount", Count);
+ };
DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
<< " by " << Count);
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
- ORE->emit(Diag << " with a breakout at trip "
- << NV("BreakoutTrip", BreakoutTrip));
+ if (ORE)
+ ORE->emit([&]() {
+ return DiagBuilder() << " with a breakout at trip "
+ << NV("BreakoutTrip", BreakoutTrip);
+ });
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
- ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple)
- << " trips per branch");
+ if (ORE)
+ ORE->emit([&]() {
+ return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
+ << " trips per branch";
+ });
} else if (RuntimeTripCount) {
DEBUG(dbgs() << " with run-time trip count");
- ORE->emit(Diag << " with run-time trip count");
+ if (ORE)
+ ORE->emit(
+ [&]() { return DiagBuilder() << " with run-time trip count"; });
}
DEBUG(dbgs() << "!\n");
}
@@ -523,8 +562,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (Header->getParent()->isDebugInfoForProfiling())
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
- if (const DILocation *DIL = I.getDebugLoc())
- I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+ if (!isa<DbgInfoIntrinsic>(&I))
+ if (const DILocation *DIL = I.getDebugLoc())
+ I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
@@ -796,7 +836,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
Loop *OuterL = L->getParentLoop();
// Update LoopInfo if the loop is completely removed.
if (CompletelyUnroll)
- LI->markAsRemoved(L);
+ LI->erase(L);
// After complete unrolling most of the blocks should be contained in OuterL.
// However, some of them might happen to be out of OuterL (e.g. if they
@@ -821,7 +861,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (NeedToFixLCSSA) {
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop
- // after LoopInfo's been updated by markAsRemoved.
+ // after LoopInfo's been updated by LoopInfo::erase.
Loop *LatchLoop = LI->getLoopFor(Latches.back());
Loop *FixLCSSALoop = OuterL;
if (!FixLCSSALoop->contains(LatchLoop))
@@ -844,7 +884,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
}
}
- return true;
+ return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
+ : LoopUnrollResult::PartiallyUnrolled;
}
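// With the boolean result replaced by LoopUnrollResult, call sites can now
// tell full unrolling (which erases the Loop) from partial unrolling. A
// hedged sketch of the caller pattern (argument names are stand-ins):
//
//   LoopUnrollResult Result = UnrollLoop(
//       L, Count, TripCount, Force, AllowRuntime, AllowExpensiveTripCount,
//       PreserveCondBr, PreserveOnlyFirst, TripMultiple, PeelCount,
//       UnrollRemainder, LI, SE, DT, AC, ORE, PreserveLCSSA);
//   if (Result == LoopUnrollResult::FullyUnrolled) {
//     // L was erased from LoopInfo; it must not be dereferenced again.
//   } else if (Result == LoopUnrollResult::PartiallyUnrolled) {
//     // L still exists and may be queued for further simplification.
//   }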
/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 5c21490793e7..4273ce0b6200 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -1,4 +1,4 @@
-//===-- UnrollLoopPeel.cpp - Loop peeling utilities -----------------------===//
+//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,29 +13,42 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
+
STATISTIC(NumPeeled, "Number of loops peeled");
static cl::opt<unsigned> UnrollPeelMaxCount(
@@ -49,7 +62,8 @@ static cl::opt<unsigned> UnrollForcePeelCount(
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
-static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+static const unsigned InfiniteIterationsToInvariance =
+ std::numeric_limits<unsigned>::max();
// Check whether we are capable of peeling this loop.
static bool canPeel(Loop *L) {
@@ -210,8 +224,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
}
}
-
- return;
}
/// \brief Update the branch weights of the latch of a peeled-off loop
@@ -236,7 +248,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
unsigned IterNumber, unsigned AvgIters,
uint64_t &PeeledHeaderWeight) {
-
// FIXME: Pick a more realistic distribution.
// Currently the proportion of weight we assign to the fall-through
// side of the branch drops linearly with the iteration number, and we use
@@ -272,7 +283,6 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
ValueToValueMapTy &LVMap, DominatorTree *DT,
LoopInfo *LI) {
-
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *PreHeader = L->getLoopPreheader();
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d43ce7abb7cd..efff06f79cb7 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -25,7 +25,6 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
@@ -294,7 +293,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
/// Return the new cloned loop that is created when CreateRemainderLoop is true.
static Loop *
CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
- const bool UseEpilogRemainder, BasicBlock *InsertTop,
+ const bool UseEpilogRemainder, const bool UnrollRemainder,
+ BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
@@ -393,35 +393,14 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
if (CreateRemainderLoop) {
Loop *NewLoop = NewLoops[L];
assert(NewLoop && "L should have been cloned");
- // Add unroll disable metadata to disable future unrolling for this loop.
- SmallVector<Metadata *, 4> MDs;
- // Reserve first location for self reference to the LoopID metadata node.
- MDs.push_back(nullptr);
- MDNode *LoopID = NewLoop->getLoopID();
- if (LoopID) {
- // First remove any existing loop unrolling metadata.
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- bool IsUnrollMetadata = false;
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (MD) {
- const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
- }
- if (!IsUnrollMetadata)
- MDs.push_back(LoopID->getOperand(i));
- }
- }
- LLVMContext &Context = NewLoop->getHeader()->getContext();
- SmallVector<Metadata *, 1> DisableOperands;
- DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
- MDNode *DisableNode = MDNode::get(Context, DisableOperands);
- MDs.push_back(DisableNode);
+ // Only add loop metadata if the loop is not going to be completely
+ // unrolled.
+ if (UnrollRemainder)
+ return NewLoop;
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
- NewLoop->setLoopID(NewLoopID);
+ // Add unroll disable metadata to disable future unrolling for this loop.
+ NewLoop->setLoopAlreadyUnrolled();
return NewLoop;
}
else
@@ -435,12 +414,9 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
BasicBlock *LatchExit, bool PreserveLCSSA,
bool UseEpilogRemainder) {
- // Support runtime unrolling for multiple exit blocks and multiple exiting
- // blocks.
- if (!UnrollRuntimeMultiExit)
- return false;
- // Even if runtime multi exit is enabled, we currently have some correctness
- // constrains in unrolling a multi-exit loop.
+ // We currently have some correctness constraints in unrolling a multi-exit
+ // loop. Check for these below.
+
// We rely on LCSSA form being preserved when the exit blocks are transformed.
if (!PreserveLCSSA)
return false;
@@ -470,7 +446,54 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
return true;
}
+/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
+/// we return true only if UnrollRuntimeMultiExit is set to true.
+static bool canProfitablyUnrollMultiExitLoop(
+ Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
+ bool PreserveLCSSA, bool UseEpilogRemainder) {
+
+#if !defined(NDEBUG)
+ SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
+ assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
+ PreserveLCSSA, UseEpilogRemainder) &&
+ "Should be safe to unroll before checking profitability!");
+#endif
+
+ // Priority goes to UnrollRuntimeMultiExit if it's supplied.
+ if (UnrollRuntimeMultiExit.getNumOccurrences())
+ return UnrollRuntimeMultiExit;
+
+ // The main pain point with multi-exit loop unrolling is that once unrolled,
+ // we will not be able to merge all blocks into straight-line code.
+ // There are branches within the unrolled loop that go to the OtherExits.
+ // The second point is the increase in code size, but this is true
+ // irrespective of multiple exits.
+
+ // Note: Both the heuristics below are coarse grained. We are essentially
+ // enabling unrolling of loops that have a single side exit other than the
+ // normal LatchExit (i.e. exiting into a deoptimize block).
+ // The heuristics considered are:
+ // 1. low number of branches in the unrolled version.
+ // 2. high predictability of these extra branches.
+ // We avoid unrolling loops that have more than two exiting blocks. This
+ // limits the total number of branches in the unrolled loop to be at most
+ // the unroll factor (since one of the exiting blocks is the latch block).
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() > 2)
+ return false;
+ // The second heuristic is that L has one exit other than the latchexit and
+ // that exit is a deoptimize block. We know that deoptimize blocks are rarely
+ // taken, which also implies the branch leading to the deoptimize block is
+ // highly predictable.
+ return (OtherExits.size() == 1 &&
+ OtherExits[0]->getTerminatingDeoptimizeCall());
+ // TODO: These can be fine-tuned further to consider code size or deopt states
+ // that are captured by the deoptimize exit block.
+ // Also, we can extend this to support more cases, if we actually
+ // know of kinds of multiexit loops that would benefit from unrolling.
+}
/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
@@ -513,10 +536,14 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
+ bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, bool PreserveLCSSA) {
+ DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA) {
DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
DEBUG(L->dump());
+ DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" :
+ dbgs() << "Using prolog remainder.\n");
// Make sure the loop is in canonical form.
if (!L->isLoopSimplifyForm()) {
@@ -538,8 +565,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
"one of the loop latch successors should be the exit block!");
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
- bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(
- L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder);
+ bool isMultiExitUnrollingEnabled =
+ canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
+ UseEpilogRemainder) &&
+ canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
+ UseEpilogRemainder);
// Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
if (!isMultiExitUnrollingEnabled &&
(!L->getExitingBlock() || OtherExits.size())) {
@@ -724,7 +754,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+ L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
+ InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
@@ -753,11 +784,15 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Add the incoming values from the remainder code to the end of the phi
// node.
for (unsigned i = 0; i < oldNumOperands; i++) {
- Value *newVal = VMap[Phi->getIncomingValue(i)];
+ Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
// newVal can be a constant or derived from values outside the loop, and
- // hence need not have a VMap value.
- if (!newVal)
+ // hence need not have a VMap value. In that case, populate the VMap entry
+ // explicitly, mapping the value to itself, so that later remapping of the
+ // cloned blocks resolves it correctly.
+ if (!newVal) {
newVal = Phi->getIncomingValue(i);
+ VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
+ }
Phi->addIncoming(newVal,
cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
}
@@ -868,6 +903,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
}
+ if (remainderLoop && UnrollRemainder) {
+ DEBUG(dbgs() << "Unrolling remainder loop\n");
+ UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
+ /*Force*/ false, /*AllowRuntime*/ false,
+ /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+ /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+ /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
+ /*ORE*/ nullptr, PreserveLCSSA);
+ }
+
NumRuntimeUnrolled++;
return true;
}
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 3c522786641a..c3fa05a11a24 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -432,7 +432,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {
bool FP = I->getType()->isFloatingPointTy();
Instruction *UAI = Prev.getUnsafeAlgebraInst();
- if (!UAI && FP && !I->hasUnsafeAlgebra())
+ if (!UAI && FP && !I->isFast())
UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
switch (I->getOpcode()) {
@@ -565,7 +565,8 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
auto *I = Phi->user_back();
if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
DT->dominates(Previous, I->user_back())) {
- SinkAfter[I] = Previous;
+ if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
+ SinkAfter[I] = Previous;
return true;
}
}
@@ -659,11 +660,11 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
break;
}
- // We only match FP sequences with unsafe algebra, so we can unconditionally
+ // We only match FP sequences that are 'fast', so we can unconditionally
// set it on any generated instructions.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
FastMathFlags FMF;
- FMF.setUnsafeAlgebra();
+ FMF.setFast();
Builder.setFastMathFlags(FMF);
Value *Cmp;
@@ -677,7 +678,8 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
}
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- const SCEV *Step, BinaryOperator *BOp)
+ const SCEV *Step, BinaryOperator *BOp,
+ SmallVectorImpl<Instruction *> *Casts)
: StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
assert(IK != IK_NoInduction && "Not an induction");
@@ -704,6 +706,12 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
(InductionBinOp->getOpcode() == Instruction::FAdd ||
InductionBinOp->getOpcode() == Instruction::FSub))) &&
"Binary opcode should be specified for FP induction");
+
+ if (Casts) {
+ for (auto &Inst : *Casts) {
+ RedundantCasts.push_back(Inst);
+ }
+ }
}
int InductionDescriptor::getConsecutiveDirection() const {
@@ -767,7 +775,7 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
// Floating point operations had to be 'fast' to enable the induction.
FastMathFlags Flags;
- Flags.setUnsafeAlgebra();
+ Flags.setFast();
Value *MulExp = B.CreateFMul(StepValue, Index);
if (isa<Instruction>(MulExp))
@@ -807,7 +815,7 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
StartValue = Phi->getIncomingValue(1);
} else {
assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
- "Unexpected Phi node in the loop");
+ "Unexpected Phi node in the loop");
BEValue = Phi->getIncomingValue(1);
StartValue = Phi->getIncomingValue(0);
}
@@ -840,6 +848,110 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
return true;
}
+/// This function is called when we suspect that the update-chain of a phi node
+/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; see createAddRecFromPHIWithCasts). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate can be free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some scev
+/// expression with casts).
+///
+/// For example, without a predicate the scev expression can take the following
+/// form:
+/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
+///
+/// It corresponds to the following IR sequence:
+/// %for.body:
+/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+/// %casted_phi = "ExtTrunc i64 %x"
+/// %add = add i64 %casted_phi, %step
+///
+/// where %x is given in \p PN,
+/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
+/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
+/// several forms, for example, such as:
+/// ExtTrunc1: %casted_phi = and %x, 2^n-1
+/// or:
+/// ExtTrunc2: %t = shl %x, m
+/// %casted_phi = ashr %t, m
+///
+/// If we are able to find such a sequence, we return the instructions
+/// we found, namely %casted_phi and the instructions on its use-def chain up
+/// to the phi (not including the phi).
+bool getCastsForInductionPHI(
+ PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev,
+ const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) {
+
+ assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+ auto *PN = cast<PHINode>(PhiScev->getValue());
+ assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
+ const Loop *L = AR->getLoop();
+
+ // Find any cast instructions that participate in the def-use chain of
+ // PhiScev in the loop.
+ // FORNOW/TODO: We currently expect the def-use chain to include only
+ // two-operand instructions, where one of the operands is an invariant.
+ // createAddRecFromPHIWithCasts() currently does not support anything more
+ // involved than that, so we keep the search simple. This can be
+ // extended/generalized as needed.
+
+ auto getDef = [&](const Value *Val) -> Value * {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
+ if (!BinOp)
+ return nullptr;
+ Value *Op0 = BinOp->getOperand(0);
+ Value *Op1 = BinOp->getOperand(1);
+ Value *Def = nullptr;
+ if (L->isLoopInvariant(Op0))
+ Def = Op1;
+ else if (L->isLoopInvariant(Op1))
+ Def = Op0;
+ return Def;
+ };
+
+ // Look for the instruction that defines the induction via the
+ // loop backedge.
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+ Value *Val = PN->getIncomingValueForBlock(Latch);
+ if (!Val)
+ return false;
+
+ // Follow the def-use chain until the induction phi is reached.
+ // If on the way we encounter a Value that has the same SCEV Expr as the
+ // phi node, we can consider the instructions we visit from that point
+ // as part of the cast-sequence that can be ignored.
+ bool InCastSequence = false;
+ auto *Inst = dyn_cast<Instruction>(Val);
+ while (Val != PN) {
+ // If we encountered a phi node other than PN, or if we left the loop,
+ // we bail out.
+ if (!Inst || !L->contains(Inst)) {
+ return false;
+ }
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
+ if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
+ InCastSequence = true;
+ if (InCastSequence) {
+ // Only the last instruction in the cast sequence is expected to have
+ // uses outside the induction def-use chain.
+ if (!CastInsts.empty())
+ if (!Inst->hasOneUse())
+ return false;
+ CastInsts.push_back(Inst);
+ }
+ Val = getDef(Val);
+ if (!Val)
+ return false;
+ Inst = dyn_cast<Instruction>(Val);
+ }
+
+ return InCastSequence;
+}
+
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
PredicatedScalarEvolution &PSE,
InductionDescriptor &D,
@@ -869,13 +981,26 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
return false;
}
+ // Record any Cast instructions that participate in the induction update
+ const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+ // If we started from an UnknownSCEV, and managed to build an addRecurrence
+ // only after enabling Assume with PSCEV, this means we may have encountered
+ // cast instructions that required adding a runtime check in order to
+ // guarantee the correctness of the AddRecurrence representation of the
+ // induction.
+ if (PhiScev != AR && SymbolicPhi) {
+ SmallVector<Instruction *, 2> Casts;
+ if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+ }
+
return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
- ScalarEvolution *SE,
- InductionDescriptor &D,
- const SCEV *Expr) {
+bool InductionDescriptor::isInductionPHI(
+ PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer inductions variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@@ -894,7 +1019,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
// FIXME: We should treat this as a uniform. Unfortunately, we
// don't currently know how to handled uniform PHIs.
DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
- return false;
+ return false;
}
Value *StartValue =
@@ -907,7 +1032,8 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
return false;
if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, Step);
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
+ CastsToIgnore);
return true;
}
@@ -1115,6 +1241,149 @@ Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
return None;
}
+/// Does a BFS from a given node to all of its children inside a given loop.
+/// The returned vector of nodes includes the starting point.
+SmallVector<DomTreeNode *, 16>
+llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
+ SmallVector<DomTreeNode *, 16> Worklist;
+ auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
+ // Only include subregions in the top level loop.
+ BasicBlock *BB = DTN->getBlock();
+ if (CurLoop->contains(BB))
+ Worklist.push_back(DTN);
+ };
+
+ AddRegionToWorklist(N);
+
+ for (size_t I = 0; I < Worklist.size(); I++)
+ for (DomTreeNode *Child : Worklist[I]->getChildren())
+ AddRegionToWorklist(Child);
+
+ return Worklist;
+}
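// Typical use of the helper, sketched assuming the caller holds a
// DominatorTree DT and a Loop L (processBlock is a hypothetical visitor):
//
//   for (DomTreeNode *DTN :
//        collectChildrenInLoop(DT->getNode(L->getHeader()), L))
//     processBlock(DTN->getBlock());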
+
+void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
+ ScalarEvolution *SE = nullptr,
+ LoopInfo *LI = nullptr) {
+ assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
+ auto *Preheader = L->getLoopPreheader();
+ assert(Preheader && "Preheader should exist!");
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ //
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues.
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ if (SE)
+ SE->forgetLoop(L);
+
+ auto *ExitBlock = L->getUniqueExitBlock();
+ assert(ExitBlock && "Should have a unique exit block!");
+ assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
+ auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
+ assert(OldBr && "Preheader must end with a branch");
+ assert(OldBr->isUnconditional() && "Preheader must have a single successor");
+ // Connect the preheader to the exit block. Keep the old edge to the header
+ // around to perform the dominator tree update in two separate steps
+ // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
+ // preheader -> header.
+ //
+ //
+ // 0. Preheader       1. Preheader          2. Preheader
+ //        |              |  |                  |
+ //        V              |  V                  |
+ //   Header <--\         |  Header <--\        |  Header <--\
+ //   |    |    |         |  |    |    |        |  |    |    |
+ //   |    V    |         |  |    V    |        |  |    V    |
+ //   |   Body --/        |  |   Body --/       |  |   Body --/
+ //   V                   V  V                  V  V
+ //  Exit                  Exit                  Exit
+ //
+ // By doing this in two separate steps we can perform the dominator tree
+ // update without using the batch update API.
+ //
+ // Even when the loop is never executed, we cannot remove the edge from the
+ // source block to the exit block. Consider the case where the unexecuted loop
+ // branches back to an outer loop. If we deleted the loop and removed the edge
+ // coming to this inner loop, this will break the outer loop structure (by
+ // deleting the backedge of the outer loop). If the outer loop is indeed a
+ // non-loop, it will be deleted in a future iteration of the loop deletion pass.
+ IRBuilder<> Builder(OldBr);
+ Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
+ // Remove the old branch. The conditional branch becomes a new terminator.
+ OldBr->eraseFromParent();
+
+ // Rewrite phis in the exit block to get their inputs from the Preheader
+ // instead of the exiting block.
+ BasicBlock::iterator BI = ExitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ // Set the zero'th element of Phi to be from the preheader and remove all
+ // other incoming values. Given the loop has dedicated exits, all other
+ // incoming values must be from the exiting blocks.
+ int PredIndex = 0;
+ P->setIncomingBlock(PredIndex, Preheader);
+ // Removes all incoming values from all other exiting blocks (including
+ // duplicate values from an exiting block).
+ // Nuke all entries except the zero'th entry which is the preheader entry.
+ // NOTE! We need to remove Incoming Values in the reverse order as done
+ // below, to keep the indices valid for deletion (removeIncomingValues
+ // updates getNumIncomingValues and shifts all values down into the operand
+ // being deleted).
+ for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
+ P->removeIncomingValue(e - i, false);
+
+ assert((P->getNumIncomingValues() == 1 &&
+ P->getIncomingBlock(PredIndex) == Preheader) &&
+ "Should have exactly one value and that's from the preheader!");
+ ++BI;
+ }
+
+ // Disconnect the loop body by branching directly to its exit.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ Builder.CreateBr(ExitBlock);
+ // Remove the old branch.
+ Preheader->getTerminator()->eraseFromParent();
+
+ if (DT) {
+ // Update the dominator tree by informing it about the new edge from the
+ // preheader to the exit.
+ DT->insertEdge(Preheader, ExitBlock);
+ // Inform the dominator tree about the removed edge.
+ DT->deleteEdge(Preheader, L->getHeader());
+ }
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ for (auto *Block : L->blocks())
+ Block->dropAllReferences();
+
+ if (LI) {
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove
+ // its entry from the loop's block list. We do that in the next section.
+ for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
+ LpI != LpE; ++LpI)
+ (*LpI)->eraseFromParent();
+
+ // Finally, remove the blocks from LoopInfo. This has to happen late because
+ // otherwise our loop iterators won't work.
+
+ SmallPtrSet<BasicBlock *, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (BasicBlock *BB : blocks)
+ LI->removeBlock(BB);
+
+ // The last step is to update LoopInfo now that we've eliminated this loop.
+ LI->erase(L);
+ }
+}
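// Call-site sketch for the hoisted utility, assuming a pass that has
// already proven L dead; any of the analyses may be null:
//
//   assert(L->isLoopSimplifyForm() && "expected loop-simplify form");
//   deleteDeadLoop(L, DT, SE, LI);
//   // From here on, L has been erased from LI and must not be used.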
+
/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
bool llvm::isGuaranteedToExecute(const Instruction &Inst,
@@ -1194,7 +1463,7 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
static Value *addFastMathFlag(Value *V) {
if (isa<FPMathOperator>(V)) {
FastMathFlags Flags;
- Flags.setUnsafeAlgebra();
+ Flags.setFast();
cast<Instruction>(V)->setFastMathFlags(Flags);
}
return V;
@@ -1256,8 +1525,8 @@ Value *llvm::createSimpleTargetReduction(
using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
// TODO: Support creating ordered reductions.
- FastMathFlags FMFUnsafe;
- FMFUnsafe.setUnsafeAlgebra();
+ FastMathFlags FMFFast;
+ FMFFast.setFast();
switch (Opcode) {
case Instruction::Add:
@@ -1278,14 +1547,14 @@ Value *llvm::createSimpleTargetReduction(
case Instruction::FAdd:
BuildFunc = [&]() {
auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
+ cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
return Rdx;
};
break;
case Instruction::FMul:
BuildFunc = [&]() {
auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
+ cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
return Rdx;
};
break;
@@ -1321,55 +1590,39 @@ Value *llvm::createSimpleTargetReduction(
}
/// Create a vector reduction using a given recurrence descriptor.
-Value *llvm::createTargetReduction(IRBuilder<> &Builder,
+Value *llvm::createTargetReduction(IRBuilder<> &B,
const TargetTransformInfo *TTI,
RecurrenceDescriptor &Desc, Value *Src,
bool NoNaN) {
// TODO: Support in-order reductions based on the recurrence descriptor.
- RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind();
+ using RD = RecurrenceDescriptor;
+ RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
TargetTransformInfo::ReductionFlags Flags;
Flags.NoNaN = NoNaN;
- auto getSimpleRdx = [&](unsigned Opc) {
- return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags);
- };
switch (RecKind) {
- case RecurrenceDescriptor::RK_FloatAdd:
- return getSimpleRdx(Instruction::FAdd);
- case RecurrenceDescriptor::RK_FloatMult:
- return getSimpleRdx(Instruction::FMul);
- case RecurrenceDescriptor::RK_IntegerAdd:
- return getSimpleRdx(Instruction::Add);
- case RecurrenceDescriptor::RK_IntegerMult:
- return getSimpleRdx(Instruction::Mul);
- case RecurrenceDescriptor::RK_IntegerAnd:
- return getSimpleRdx(Instruction::And);
- case RecurrenceDescriptor::RK_IntegerOr:
- return getSimpleRdx(Instruction::Or);
- case RecurrenceDescriptor::RK_IntegerXor:
- return getSimpleRdx(Instruction::Xor);
- case RecurrenceDescriptor::RK_IntegerMinMax: {
- switch (Desc.getMinMaxRecurrenceKind()) {
- case RecurrenceDescriptor::MRK_SIntMax:
- Flags.IsSigned = true;
- Flags.IsMaxOp = true;
- break;
- case RecurrenceDescriptor::MRK_UIntMax:
- Flags.IsMaxOp = true;
- break;
- case RecurrenceDescriptor::MRK_SIntMin:
- Flags.IsSigned = true;
- break;
- case RecurrenceDescriptor::MRK_UIntMin:
- break;
- default:
- llvm_unreachable("Unhandled MRK");
- }
- return getSimpleRdx(Instruction::ICmp);
+ case RD::RK_FloatAdd:
+ return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
+ case RD::RK_FloatMult:
+ return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
+ case RD::RK_IntegerAdd:
+ return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
+ case RD::RK_IntegerMult:
+ return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
+ case RD::RK_IntegerAnd:
+ return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
+ case RD::RK_IntegerOr:
+ return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
+ case RD::RK_IntegerXor:
+ return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
+ case RD::RK_IntegerMinMax: {
+ RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
+ Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
+ Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
+ return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
}
- case RecurrenceDescriptor::RK_FloatMinMax: {
- Flags.IsMaxOp =
- Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax;
- return getSimpleRdx(Instruction::FCmp);
+ case RD::RK_FloatMinMax: {
+ Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
+ return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
}
default:
llvm_unreachable("Unhandled RecKind");
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 900450b40061..57dc225e9dab 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -168,13 +168,14 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
assert(ILengthType &&
"expected size argument to memcpy to be an integer type!");
+ Type *Int8Type = Type::getInt8Ty(Ctx);
+ bool LoopOpIsInt8 = LoopOpType == Int8Type;
ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
- Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
- Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
- Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
-
+ Value *RuntimeLoopCount = LoopOpIsInt8 ?
+ CopyLen :
+ PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
BasicBlock *LoopBB =
- BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+ BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
IRBuilder<> LoopBuilder(LoopBB);
PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
@@ -189,11 +190,15 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
LoopIndex->addIncoming(NewIndex, LoopBB);
- Type *Int8Type = Type::getInt8Ty(Ctx);
- if (LoopOpType != Int8Type) {
+ if (!LoopOpIsInt8) {
+ // Add in the residual byte-wise copy for any bytes left over after the
+ // main loop, which copies one LoopOpType element per iteration.
+ Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+ Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+
// Loop body for the residual copy.
BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
- PreLoopBB->getParent(), nullptr);
+ PreLoopBB->getParent(),
+ PostLoopBB);
// Residual loop header.
BasicBlock *ResHeaderBB = BasicBlock::Create(
Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
@@ -258,61 +263,6 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
}
}
-void llvm::createMemCpyLoop(Instruction *InsertBefore,
- Value *SrcAddr, Value *DstAddr, Value *CopyLen,
- unsigned SrcAlign, unsigned DestAlign,
- bool SrcIsVolatile, bool DstIsVolatile) {
- Type *TypeOfCopyLen = CopyLen->getType();
-
- BasicBlock *OrigBB = InsertBefore->getParent();
- Function *F = OrigBB->getParent();
- BasicBlock *NewBB =
- InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split");
- BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
- F, NewBB);
-
- IRBuilder<> Builder(OrigBB->getTerminator());
-
- // SrcAddr and DstAddr are expected to be pointer types,
- // so no check is made here.
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
- // Cast pointers to (char *)
- SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
- DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
-
- Builder.CreateCondBr(
- Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
- LoopBB);
- OrigBB->getTerminator()->eraseFromParent();
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
-
- // load from SrcAddr+LoopIndex
- // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
- // word-sized loads and stores.
- Value *Element =
- LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
- LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
- SrcIsVolatile);
- // store at DstAddr+LoopIndex
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
- DstAddr, LoopIndex),
- DstIsVolatile);
-
- // The value for LoopIndex coming from backedge is (LoopIndex + 1)
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
-}
-
// Lower memmove to IR. memmove is required to correctly copy overlapping memory
// regions; therefore, it has to check the relative positions of the source and
// destination pointers and choose the copy direction accordingly.
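// The direction choice reduces to one unsigned pointer comparison; a sketch
// of the emitted control flow (block and builder names are illustrative):
//
//   // If SrcAddr is below DstAddr the regions may overlap such that a
//   // forward copy would clobber source bytes before they are read, so
//   // copy backwards in that case and forwards otherwise.
//   Value *PtrCompare =
//       Builder.CreateICmpULT(SrcAddr, DstAddr, "compare_src_dst");
//   Builder.CreateCondBr(PtrCompare, CopyBackwardsBB, CopyForwardBB);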
@@ -454,38 +404,26 @@ static void createMemSetLoop(Instruction *InsertBefore,
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
const TargetTransformInfo &TTI) {
- // Original implementation
- if (!TTI.useWideIRMemcpyLoopLowering()) {
- createMemCpyLoop(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+ createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
} else {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
- createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+ createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
/* SrcAddr */ Memcpy->getRawSource(),
/* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ CI,
+ /* CopyLen */ Memcpy->getLength(),
/* SrcAlign */ Memcpy->getAlignment(),
/* DestAlign */ Memcpy->getAlignment(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransformInfo */ TTI);
- } else {
- createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransfomrInfo */ TTI);
- }
+ /* TargetTransformInfo */ TTI);
}
}
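// Illustrative sketch (not part of the patch): with a constant length, the
// known-size path can split the copy into wide operations plus a residual;
// the names below are hypothetical:
//
//   uint64_t W = 4;                       // assumed wide-op size in bytes
//   uint64_t LoopCount = CopyLenVal / W;  // full wide load/store iterations
//   uint64_t Residual  = CopyLenVal % W;  // trailing bytes copied narrowly
//
// The unknown-size path has to compute an equivalent split at run time.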
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 890afbc46e63..344cb35df986 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -13,46 +13,65 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "lower-switch"
namespace {
+
struct IntRange {
int64_t Low, High;
};
- // Return true iff R is covered by Ranges.
- static bool IsInRanges(const IntRange &R,
- const std::vector<IntRange> &Ranges) {
- // Note: Ranges must be sorted, non-overlapping and non-adjacent.
-
- // Find the first range whose High field is >= R.High,
- // then check if the Low field is <= R.Low. If so, we
- // have a Range that covers R.
- auto I = std::lower_bound(
- Ranges.begin(), Ranges.end(), R,
- [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
- return I != Ranges.end() && I->Low <= R.Low;
- }
+
+} // end anonymous namespace
+
+// Return true iff R is covered by Ranges.
+static bool IsInRanges(const IntRange &R,
+ const std::vector<IntRange> &Ranges) {
+ // Note: Ranges must be sorted, non-overlapping and non-adjacent.
+
+ // Find the first range whose High field is >= R.High,
+ // then check if the Low field is <= R.Low. If so, we
+ // have a Range that covers R.
+ auto I = std::lower_bound(
+ Ranges.begin(), Ranges.end(), R,
+ [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+ return I != Ranges.end() && I->Low <= R.Low;
+}
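// Illustrative use of IsInRanges (not part of the patch), assuming Ranges
// is sorted, non-overlapping and non-adjacent as the precondition requires:
//
//   std::vector<IntRange> Ranges = {{0, 9}, {20, 29}};
//   IsInRanges({3, 7}, Ranges);   // true:  [3,7] lies inside [0,9]
//   IsInRanges({8, 21}, Ranges);  // false: [8,21] spans the gap [10,19]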
+
+namespace {
/// Replace all SwitchInst instructions with chained branch instructions.
class LowerSwitch : public FunctionPass {
public:
- static char ID; // Pass identification, replacement for typeid
+ // Pass identification, replacement for typeid
+ static char ID;
+
LowerSwitch() : FunctionPass(ID) {
initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
}
@@ -68,8 +87,9 @@ namespace {
: Low(low), High(high), BB(bb) {}
};
- typedef std::vector<CaseRange> CaseVector;
- typedef std::vector<CaseRange>::iterator CaseItr;
+ using CaseVector = std::vector<CaseRange>;
+ using CaseItr = std::vector<CaseRange>::iterator;
+
private:
void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList);
@@ -86,22 +106,24 @@ namespace {
/// The comparison function for sorting the switch case values in the vector.
/// WARNING: Case ranges should be disjoint!
struct CaseCmp {
- bool operator () (const LowerSwitch::CaseRange& C1,
- const LowerSwitch::CaseRange& C2) {
-
+ bool operator()(const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
return CI1->getValue().slt(CI2->getValue());
}
};
-}
+
+} // end anonymous namespace
char LowerSwitch::ID = 0;
-INITIALIZE_PASS(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
// Publicly exposed interface to pass...
char &llvm::LowerSwitchID = LowerSwitch::ID;
+
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
// createLowerSwitchPass - Interface to this file...
FunctionPass *llvm::createLowerSwitchPass() {
return new LowerSwitch();
@@ -136,6 +158,7 @@ bool LowerSwitch::runOnFunction(Function &F) {
static raw_ostream& operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C)
LLVM_ATTRIBUTE_USED;
+
static raw_ostream& operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C) {
O << "[";
@@ -186,7 +209,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
// Remove incoming values in the reverse order to prevent invalidating
// *successive* indices.
- for (unsigned III : reverse(Indices))
+ for (unsigned III : llvm::reverse(Indices))
PN->removeIncomingValue(III);
}
}
@@ -294,8 +317,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
/// value, so the jump to the "default" branch is warranted.
BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
BasicBlock* OrigBlock,
- BasicBlock* Default)
-{
+ BasicBlock* Default) {
Function* F = OrigBlock->getParent();
BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
@@ -442,7 +464,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
unsigned MaxPop = 0;
BasicBlock *PopSucc = nullptr;
- IntRange R = { INT64_MIN, INT64_MAX };
+ IntRange R = {std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max()};
UnreachableRanges.push_back(R);
for (const auto &I : Cases) {
int64_t Low = I.Low->getSExtValue();
@@ -457,8 +480,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
assert(Low > LastRange.Low);
LastRange.High = Low - 1;
}
- if (High != INT64_MAX) {
- IntRange R = { High + 1, INT64_MAX };
+ if (High != std::numeric_limits<int64_t>::max()) {
+ IntRange R = { High + 1, std::numeric_limits<int64_t>::max() };
UnreachableRanges.push_back(R);
}
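// Worked example (not part of the patch): for the sorted case ranges [1,3]
// and [7,9], the loop above yields the unreachable ranges
//   [INT64_MIN, 0], [4, 6], [10, INT64_MAX]
// i.e. every gap between the cases, clamped at the int64_t limits.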
@@ -487,8 +510,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
assert(MaxPop > 0 && PopSucc);
Default = PopSucc;
Cases.erase(
- remove_if(Cases,
- [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
+ llvm::remove_if(
+ Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
Cases.end());
// If there are no cases left, just branch.
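// Illustrative sketch (not part of the patch) of the PopSucc rewrite above:
// when the default is unreachable, the most popular case successor becomes
// the new default and its cases are erased, e.g.
//   switch i32 %x, label %unreach [ 1 -> %bb, 2 -> %bb, 3 -> %bb, 4 -> %c ]
// becomes
//   switch i32 %x, label %bb [ 4 -> %c ]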
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index b659a2e4463f..29f289b62da0 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -15,12 +15,17 @@
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mem2reg"
@@ -33,7 +38,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
bool Changed = false;
- while (1) {
+ while (true) {
Allocas.clear();
// Find allocas that are safe to promote, by looking at all instructions in
@@ -65,15 +70,17 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) {
}
namespace {
+
struct PromoteLegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
+ // Pass identification, replacement for typeid
+ static char ID;
+
PromoteLegacyPass() : FunctionPass(ID) {
initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry());
}
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
- //
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
@@ -89,10 +96,12 @@ struct PromoteLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
- };
-} // end of anonymous namespace
+};
+
+} // end anonymous namespace
char PromoteLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to "
"Register",
false, false)
@@ -102,7 +111,6 @@ INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register",
false, false)
// createPromoteMemoryToRegister - Provide an entry point to create this pass.
-//
FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
return new PromoteLegacyPass();
}
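// Hedged usage sketch (not part of the patch): the utility wrapped by the
// passes above is typically driven like this:
//
//   std::vector<AllocaInst *> Allocas;
//   for (Instruction &I : F.getEntryBlock())
//     if (auto *AI = dyn_cast<AllocaInst>(&I))
//       if (isAllocaPromotable(AI))      // only load/store uses, no escapes
//         Allocas.push_back(AI);
//   if (!Allocas.empty())
//     PromoteMemToReg(Allocas, DT, &AC); // rewrite allocas into SSA values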
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 9f2ad540c83d..0f7bd76c03ca 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -15,16 +15,30 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
+
using namespace llvm;
+static const char *const metaNames[] = {
+ // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+ "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+ "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+};
+
namespace {
// This PRNG is from the ISO C spec. It is intentionally simple and
@@ -43,12 +57,6 @@ namespace {
}
};
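// For reference (not part of the patch), the ISO C sample PRNG the comment
// above refers to is essentially:
//
//   static unsigned long next = 1;
//   int rand(void) {
//     next = next * 1103515245 + 12345;
//     return (unsigned)(next / 65536) % 32768;  // 15-bit result
//   }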
- static const char *const metaNames[] = {
- // See http://en.wikipedia.org/wiki/Metasyntactic_variable
- "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
- "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
- };
-
struct Renamer {
Renamer(unsigned int seed) {
prng.srand(seed);
@@ -62,7 +70,9 @@ namespace {
};
struct MetaRenamer : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
+ // Pass identification, replacement for typeid
+ static char ID;
+
MetaRenamer() : ModulePass(ID) {
initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
}
@@ -123,7 +133,11 @@ namespace {
TLI.getLibFunc(F, Tmp))
continue;
- F.setName(renamer.newName());
+ // Leave @main alone. The output of -metarenamer might be passed to
+ // lli for execution and the latter needs a main entry point.
+ if (Name != "main")
+ F.setName(renamer.newName());
+
runOnFunction(F);
}
return true;
@@ -144,14 +158,17 @@ namespace {
return true;
}
};
-}
+
+} // end anonymous namespace
char MetaRenamer::ID = 0;
+
INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
"Assign new names to everything", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
"Assign new names to everything", false, false)
+
//===----------------------------------------------------------------------===//
//
// MetaRenamer - Rename everything with metasyntactic names.
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 2ef3d6336ae2..ba4b7f3cc263 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -243,7 +243,7 @@ std::string llvm::getUniqueModuleId(Module *M) {
bool ExportsSymbols = false;
auto AddGlobal = [&](GlobalValue &GV) {
if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
- !GV.hasExternalLinkage())
+ !GV.hasExternalLinkage() || GV.hasComdat())
return;
ExportsSymbols = true;
Md5.update(GV.getName());
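// Sketch of the surrounding scheme (not part of the patch): each exported
// symbol name feeds an MD5 hash whose digest becomes the module id, so
// comdat members are now skipped because cross-module deduplication would
// make the id unstable:
//
//   MD5 Md5;
//   Md5.update("exported_symbol");  // repeated for every qualifying global
//   MD5::MD5Result R;
//   Md5.final(R);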
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index d4cdaede6b86..d47be6ea566b 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -49,9 +49,10 @@ INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
static cl::opt<bool> VerifyPredicateInfo(
"verify-predicateinfo", cl::init(false), cl::Hidden,
cl::desc("Verify PredicateInfo in legacy printer pass."));
-namespace {
DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
- "Controls which variables are renamed with predicateinfo")
+ "Controls which variables are renamed with predicateinfo");
+
+namespace {
// Given a predicate info that is a type of branching terminator, get the
// branching block.
const BasicBlock *getBranchBlock(const PredicateBase *PB) {
@@ -610,7 +611,12 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
}
convertUsesToDFSOrdered(Op, OrderedUses);
- std::sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+ // Here we require a stable sort because we do not bother to try to
+ // assign an order to the operands the uses represent. Thus, two
+ // uses in the same instruction do not have a strict sort order
+ // currently and will be considered equal. We could get rid of the
+ // stable sort by defining such an order if we wanted.
+ std::stable_sort(OrderedUses.begin(), OrderedUses.end(), Compare);
SmallVector<ValueDFS, 8> RenameStack;
// For each use, sorted into dfs order, push values and replace uses with
// top of stack, which will represent the reaching def.
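// Illustration (not part of the patch): with equal keys, std::sort may
// produce either order, while std::stable_sort keeps the input order,
// which is what the comment above relies on for same-instruction uses:
//
//   using P = std::pair<int, char>;
//   std::vector<P> V = {{1, 'a'}, {1, 'b'}};
//   std::stable_sort(V.begin(), V.end(),
//                    [](const P &A, const P &B) { return A.first < B.first; });
//   // V is guaranteed to remain {{1,'a'}, {1,'b'}}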
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index cdba982e6641..fcd3bd08482a 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -21,25 +21,38 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mem2reg"
@@ -103,7 +116,7 @@ struct AllocaInfo {
bool OnlyUsedInOneBlock;
Value *AllocaPointerVal;
- DbgDeclareInst *DbgDeclare;
+ TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares;
void clear() {
DefiningBlocks.clear();
@@ -112,7 +125,7 @@ struct AllocaInfo {
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
AllocaPointerVal = nullptr;
- DbgDeclare = nullptr;
+ DbgDeclares.clear();
}
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
@@ -147,27 +160,21 @@ struct AllocaInfo {
}
}
- DbgDeclare = FindAllocaDbgDeclare(AI);
+ DbgDeclares = FindDbgAddrUses(AI);
}
};
// Data package used by RenamePass()
class RenamePassData {
public:
- typedef std::vector<Value *> ValVector;
+ using ValVector = std::vector<Value *>;
+
+ RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V)
+ : BB(B), Pred(P), Values(std::move(V)) {}
- RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
- RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
- : BB(B), Pred(P), Values(V) {}
BasicBlock *BB;
BasicBlock *Pred;
ValVector Values;
-
- void swap(RenamePassData &RHS) {
- std::swap(BB, RHS.BB);
- std::swap(Pred, RHS.Pred);
- Values.swap(RHS.Values);
- }
};
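// Hedged note (not part of the patch): taking ValVector by value and
// std::move-ing it lets callers hand the vector over without copying it:
//
//   RenamePassData::ValVector Vals(NumAllocas);
//   Worklist.emplace_back(BB, Pred, std::move(Vals));  // no vector copy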
/// \brief This assigns and keeps a per-bb relative ordering of load/store
@@ -223,12 +230,15 @@ public:
struct PromoteMem2Reg {
/// The alloca instructions being promoted.
std::vector<AllocaInst *> Allocas;
+
DominatorTree &DT;
DIBuilder DIB;
+
/// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
AssumptionCache *AC;
const SimplifyQuery SQ;
+
/// Reverse mapping of Allocas.
DenseMap<AllocaInst *, unsigned> AllocaLookup;
@@ -252,10 +262,9 @@ struct PromoteMem2Reg {
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
- SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;
+ SmallVector<TinyPtrVector<DbgInfoIntrinsic *>, 8> AllocaDbgDeclares;
/// The set of basic blocks the renamer has already visited.
- ///
SmallPtrSet<BasicBlock *, 16> Visited;
/// Contains a stable numbering of basic blocks to avoid non-deterministic
@@ -298,7 +307,7 @@ private:
bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};
-} // end of anonymous namespace
+} // end anonymous namespace
/// Given a LoadInst LI this adds assume(LI != null) after it.
static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
@@ -345,8 +354,8 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI, DominatorTree &DT,
- AssumptionCache *AC) {
+ LargeBlockInfo &LBI, const DataLayout &DL,
+ DominatorTree &DT, AssumptionCache *AC) {
StoreInst *OnlyStore = Info.OnlyStore;
bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
BasicBlock *StoreBB = OnlyStore->getParent();
@@ -380,7 +389,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
Info.UsingBlocks.push_back(StoreBB);
continue;
}
-
} else if (LI->getParent() != StoreBB &&
!DT.dominates(StoreBB, LI->getParent())) {
// If the load and store are in different blocks, use BB dominance to
@@ -402,7 +410,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// that information when we erase this Load. So we preserve
// it with an assume.
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
addAssumeNonNull(AC, LI);
LI->replaceAllUsesWith(ReplVal);
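// IR sketch (not part of the patch) of the !nonnull preservation above:
// a load such as
//   %v = load i32*, i32** %p, !nonnull !0
// loses its metadata once replaced by %repl, so the fact is kept alive as
//   %c = icmp ne i32* %repl, null
//   call void @llvm.assume(i1 %c)
// unless isKnownNonZero() already proves %repl non-null at the load.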
@@ -416,11 +424,11 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
- DDI->eraseFromParent();
- LBI.deleteValue(DDI);
+ ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
+ DII->eraseFromParent();
+ LBI.deleteValue(DII);
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
@@ -449,6 +457,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
/// }
static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LargeBlockInfo &LBI,
+ const DataLayout &DL,
DominatorTree &DT,
AssumptionCache *AC) {
// The trickiest case to handle is when we have large blocks. Because of this,
@@ -457,7 +466,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// make it efficient to get the index of various operations in the block.
// Walk the use-def list of the alloca, getting the locations of all stores.
- typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
+ using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>;
StoresByIndexTy StoresByIndex;
for (User *U : AI->users())
@@ -497,7 +506,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// information when we erase it. So we preserve it with an assume.
Value *ReplVal = std::prev(I)->second->getOperand(0);
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
addAssumeNonNull(AC, LI);
LI->replaceAllUsesWith(ReplVal);
@@ -511,9 +520,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
}
SI->eraseFromParent();
LBI.deleteValue(SI);
@@ -523,9 +532,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DDI->eraseFromParent();
- LBI.deleteValue(DDI);
+ for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ DII->eraseFromParent();
+ LBI.deleteValue(DII);
}
++NumLocalPromoted;
@@ -567,7 +576,7 @@ void PromoteMem2Reg::run() {
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
- if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) {
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
++NumSingleStore;
@@ -578,7 +587,7 @@ void PromoteMem2Reg::run() {
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
if (Info.OnlyUsedInOneBlock &&
- promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) {
+ promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
continue;
@@ -593,8 +602,8 @@ void PromoteMem2Reg::run() {
}
// Remember the dbg.declare intrinsic describing this alloca, if any.
- if (Info.DbgDeclare)
- AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+ if (!Info.DbgDeclares.empty())
+ AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares;
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
@@ -604,7 +613,6 @@ void PromoteMem2Reg::run() {
// nodes and see if we can optimize out some work by avoiding insertion of
// dead phi nodes.
-
// Unique the set of defining blocks for efficient lookup.
SmallPtrSet<BasicBlock *, 32> DefBlocks;
DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
@@ -629,8 +637,8 @@ void PromoteMem2Reg::run() {
});
unsigned CurrentVersion = 0;
- for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i)
- QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion);
+ for (BasicBlock *BB : PHIBlocks)
+ QueuePhiNode(BB, AllocaNum, CurrentVersion);
}
if (Allocas.empty())
@@ -641,19 +649,16 @@ void PromoteMem2Reg::run() {
// Set the incoming values for the basic block to be null values for all of
// the alloca's. We do this in case there is a load of a value that has not
// been stored yet. In this case, it will get this null value.
- //
RenamePassData::ValVector Values(Allocas.size());
for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
// Walks all basic blocks in the function performing the SSA rename algorithm
// and inserting the phi nodes we marked as necessary
- //
std::vector<RenamePassData> RenamePassWorkList;
RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values));
do {
- RenamePassData RPD;
- RPD.swap(RenamePassWorkList.back());
+ RenamePassData RPD = std::move(RenamePassWorkList.back());
RenamePassWorkList.pop_back();
// RenamePass may add new worklist entries.
RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
@@ -663,9 +668,7 @@ void PromoteMem2Reg::run() {
Visited.clear();
// Remove the allocas themselves from the function.
- for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
- Instruction *A = Allocas[i];
-
+ for (Instruction *A : Allocas) {
// If there are any uses of the alloca instructions left, they must be in
// unreachable basic blocks that were not processed by walking the dominator
// tree. Just delete the users now.
@@ -675,9 +678,9 @@ void PromoteMem2Reg::run() {
}
// Remove alloca's dbg.declare intrinsics from the function.
- for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
- if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
- DDI->eraseFromParent();
+ for (auto &Declares : AllocaDbgDeclares)
+ for (auto *DII : Declares)
+ DII->eraseFromParent();
// Loop over all of the PHI nodes and see if there are any that we can get
// rid of because they merge all of the same incoming values. This can
@@ -714,7 +717,6 @@ void PromoteMem2Reg::run() {
// hasn't traversed. If this is the case, the PHI nodes may not
// have incoming values for all predecessors. Loop over all PHI nodes we have
// created, inserting undef values if they are missing any incoming values.
- //
for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
I = NewPhiNodes.begin(),
E = NewPhiNodes.end();
@@ -762,8 +764,8 @@ void PromoteMem2Reg::run() {
while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
SomePHI->getNumIncomingValues() == NumBadPreds) {
Value *UndefVal = UndefValue::get(SomePHI->getType());
- for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
- SomePHI->addIncoming(UndefVal, Preds[pred]);
+ for (BasicBlock *Pred : Preds)
+ SomePHI->addIncoming(UndefVal, Pred);
}
}
@@ -779,7 +781,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
AllocaInst *AI, AllocaInfo &Info,
const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
-
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
// check their predecessors. Start with all the using blocks.
@@ -834,9 +835,7 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
// Since the value is live into BB, it is either defined in a predecessor or
// live into it too. Add the preds to the worklist unless they are a
// defining block.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
-
+ for (BasicBlock *P : predecessors(BB)) {
// The value is not live into a predecessor if it defines the value.
if (DefBlocks.count(P))
continue;
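// Simplified sketch (not part of the patch) of the liveness walk above:
//
//   SmallVector<BasicBlock *, 32> Worklist(UsingBlocks.begin(),
//                                          UsingBlocks.end());
//   while (!Worklist.empty()) {
//     BasicBlock *BB = Worklist.pop_back_val();
//     if (!LiveInBlocks.insert(BB).second)
//       continue;                        // already known to be live-in
//     for (BasicBlock *P : predecessors(BB))
//       if (!DefBlocks.count(P))         // a defining block stops the walk
//         Worklist.push_back(P);
//   }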
@@ -906,8 +905,8 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
- if (DbgDeclareInst *DDI = AllocaDbgDeclares[AllocaNo])
- ConvertDebugDeclareToDebugValue(DDI, APN, DIB);
+ for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
+ ConvertDebugDeclareToDebugValue(DII, APN, DIB);
// Get the next phi node.
++PNI;
@@ -943,7 +942,7 @@ NextIteration:
// that information when we erase this Load. So we preserve
// it with an assume.
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !llvm::isKnownNonNullAt(V, LI, &DT))
+ !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT))
addAssumeNonNull(AC, LI);
// Anything using the load now uses the current value.
@@ -963,8 +962,8 @@ NextIteration:
// what value were we writing?
IncomingVals[ai->second] = SI->getOperand(0);
// Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
BB->getInstList().erase(SI);
}
}
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 6ccf54e49dd3..e4b20b0faa15 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/BasicBlock.h"
@@ -39,12 +38,13 @@ using namespace llvm;
#define DEBUG_TYPE "ssaupdater"
-typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
+using AvailableValsTy = DenseMap<BasicBlock *, Value *>;
+
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
-SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode *> *NewPHI)
: InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
@@ -72,7 +72,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
}
static bool IsEquivalentPHI(PHINode *PHI,
- SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) {
+ SmallDenseMap<BasicBlock *, Value *, 8> &ValueMapping) {
unsigned PHINumValues = PHI->getNumIncomingValues();
if (PHINumValues != ValueMapping.size())
return false;
@@ -100,7 +100,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// Otherwise, we have the hard case. Get the live-in values for each
// predecessor.
- SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ SmallVector<std::pair<BasicBlock *, Value *>, 8> PredValues;
Value *SingularValue = nullptr;
// We can get our predecessor info by walking the pred_iterator list, but it
@@ -145,8 +145,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// Otherwise, we do need a PHI: check to see if we already have one available
// in this block that produces the right value.
if (isa<PHINode>(BB->begin())) {
- SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
- PredValues.end());
+ SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(),
+ PredValues.end());
PHINode *SomePHI;
for (BasicBlock::iterator It = BB->begin();
(SomePHI = dyn_cast<PHINode>(It)); ++It) {
@@ -218,11 +218,11 @@ namespace llvm {
template<>
class SSAUpdaterTraits<SSAUpdater> {
public:
- typedef BasicBlock BlkT;
- typedef Value *ValT;
- typedef PHINode PhiT;
+ using BlkT = BasicBlock;
+ using ValT = Value *;
+ using PhiT = PHINode;
+ using BlkSucc_iterator = succ_iterator;
- typedef succ_iterator BlkSucc_iterator;
static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
@@ -253,7 +253,7 @@ public:
/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
/// vector, set Info->NumPreds, and allocate space in Info->Preds.
static void FindPredecessorBlocks(BasicBlock *BB,
- SmallVectorImpl<BasicBlock*> *Preds) {
+ SmallVectorImpl<BasicBlock *> *Preds) {
// We can get our predecessor info by walking the pred_iterator list,
// but it is relatively slow. If we already have PHI nodes in this
// block, walk one of them to get the predecessor list instead.
@@ -293,7 +293,6 @@ public:
}
/// ValueIsPHI - Check if a value is a PHI.
- ///
static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
return dyn_cast<PHINode>(Val);
}
@@ -333,7 +332,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
//===----------------------------------------------------------------------===//
LoadAndStorePromoter::
-LoadAndStorePromoter(ArrayRef<const Instruction*> Insts,
+LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
SSAUpdater &S, StringRef BaseName) : SSA(S) {
if (Insts.empty()) return;
@@ -349,11 +348,11 @@ LoadAndStorePromoter(ArrayRef<const Instruction*> Insts,
}
void LoadAndStorePromoter::
-run(const SmallVectorImpl<Instruction*> &Insts) const {
+run(const SmallVectorImpl<Instruction *> &Insts) const {
// First step: bucket up uses of the alloca by the block they occur in.
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
- DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock;
+ DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock;
for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
@@ -361,12 +360,12 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Okay, now we can iterate over all the blocks in the function with uses,
// processing them. Keep track of which loads are loading a live-in value.
// Walk the uses in the use-list order to be deterministic.
- SmallVector<LoadInst*, 32> LiveInLoads;
- DenseMap<Value*, Value*> ReplacedLoads;
+ SmallVector<LoadInst *, 32> LiveInLoads;
+ DenseMap<Value *, Value *> ReplacedLoads;
for (Instruction *User : Insts) {
BasicBlock *BB = User->getParent();
- TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
+ TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB];
// If this block has already been processed, ignore this repeat use.
if (BlockUses.empty()) continue;
@@ -489,7 +488,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
bool
LoadAndStorePromoter::isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &Insts)
+ const SmallVectorImpl<Instruction *> &Insts)
const {
return is_contained(Insts, I);
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 8784b9702141..f02f80cc1b78 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -22,12 +22,14 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
@@ -35,8 +37,8 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
@@ -53,6 +55,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
@@ -73,6 +76,7 @@
#include <iterator>
#include <map>
#include <set>
+#include <tuple>
#include <utility>
#include <vector>
@@ -141,12 +145,13 @@ namespace {
// The first field contains the value that the switch produces when a certain
// case group is selected, and the second field is a vector containing the
// cases composing the case group.
-typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
- SwitchCaseResultVectorTy;
+using SwitchCaseResultVectorTy =
+ SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
+
// The first field contains the phi node that generates a result of the switch
// and the second field contains the value generated for a certain case in the
// switch for that PHI.
-typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
+using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
/// ValueEqualityComparisonCase - Represents a case of a switch.
struct ValueEqualityComparisonCase {
@@ -167,11 +172,9 @@ struct ValueEqualityComparisonCase {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
const DataLayout &DL;
- unsigned BonusInstThreshold;
- AssumptionCache *AC;
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
- // See comments in SimplifyCFGOpt::SimplifySwitch.
- bool LateSimplifyCFG;
+ const SimplifyCFGOptions &Options;
+
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(
TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
@@ -194,11 +197,9 @@ class SimplifyCFGOpt {
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
- unsigned BonusInstThreshold, AssumptionCache *AC,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
- bool LateSimplifyCFG)
- : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
- LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {}
+ const SimplifyCFGOptions &Opts)
+ : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
bool run(BasicBlock *BB);
};
@@ -438,18 +439,24 @@ namespace {
/// fail.
struct ConstantComparesGatherer {
const DataLayout &DL;
- Value *CompValue; /// Value found for the switch comparison
- Value *Extra; /// Extra clause to be checked before the switch
- SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch
- unsigned UsedICmps; /// Number of comparisons matched in the and/or chain
+
+ /// Value found for the switch comparison
+ Value *CompValue = nullptr;
+
+ /// Extra clause to be checked before the switch
+ Value *Extra = nullptr;
+
+ /// Set of integers to match in switch
+ SmallVector<ConstantInt *, 8> Vals;
+
+ /// Number of comparisons matched in the and/or chain
+ unsigned UsedICmps = 0;
/// Construct and compute the result for the comparison instruction Cond
- ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL)
- : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
+ ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
gather(Cond);
}
- /// Prevent copy
ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
ConstantComparesGatherer &
operator=(const ConstantComparesGatherer &) = delete;
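// Behavioral illustration (not part of the patch): for a chain like
//   if (x == 1 || x == 4 || x == 9) { ... }
// the gatherer ends up with CompValue = x, Vals = {1, 4, 9},
// UsedICmps = 3 and Extra = nullptr, which later feeds switch formation.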
@@ -487,7 +494,6 @@ private:
// (x & ~2^z) == y --> x == y || x == y|2^z
// This undoes a transformation done by instcombine to fuse 2 compares.
if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
-
// It's a little bit hard to see why the following transformations are
// correct. Here is a CVC3 program to verify them for 64-bit values:
@@ -770,6 +776,31 @@ static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
return false;
}
+// Set branch weights on SwitchInst. This sets the metadata if there is at
+// least one non-zero weight.
+static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
+ // Check that there is at least one non-zero weight. Otherwise, pass
+ // nullptr to setMetadata which will erase the existing metadata.
+ MDNode *N = nullptr;
+ if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
+ N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
+ SI->setMetadata(LLVMContext::MD_prof, N);
+}
+
+// Similar to the above, but for branch and select instructions that take
+// exactly 2 weights.
+static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
+ uint32_t FalseWeight) {
+ assert(isa<BranchInst>(I) || isa<SelectInst>(I));
+ // Check that there is at least one non-zero weight. Otherwise, pass
+ // nullptr to setMetadata which will erase the existing metadata.
+ MDNode *N = nullptr;
+ if (TrueWeight || FalseWeight)
+ N = MDBuilder(I->getParent()->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight);
+ I->setMetadata(LLVMContext::MD_prof, N);
+}
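// Hedged usage sketch (not part of the patch):
//
//   setBranchWeights(Br, /*TrueWeight=*/3, /*FalseWeight=*/1); // !prof {3,1}
//   setBranchWeights(Br, 0, 0);  // all-zero: clears any existing !prof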
+
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
@@ -859,9 +890,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
}
}
if (HasWeight && Weights.size() >= 2)
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getParent()->getContext())
- .createBranchWeights(Weights));
+ setBranchWeights(SI, Weights);
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
@@ -1166,9 +1195,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- NewSI->setMetadata(
- LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).createBranchWeights(MDWeights));
+ setBranchWeights(NewSI, MDWeights);
}
EraseTerminatorInstAndDCECond(PTI);
@@ -1279,9 +1306,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
// I1 and I2 are being combined into a single instruction. Its debug
// location is the merged locations of the original instructions.
- if (!isa<CallInst>(I1))
- I1->setDebugLoc(
- DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()));
+ I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
I2->eraseFromParent();
Changed = true;
@@ -1535,20 +1560,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
I0->getOperandUse(O).set(NewOperands[O]);
I0->moveBefore(&*BBEnd->getFirstInsertionPt());
- // The debug location for the "common" instruction is the merged locations of
- // all the commoned instructions. We start with the original location of the
- // "common" instruction and iteratively merge each location in the loop below.
- const DILocation *Loc = I0->getDebugLoc();
-
// Update metadata and IR flags, and merge debug locations.
for (auto *I : Insts)
if (I != I0) {
- Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc());
+ // The debug location for the "common" instruction is the merged locations
+ // of all the commoned instructions. We start with the original location
+ // of the "common" instruction and iteratively merge each location in the
+ // loop below.
+ // This is an N-way merge, which will be inefficient if I0 is a CallInst.
+ // However, since an N-way merge for a CallInst is rare, we use the
+ // simplified API instead of the complex one.
+ I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
combineMetadataForCSE(I0, I);
I0->andIRFlags(I);
}
- if (!isa<CallInst>(I0))
- I0->setDebugLoc(Loc);
if (!isa<StoreInst>(I0)) {
// canSinkLastInstruction checked that all instructions were used by
@@ -1582,9 +1607,9 @@ namespace {
ArrayRef<BasicBlock*> Blocks;
SmallVector<Instruction*,4> Insts;
bool Fail;
+
public:
- LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) :
- Blocks(Blocks) {
+ LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
reset();
}
@@ -1608,7 +1633,7 @@ namespace {
return !Fail;
}
- void operator -- () {
+ void operator--() {
if (Fail)
return;
for (auto *&Inst : Insts) {
@@ -1916,6 +1941,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// - All of their uses are in CondBB.
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+ SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
+
unsigned SpeculationCost = 0;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
@@ -1924,8 +1951,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
BBI != BBE; ++BBI) {
Instruction *I = &*BBI;
// Skip debug info.
- if (isa<DbgInfoIntrinsic>(I))
+ if (isa<DbgInfoIntrinsic>(I)) {
+ SpeculatedDbgIntrinsics.push_back(I);
continue;
+ }
// Only speculatively execute a single instruction (not counting the
// terminator) for now.
@@ -2030,11 +2059,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (Invert)
std::swap(TrueV, FalseV);
Value *S = Builder.CreateSelect(
- BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
+ BrCond, TrueV, FalseV, "spec.store.select", BI);
SpeculatedStore->setOperand(0, S);
- SpeculatedStore->setDebugLoc(
- DILocation::getMergedLocation(
- BI->getDebugLoc(), SpeculatedStore->getDebugLoc()));
+ SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
+ SpeculatedStore->getDebugLoc());
}
// Metadata can be dependent on the condition we are hoisting above.
@@ -2066,11 +2094,17 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (Invert)
std::swap(TrueV, FalseV);
Value *V = Builder.CreateSelect(
- BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
+ BrCond, TrueV, FalseV, "spec.select", BI);
PN->setIncomingValue(OrigI, V);
PN->setIncomingValue(ThenI, V);
}
+ // Remove speculated dbg intrinsics.
+ // FIXME: Is it possible to do this in a more elegant way, e.g. by moving
+ // or merging the dbg value for the different flows and inserting it after
+ // the select?
+ for (Instruction *I : SpeculatedDbgIntrinsics)
+ I->eraseFromParent();
+
++NumSpeculations;
return true;
}
@@ -2507,7 +2541,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
else {
// For unconditional branch, check for a simple CFG pattern, where
// BB has a single predecessor and BB's successor is also its predecessor's
- // successor. If such pattern exisits, check for CSE between BB and its
+ // successor. If such pattern exists, check for CSE between BB and its
// predecessor.
if (BasicBlock *PB = BB->getSinglePredecessor())
if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
@@ -2725,9 +2759,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
NewWeights.end());
- PBI->setMetadata(
- LLVMContext::MD_prof,
- MDBuilder(BI->getContext()).createBranchWeights(MDWeights));
+ setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
} else
PBI->setMetadata(LLVMContext::MD_prof, nullptr);
} else {
@@ -2860,7 +2892,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
BasicBlock *QTB, BasicBlock *QFB,
BasicBlock *PostBB, Value *Address,
- bool InvertPCond, bool InvertQCond) {
+ bool InvertPCond, bool InvertQCond,
+ const DataLayout &DL) {
auto IsaBitcastOfPointerType = [](const Instruction &I) {
return Operator::getOpcode(&I) == Instruction::BitCast &&
I.getType()->isPointerTy();
@@ -2887,7 +2920,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
else
return false;
}
- return N <= PHINodeFoldingThreshold;
+ // The store we want to merge is counted in N, so add 1 to make sure
+ // we're counting the instructions that would be left.
+ return N <= (PHINodeFoldingThreshold + 1);
};
if (!MergeCondStoresAggressively &&
@@ -2966,6 +3001,29 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
PStore->getAAMetadata(AAMD, /*Merge=*/false);
PStore->getAAMetadata(AAMD, /*Merge=*/true);
SI->setAAMetadata(AAMD);
+ unsigned PAlignment = PStore->getAlignment();
+ unsigned QAlignment = QStore->getAlignment();
+ unsigned TypeAlignment =
+ DL.getABITypeAlignment(SI->getValueOperand()->getType());
+ unsigned MinAlignment;
+ unsigned MaxAlignment;
+ std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment);
+ // Choose the minimum alignment. If we could prove both stores execute, we
+ // could use the biggest one. In this case, though, we only know that one
+ // of the stores executes, and we don't know that it's safe to take the
+ // alignment from a store that doesn't execute.
+ if (MinAlignment != 0) {
+ // Choose the minimum of all non-zero alignments.
+ SI->setAlignment(MinAlignment);
+ } else if (MaxAlignment != 0) {
+ // Choose the minimal alignment between the non-zero alignment and the ABI
+ // default alignment for the type of the stored value.
+ SI->setAlignment(std::min(MaxAlignment, TypeAlignment));
+ } else {
+ // If both alignments are zero, use ABI default alignment for the type of
+ // the stored value.
+ SI->setAlignment(TypeAlignment);
+ }
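// Worked example (not part of the patch): PStore align 8, QStore align 0
// (unknown) and an i32 ABI alignment of 4 give MinAlignment = 0 and
// MaxAlignment = 8, so the merged store gets std::min(8u, 4u) == 4.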
QStore->eraseFromParent();
PStore->eraseFromParent();
@@ -2973,7 +3031,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
return true;
}
-static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
+static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
+ const DataLayout &DL) {
// The intention here is to find diamonds or triangles (see below) where each
// conditional block contains a store to the same address. Both of these
// stores are conditional, so they can't be unconditionally sunk. But it may
@@ -3001,7 +3060,6 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
// We model triangles as a type of diamond with a nullptr "true" block.
// Triangles are canonicalized so that the fallthrough edge is represented by
// a true condition, as in the diagram above.
- //
BasicBlock *PTB = PBI->getSuccessor(0);
BasicBlock *PFB = PBI->getSuccessor(1);
BasicBlock *QTB = QBI->getSuccessor(0);
@@ -3076,7 +3134,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
bool Changed = false;
for (auto *Address : CommonAddresses)
Changed |= mergeConditionalStoreToAddress(
- PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond);
+ PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL);
return Changed;
}
@@ -3141,7 +3199,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
- if (MergeCondStores && mergeConditionalStores(PBI, BI))
+ if (MergeCondStores && mergeConditionalStores(PBI, BI, DL))
return true;
// If this is a conditional branch in an empty block, and if any
@@ -3270,9 +3328,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Halve the weights if any of them cannot fit in an uint32_t
FitWeights(NewWeights);
- PBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BI->getContext())
- .createBranchWeights(NewWeights[0], NewWeights[1]));
+ setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
}
// OtherDest may have phi nodes. If so, add an entry from PBI's
@@ -3310,9 +3366,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
FitWeights(NewWeights);
- NV->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BI->getContext())
- .createBranchWeights(NewWeights[0], NewWeights[1]));
+ setBranchWeights(NV, NewWeights[0], NewWeights[1]);
}
}
}
@@ -3367,9 +3421,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
// Create a conditional branch sharing the condition of the select.
BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
if (TrueWeight != FalseWeight)
- NewBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(OldTerm->getContext())
- .createBranchWeights(TrueWeight, FalseWeight));
+ setBranchWeights(NewBI, TrueWeight, FalseWeight);
}
} else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
// Neither of the selected blocks were successors, so this
@@ -3464,10 +3516,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
-static bool TryToSimplifyUncondBranchWithICmpInIt(
+static bool tryToSimplifyUncondBranchWithICmpInIt(
ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
- const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
- AssumptionCache *AC) {
+ const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -3502,7 +3553,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -3518,7 +3569,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -3556,9 +3607,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
Weights.push_back(Weights[0]);
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- SI->setMetadata(
- LLVMContext::MD_prof,
- MDBuilder(SI->getContext()).createBranchWeights(MDWeights));
+ setBranchWeights(SI, MDWeights);
}
}
SI->addCase(Cst, NewBB);
@@ -4285,10 +4334,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
TrueWeight /= 2;
FalseWeight /= 2;
}
- NewBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getContext())
- .createBranchWeights((uint32_t)TrueWeight,
- (uint32_t)FalseWeight));
+ setBranchWeights(NewBI, TrueWeight, FalseWeight);
}
}
@@ -4316,7 +4362,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
-static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
+static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
@@ -4385,9 +4431,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
}
if (HasWeight && Weights.size() >= 2) {
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getParent()->getContext())
- .createBranchWeights(MDWeights));
+ setBranchWeights(SI, MDWeights);
}
return !DeadCases.empty();
@@ -4429,38 +4473,59 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
-/// blocks of the switch can be folded away.
-/// Returns true if a change is made.
+/// blocks of the switch can be folded away. Return true if a change is made.
static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
- typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap;
- ForwardingNodesMap ForwardingNodes;
+ using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
- for (auto Case : SI->cases()) {
+ ForwardingNodesMap ForwardingNodes;
+ BasicBlock *SwitchBlock = SI->getParent();
+ bool Changed = false;
+ for (auto &Case : SI->cases()) {
ConstantInt *CaseValue = Case.getCaseValue();
BasicBlock *CaseDest = Case.getCaseSuccessor();
- int PhiIndex;
- PHINode *PHI =
- FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex);
- if (!PHI)
- continue;
+ // Replace phi operands in successor blocks that are using the constant case
+ // value rather than the switch condition variable:
+ // switchbb:
+ // switch i32 %x, label %default [
+ // i32 17, label %succ
+ // ...
+ // succ:
+ // %r = phi i32 ... [ 17, %switchbb ] ...
+ // -->
+ // %r = phi i32 ... [ %x, %switchbb ] ...
+
+ for (Instruction &InstInCaseDest : *CaseDest) {
+ auto *Phi = dyn_cast<PHINode>(&InstInCaseDest);
+ if (!Phi) break;
+
+ // This only works if there is exactly 1 incoming edge from the switch to
+ // a phi. If there is >1, that means multiple cases of the switch map to 1
+ // value in the phi, and that phi value is not the switch condition. Thus,
+ // this transform would not make sense (the phi would be invalid because
+ // a phi can't have different incoming values from the same block).
+ int SwitchBBIdx = Phi->getBasicBlockIndex(SwitchBlock);
+ if (Phi->getIncomingValue(SwitchBBIdx) == CaseValue &&
+ count(Phi->blocks(), SwitchBlock) == 1) {
+ Phi->setIncomingValue(SwitchBBIdx, SI->getCondition());
+ Changed = true;
+ }
+ }
- ForwardingNodes[PHI].push_back(PhiIndex);
+ // Collect phi nodes that are indirectly using this switch's case constants.
+ int PhiIdx;
+ if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
+ ForwardingNodes[Phi].push_back(PhiIdx);
}
- bool Changed = false;
-
- for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
- E = ForwardingNodes.end();
- I != E; ++I) {
- PHINode *Phi = I->first;
- SmallVectorImpl<int> &Indexes = I->second;
-
+ for (auto &ForwardingNode : ForwardingNodes) {
+ PHINode *Phi = ForwardingNode.first;
+ SmallVectorImpl<int> &Indexes = ForwardingNode.second;
if (Indexes.size() < 2)
continue;
- for (size_t I = 0, E = Indexes.size(); I != E; ++I)
- Phi->setIncomingValue(Indexes[I], SI->getCondition());
+ for (int Index : Indexes)
+ Phi->setIncomingValue(Index, SI->getCondition());
Changed = true;
}
@@ -4743,8 +4808,8 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
/// If the switch is only used to initialize one or more
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
-static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- AssumptionCache *AC, const DataLayout &DL,
+static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
const TargetTransformInfo &TTI) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
@@ -4816,18 +4881,18 @@ private:
} Kind;
// For SingleValueKind, this is the single value.
- Constant *SingleValue;
+ Constant *SingleValue = nullptr;
// For BitMapKind, this is the bitmap.
- ConstantInt *BitMap;
- IntegerType *BitMapElementTy;
+ ConstantInt *BitMap = nullptr;
+ IntegerType *BitMapElementTy = nullptr;
// For LinearMapKind, these are the constants used to derive the value.
- ConstantInt *LinearOffset;
- ConstantInt *LinearMultiplier;
+ ConstantInt *LinearOffset = nullptr;
+ ConstantInt *LinearMultiplier = nullptr;
// For ArrayKind, this is the array.
- GlobalVariable *Array;
+ GlobalVariable *Array = nullptr;
};
} // end anonymous namespace
@@ -4835,9 +4900,7 @@ private:
SwitchLookupTable::SwitchLookupTable(
Module &M, uint64_t TableSize, ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
- : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
- LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
@@ -5083,7 +5146,6 @@ static void reuseTableCompare(
User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
Constant *DefaultValue,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
-
ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
if (!CmpInst)
return;
@@ -5112,7 +5174,7 @@ static void reuseTableCompare(
for (auto ValuePair : Values) {
Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
ValuePair.second, CmpOp1, true);
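+    // Bail if folding failed, if the result matches the default (nothing to
+    // reuse), or if it folded to undef (undef carries no usable truth value).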
- if (!CaseConst || CaseConst == DefaultConst)
+ if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst))
return;
assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
"Expect true or false as compare result.");
@@ -5151,8 +5213,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
- // Only build lookup table when we have a target that supports it.
- if (!TTI.shouldBuildLookupTables())
+ Function *Fn = SI->getParent()->getParent();
+  // Only build lookup table when we have a target that supports it and the
+  // "no-jump-tables" function attribute is not set.
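+  // (Frontends typically set this attribute when compiling with
+  // -fno-jump-tables.)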
+ if (!TTI.shouldBuildLookupTables() ||
+ (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true"))
return false;
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
@@ -5163,8 +5228,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// string and lookup indices into that.
// Ignore switches with less than three cases. Lookup tables will not make
- // them
- // faster, so we don't analyze them.
+ // them faster, so we don't analyze them.
if (SI->getNumCases() < 3)
return false;
@@ -5176,8 +5240,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
ConstantInt *MaxCaseVal = CI->getCaseValue();
BasicBlock *CommonDest = nullptr;
- typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy;
+
+ using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
SmallDenseMap<PHINode *, ResultListTy> ResultLists;
+
SmallDenseMap<PHINode *, Constant *> DefaultResults;
SmallDenseMap<PHINode *, Type *> ResultTypes;
SmallVector<PHINode *, 4> PHIs;
@@ -5190,7 +5256,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
MaxCaseVal = CaseVal;
// Resulting value at phi nodes for this case value.
- typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy;
+ using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
Results, DL, TTI))
@@ -5248,8 +5314,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Compute the table index value.
Builder.SetInsertPoint(SI);
- Value *TableIndex =
- Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx");
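+  // If the smallest case value is zero, the condition itself is already the
+  // table index, so the subtraction below can be skipped.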
+ Value *TableIndex;
+ if (MinCaseVal->isNullValue())
+ TableIndex = SI->getCondition();
+ else
+ TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
// Compute the maximum table size representable by the integer type we are
// switching upon.
@@ -5282,15 +5352,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Builder.SetInsertPoint(LookupBB);
if (NeedMask) {
- // Before doing the lookup we do the hole check.
- // The LookupBB is therefore re-purposed to do the hole check
- // and we create a new LookupBB.
+ // Before doing the lookup, we do the hole check. The LookupBB is therefore
+ // re-purposed to do the hole check, and we create a new LookupBB.
BasicBlock *MaskBB = LookupBB;
MaskBB->setName("switch.hole_check");
LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
CommonDest->getParent(), CommonDest);
- // Make the mask's bitwidth at least 8bit and a power-of-2 to avoid
+ // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
// unnecessary illegal types.
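+    // For example, a 10-entry table needs NextPowerOf2(max(7, 9)) == 16 mask
+    // bits, while tables of up to 8 entries get the minimum 8-bit mask.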
uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
APInt MaskInt(TableSizePowOf2, 0);
@@ -5320,7 +5389,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
- // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later,
+ // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
// do not delete PHINodes here.
SI->getDefaultDest()->removePredecessor(SI->getParent(),
/*DontDeleteUselessPHIs=*/true);
@@ -5333,7 +5402,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If using a bitmask, use any value to fill the lookup table holes.
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
- StringRef FuncName = SI->getParent()->getParent()->getName();
+ StringRef FuncName = Fn->getName();
SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
FuncName);
@@ -5391,14 +5460,14 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) {
return NumCases * 100 >= Range * MinDensity;
}
-// Try and transform a switch that has "holes" in it to a contiguous sequence
-// of cases.
-//
-// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
-// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
-//
-// This converts a sparse switch into a dense switch which allows better
-// lowering and could also allow transforming into a lookup table.
+/// Try to transform a switch that has "holes" in it to a contiguous sequence
+/// of cases.
+///
+/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
+/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
+///
+/// This converts a sparse switch into a dense switch which allows better
+/// lowering and could also allow transforming into a lookup table.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
@@ -5427,7 +5496,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
// First, transform the values such that they start at zero and ascend.
int64_t Base = Values[0];
for (auto &V : Values)
- V -= Base;
+ V -= (uint64_t)(Base);
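+  // The unsigned cast makes the subtraction wrap rather than trigger
+  // signed-overflow UB when, e.g., Base is INT64_MIN.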
// Now we have signed numbers that have been shifted so that, given enough
// precision, there are no negative values. Since the rest of the transform
@@ -5492,12 +5561,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -5507,33 +5576,34 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
// Remove unreachable cases.
- if (EliminateDeadSwitchCases(SI, AC, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (eliminateDeadSwitchCases(SI, Options.AC, DL))
+ return simplifyCFG(BB, TTI, Options) | true;
- if (SwitchToSelect(SI, Builder, AC, DL, TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (switchToSelect(SI, Builder, DL, TTI))
+ return simplifyCFG(BB, TTI, Options) | true;
- if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
+ return simplifyCFG(BB, TTI, Options) | true;
- // The conversion from switch to lookup tables results in difficult
- // to analyze code and makes pruning branches much harder.
- // This is a problem of the switch expression itself can still be
- // restricted as a result of inlining or CVP. There only apply this
- // transformation during late steps of the optimisation chain.
- if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ // The conversion from switch to lookup tables results in difficult-to-analyze
+ // code and makes pruning branches much harder. This is a problem if the
+ // switch expression itself can still be restricted as a result of inlining or
+ // CVP. Therefore, only apply this transformation during late stages of the
+ // optimisation pipeline.
+ if (Options.ConvertSwitchToLookupTable &&
+ SwitchToLookupTable(SI, Builder, DL, TTI))
+ return simplifyCFG(BB, TTI, Options) | true;
if (ReduceSwitchRange(SI, Builder, DL, TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
return false;
}
@@ -5571,7 +5641,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
return Changed;
}
@@ -5613,8 +5683,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
if (!LPad2 || !LPad2->isIdenticalTo(LPad))
continue;
- for (++I; isa<DbgInfoIntrinsic>(I); ++I) {
- }
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
BranchInst *BI2 = dyn_cast<BranchInst>(I);
if (!BI2 || !BI2->isIdenticalTo(BI))
continue;
@@ -5658,39 +5728,38 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
BasicBlock *BB = BI->getParent();
BasicBlock *Succ = BI->getSuccessor(0);
- if (SinkCommon && SinkThenElseCodeToEnd(BI))
+ if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
- // if LoopHeader is provided, check if the block or its successor is a loop
- // header (This is for early invocations before loop simplify and
+ // If LoopHeader is provided, check if the block or its successor is a loop
+ // header. (This is for early invocations before loop simplify and
// vectorization to keep canonical loop forms for nested loops. These blocks
// can be eliminated when the pass is invoked later in the back-end.)
bool NeedCanonicalLoop =
- !LateSimplifyCFG &&
+ Options.NeedCanonicalLoop &&
(LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
!NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
- // If the only instruction in the block is a seteq/setne comparison
- // against a constant, try to simplify the block.
+ // If the only instruction in the block is a seteq/setne comparison against a
+ // constant, try to simplify the block.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI,
- BonusInstThreshold, AC))
+ tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, Options))
return true;
}
// See if we can merge an empty landing pad block with another which is
// equivalent.
if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
- for (++I; isa<DbgInfoIntrinsic>(I); ++I) {
- }
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
return true;
}
@@ -5699,8 +5768,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+ return simplifyCFG(BB, TTI, Options) | true;
return false;
}
@@ -5725,7 +5794,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -5735,14 +5804,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())) {
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
}
@@ -5758,9 +5827,9 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PBI && PBI->isConditional() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
- bool CondIsFalse = PBI->getSuccessor(1) == BB;
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
Optional<bool> Implication = isImpliedCondition(
- PBI->getCondition(), BI->getCondition(), DL, CondIsFalse);
+ PBI->getCondition(), BI->getCondition(), DL, CondIsTrue);
if (Implication) {
// Turn this into a branch on constant.
auto *OldCond = BI->getCondition();
@@ -5769,7 +5838,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
: ConstantInt::getFalse(BB->getContext());
BI->setCondition(CI);
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
}
}
@@ -5777,8 +5846,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+ return simplifyCFG(BB, TTI, Options) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -5787,7 +5856,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
if (HoistThenElseCodeToIf(BI, TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
@@ -5795,7 +5864,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -5804,30 +5873,30 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
}
// If this is a branch on a phi node in the current block, thread control
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DL, AC))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (FoldCondBranchOnPHI(BI, DL, Options.AC))
+ return simplifyCFG(BB, TTI, Options) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return simplifyCFG(BB, TTI, Options) | true;
// Look for diamond patterns.
if (MergeCondStores)
if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (mergeConditionalStores(PBI, BI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (mergeConditionalStores(PBI, BI, DL))
+ return simplifyCFG(BB, TTI, Options) | true;
return false;
}
@@ -5936,7 +6005,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
- //
if (MergeBlockIntoPredecessor(BB))
return true;
@@ -5944,12 +6012,12 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// If there is a trivial two-entry PHI node in this basic block, and we can
// eliminate it, do so now.
- if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
Builder.SetInsertPoint(BB->getTerminator());
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
if (BI->isUnconditional()) {
if (SimplifyUncondBranch(BI, Builder))
return true;
@@ -5957,25 +6025,22 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
if (SimplifyCondBranch(BI, Builder))
return true;
}
- } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ } else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
if (SimplifyReturn(RI, Builder))
return true;
- } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ } else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
if (SimplifyResume(RI, Builder))
return true;
- } else if (CleanupReturnInst *RI =
- dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ } else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
if (SimplifyCleanupReturn(RI))
return true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ } else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
if (SimplifySwitch(SI, Builder))
return true;
- } else if (UnreachableInst *UI =
- dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ } else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) {
if (SimplifyUnreachable(UI))
return true;
- } else if (IndirectBrInst *IBI =
- dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ } else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) {
if (SimplifyIndirectBr(IBI))
return true;
}
@@ -5983,16 +6048,10 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
return Changed;
}
-/// This function is used to do simplification of a CFG.
-/// For example, it adjusts branches to branches to eliminate the extra hop,
-/// eliminates unreachable basic blocks, and does other "peephole" optimization
-/// of the CFG. It returns true if a modification was made.
-///
-bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, AssumptionCache *AC,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
- bool LateSimplifyCFG) {
- return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
- BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG)
+bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+ const SimplifyCFGOptions &Options,
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
+ return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders,
+ Options)
.run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 6d90e6b48358..ad1faea0a7ae 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -18,13 +18,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -35,10 +33,14 @@ using namespace llvm;
STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
+STATISTIC(NumFoldedUser, "Number of IV users replaced with a loop invariant");
STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
STATISTIC(
NumSimplifiedSDiv,
"Number of IV signed division operations converted to unsigned division");
+STATISTIC(
+ NumSimplifiedSRem,
+ "Number of IV signed remainder operations converted to unsigned remainder");
STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
namespace {
@@ -51,15 +53,17 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
-
+ SCEVExpander &Rewriter;
SmallVectorImpl<WeakTrackingVH> &DeadInsts;
bool Changed;
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead)
- : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) {
+ LoopInfo *LI, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &Dead)
+ : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead),
+ Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -73,12 +77,17 @@ namespace {
Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
+ bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
bool eliminateOverflowIntrinsic(CallInst *CI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
- void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
- bool IsSigned);
+ void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool IsSigned);
+ void replaceRemWithNumerator(BinaryOperator *Rem);
+ void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
+ void replaceSRemWithURem(BinaryOperator *Rem);
bool eliminateSDiv(BinaryOperator *SDiv);
bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
@@ -151,6 +160,74 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
return IVSrc;
}
+bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
+ Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands (in the specific context of the
+ // current loop)
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
+ const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
+
+ ICmpInst::Predicate InvariantPredicate;
+ const SCEV *InvariantLHS, *InvariantRHS;
+
+ auto *PN = dyn_cast<PHINode>(IVOperand);
+ if (!PN)
+ return false;
+ if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate,
+ InvariantLHS, InvariantRHS))
+ return false;
+
+ // Rewrite the comparison to a loop invariant comparison if it can be done
+ // cheaply, where cheaply means "we don't need to emit any new
+ // instructions".
+
+ SmallDenseMap<const SCEV*, Value*> CheapExpansions;
+ CheapExpansions[S] = ICmp->getOperand(IVOperIdx);
+ CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx);
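+  // CheapExpansions maps SCEVs for which we already have an IR value in hand,
+  // so the rewrite below is free whenever both invariant operands are found.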
+
+ // TODO: Support multiple entry loops? (We currently bail out of these in
+ // the IndVarSimplify pass)
+ if (auto *BB = L->getLoopPredecessor()) {
+ const int Idx = PN->getBasicBlockIndex(BB);
+ if (Idx >= 0) {
+ Value *Incoming = PN->getIncomingValue(Idx);
+ const SCEV *IncomingS = SE->getSCEV(Incoming);
+ CheapExpansions[IncomingS] = Incoming;
+ }
+ }
+ Value *NewLHS = CheapExpansions[InvariantLHS];
+ Value *NewRHS = CheapExpansions[InvariantRHS];
+
+ if (!NewLHS)
+ if (auto *ConstLHS = dyn_cast<SCEVConstant>(InvariantLHS))
+ NewLHS = ConstLHS->getValue();
+ if (!NewRHS)
+ if (auto *ConstRHS = dyn_cast<SCEVConstant>(InvariantRHS))
+ NewRHS = ConstRHS->getValue();
+
+ if (!NewLHS || !NewRHS)
+ // We could not find an existing value to replace either LHS or RHS.
+ // Generating new instructions has subtler tradeoffs, so avoid doing that
+ // for now.
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(InvariantPredicate);
+ ICmp->setOperand(0, NewLHS);
+ ICmp->setOperand(1, NewRHS);
+ return true;
+}
+
/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
@@ -164,17 +241,11 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
Pred = ICmpInst::getSwappedPredicate(Pred);
}
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
- const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
-
- // Simplify unnecessary loops away.
+ // Get the SCEVs for the ICmp operands (in the specific context of the
+ // current loop)
const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- ICmpInst::Predicate InvariantPredicate;
- const SCEV *InvariantLHS, *InvariantRHS;
+ const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
+ const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
// If the condition is always true or always false, replace it with
// a constant value.
@@ -186,85 +257,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
DeadInsts.emplace_back(ICmp);
DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- } else if (isa<PHINode>(IVOperand) &&
- SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate,
- InvariantLHS, InvariantRHS)) {
-
- // Rewrite the comparison to a loop invariant comparison if it can be done
- // cheaply, where cheaply means "we don't need to emit any new
- // instructions".
-
- Value *NewLHS = nullptr, *NewRHS = nullptr;
-
- if (S == InvariantLHS || X == InvariantLHS)
- NewLHS =
- ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx));
-
- if (S == InvariantRHS || X == InvariantRHS)
- NewRHS =
- ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx));
-
- auto *PN = cast<PHINode>(IVOperand);
- for (unsigned i = 0, e = PN->getNumIncomingValues();
- i != e && (!NewLHS || !NewRHS);
- ++i) {
-
- // If this is a value incoming from the backedge, then it cannot be a loop
- // invariant value (since we know that IVOperand is an induction variable).
- if (L->contains(PN->getIncomingBlock(i)))
- continue;
-
- // NB! This following assert does not fundamentally have to be true, but
- // it is true today given how SCEV analyzes induction variables.
- // Specifically, today SCEV will *not* recognize %iv as an induction
- // variable in the following case:
- //
- // define void @f(i32 %k) {
- // entry:
- // br i1 undef, label %r, label %l
- //
- // l:
- // %k.inc.l = add i32 %k, 1
- // br label %loop
- //
- // r:
- // %k.inc.r = add i32 %k, 1
- // br label %loop
- //
- // loop:
- // %iv = phi i32 [ %k.inc.l, %l ], [ %k.inc.r, %r ], [ %iv.inc, %loop ]
- // %iv.inc = add i32 %iv, 1
- // br label %loop
- // }
- //
- // but if it starts to, at some point, then the assertion below will have
- // to be changed to a runtime check.
-
- Value *Incoming = PN->getIncomingValue(i);
-
-#ifndef NDEBUG
- if (auto *I = dyn_cast<Instruction>(Incoming))
- assert(DT->dominates(I, ICmp) && "Should be a unique loop dominating value!");
-#endif
-
- const SCEV *IncomingS = SE->getSCEV(Incoming);
-
- if (!NewLHS && IncomingS == InvariantLHS)
- NewLHS = Incoming;
- if (!NewRHS && IncomingS == InvariantRHS)
- NewRHS = Incoming;
- }
-
- if (!NewLHS || !NewRHS)
- // We could not find an existing value to replace either LHS or RHS.
- // Generating new instructions has subtler tradeoffs, so avoid doing that
- // for now.
- return;
-
- DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
- ICmp->setPredicate(InvariantPredicate);
- ICmp->setOperand(0, NewLHS);
- ICmp->setOperand(1, NewRHS);
+ } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
+ // fallthrough to end of function
} else if (ICmpInst::isSigned(OriginalPred) &&
SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
    // If we were unable to make anything above, all we can do is canonicalize
@@ -309,54 +303,90 @@ bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
return false;
}
-/// SimplifyIVUsers helper for eliminating useless
-/// remainder operations operating on an induction variable.
-void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
- Value *IVOperand,
- bool IsSigned) {
+// i %s n -> i %u n if i >= 0 and n >= 0
+void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
+ auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
+ auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
+ Rem->getName() + ".urem", Rem);
+ Rem->replaceAllUsesWith(URem);
+ DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
+ ++NumSimplifiedSRem;
+ Changed = true;
+ DeadInsts.emplace_back(Rem);
+}
+
+// i % n --> i if i is in [0,n).
+void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.emplace_back(Rem);
+}
+
+// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
+ auto *T = Rem->getType();
+ auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D);
+ SelectInst *Sel =
+ SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.emplace_back(Rem);
+}
+
+/// SimplifyIVUsers helper for eliminating useless remainder operations
+/// operating on an induction variable or replacing srem by urem.
+void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool IsSigned) {
+ auto *NValue = Rem->getOperand(0);
+ auto *DValue = Rem->getOperand(1);
// We're only interested in the case where we know something about
- // the numerator.
- if (IVOperand != Rem->getOperand(0))
+  // the numerator, unless it is an srem, because we want to replace srem with
+  // urem in general.
+ bool UsedAsNumerator = IVOperand == NValue;
+ if (!UsedAsNumerator && !IsSigned)
return;
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(Rem->getOperand(0));
- const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+ const SCEV *N = SE->getSCEV(NValue);
// Simplify unnecessary loops away.
const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // i % n --> i if i is in [0,n).
- if ((!IsSigned || SE->isKnownNonNegative(S)) &&
- SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- S, X))
- Rem->replaceAllUsesWith(Rem->getOperand(0));
- else {
- // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
- const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType()));
- if (IsSigned && !SE->isKnownNonNegative(LessOne))
- return;
+ N = SE->getSCEVAtScope(N, ICmpLoop);
+
+ bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
+
+  // Do not proceed if the numerator may be negative.
+ if (!IsNumeratorNonNegative)
+ return;
- if (!SE->isKnownPredicate(IsSigned ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- LessOne, X))
+ const SCEV *D = SE->getSCEV(DValue);
+ D = SE->getSCEVAtScope(D, ICmpLoop);
+
+ if (UsedAsNumerator) {
+ auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ if (SE->isKnownPredicate(LT, N, D)) {
+ replaceRemWithNumerator(Rem);
return;
+ }
- ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
- Rem->getOperand(0), Rem->getOperand(1));
- SelectInst *Sel =
- SelectInst::Create(ICmp,
- ConstantInt::get(Rem->getType(), 0),
- Rem->getOperand(0), "tmp", Rem);
- Rem->replaceAllUsesWith(Sel);
+ auto *T = Rem->getType();
+ const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
+ if (SE->isKnownPredicate(LT, NLessOne, D)) {
+ replaceRemWithNumeratorOrZero(Rem);
+ return;
+ }
}
- DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
- ++NumElimRem;
- Changed = true;
- DeadInsts.emplace_back(Rem);
+  // Try to replace SRem with URem if both N and D are known non-negative.
+  // Since we have already checked N, we only need to check D now.
+ if (!IsSigned || !SE->isKnownNonNegative(D))
+ return;
+
+ replaceSRemWithURem(Rem);
}
bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
@@ -474,7 +504,7 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
bool IsSRem = Bin->getOpcode() == Instruction::SRem;
if (IsSRem || Bin->getOpcode() == Instruction::URem) {
- eliminateIVRemainder(Bin, IVOperand, IsSRem);
+ simplifyIVRemainder(Bin, IVOperand, IsSRem);
return true;
}
@@ -492,6 +522,40 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return false;
}
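+
+/// Pick an insertion point for expanded loop-invariant code: prefer the
+/// preheader terminator so the expansion executes once, before the loop.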
+static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) {
+ if (auto *BB = L->getLoopPreheader())
+ return BB->getTerminator();
+
+ return Hint;
+}
+
+/// Replace the UseInst with a constant if possible.
+bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ if (!SE->isLoopInvariant(S, L))
+ return false;
+
+ // Do not generate something ridiculous even if S is loop invariant.
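+  // (For example, an invariant expression containing a division may be
+  // rejected as a high-cost expansion not worth materializing.)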
+ if (Rewriter.isHighCostExpansion(S, L, I))
+ return false;
+
+ auto *IP = GetLoopInvariantInsertPosition(L, I);
+ auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
+
+ I->replaceAllUsesWith(Invariant);
+ DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
+ << " with loop invariant: " << *S << '\n');
+ ++NumFoldedUser;
+ Changed = true;
+ DeadInsts.emplace_back(I);
+ return true;
+}
+
/// Eliminate any operation that SCEV can prove is an identity function.
bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
Instruction *IVOperand) {
@@ -627,7 +691,7 @@ bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
/// Add all uses of Def to the current IV's worklist.
static void pushIVUsers(
- Instruction *Def,
+ Instruction *Def, Loop *L,
SmallPtrSet<Instruction*,16> &Simplified,
SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
@@ -638,8 +702,19 @@ static void pushIVUsers(
// Also ensure unique worklist users.
// If Def is a LoopPhi, it may not be in the Simplified set, so check for
// self edges first.
- if (UI != Def && Simplified.insert(UI).second)
- SimpleIVUsers.push_back(std::make_pair(UI, Def));
+ if (UI == Def)
+ continue;
+
+    // Only push users inside the current loop; do not touch instructions in
+    // other loops or outside the loop.
+ if (!L->contains(UI))
+ continue;
+
+ // Do not push the same instruction more than once.
+ if (!Simplified.insert(UI).second)
+ continue;
+
+ SimpleIVUsers.push_back(std::make_pair(UI, Def));
}
}
@@ -689,7 +764,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
// Push users of the current LoopPhi. In rare cases, pushIVUsers may be
// called multiple times for the same LoopPhi. This is the proper thing to
// do for loop header phis that use each other.
- pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+ pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
while (!SimpleIVUsers.empty()) {
std::pair<Instruction*, Instruction*> UseOper =
@@ -699,6 +774,11 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
// Bypass back edges to avoid extra work.
if (UseInst == CurrIV) continue;
+ // Try to replace UseInst with a loop invariant before any other
+ // simplifications.
+ if (replaceIVUserWithLoopInvariant(UseInst))
+ continue;
+
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
assert(N <= Simplified.size() && "runaway iteration");
@@ -712,7 +792,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
continue;
if (eliminateIVUser(UseOper.first, IVOperand)) {
- pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
continue;
}
@@ -722,7 +802,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
(isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
// re-queue uses of the now modified binary operator and fall
// through to the checks that remain.
- pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
}
}
@@ -732,7 +812,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
continue;
}
if (isSimpleIVUser(UseOper.first, L, SE)) {
- pushIVUsers(UseOper.first, Simplified, SimpleIVUsers);
+ pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers);
}
}
}
@@ -745,8 +825,9 @@ void IVVisitor::anchor() { }
/// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
- IVVisitor *V) {
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead);
+ SCEVExpander &Rewriter, IVVisitor *V) {
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter,
+ Dead);
SIV.simplifyUsers(CurrIV, V);
return SIV.hasChanged();
}
@@ -755,9 +836,13 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
/// loop. This does not actually change or add IVs.
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {
+ SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
bool Changed = false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead);
+ Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead, Rewriter);
}
return Changed;
}
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 2ea15f65cef9..f3d4f2ef38d7 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -20,7 +20,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 77c0a41929ac..03a1d55ddc30 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -18,10 +18,11 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -484,10 +485,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
if (LenTrue && LenFalse) {
- Function *Caller = CI->getParent()->getParent();
- emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller,
- SI->getDebugLoc(),
- "folded strlen(select) to select of constants");
+ ORE.emit([&]() {
+ return OptimizationRemark("instcombine", "simplify-libcalls", CI)
+ << "folded strlen(select) to select of constants";
+ });
return B.CreateSelect(SI->getCondition(),
ConstantInt::get(CI->getType(), LenTrue - 1),
ConstantInt::get(CI->getType(), LenFalse - 1));
@@ -509,6 +510,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
Module &M = *CI->getParent()->getParent()->getParent();
unsigned WCharSize = TLI->getWCharSize(M) * 8;
+ // We cannot perform this optimization without wchar_size metadata.
+ if (WCharSize == 0)
+ return nullptr;
return optimizeStringLength(CI, B, WCharSize);
}
@@ -753,29 +757,44 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
}
// memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+ // TODO: The case where both inputs are constants does not need to be limited
+ // to legal integers or equality comparison. See block below this.
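+  // For example, on a target where i32 is legal, memcmp(a, b, 4) == 0 becomes
+  // one i32 load from each pointer and a single integer compare.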
if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
-
IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
- if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
- getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
-
- Type *LHSPtrTy =
- IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
- Type *RHSPtrTy =
- IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
-
- Value *LHSV =
- B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
- Value *RHSV =
- B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+ // First, see if we can fold either argument to a constant.
+ Value *LHSV = nullptr;
+ if (auto *LHSC = dyn_cast<Constant>(LHS)) {
+ LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());
+ LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
+ }
+ Value *RHSV = nullptr;
+ if (auto *RHSC = dyn_cast<Constant>(RHS)) {
+ RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());
+ RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
+ }
+ // Don't generate unaligned loads. If either source is constant data,
+ // alignment doesn't matter for that source because there is no load.
+ if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
+ (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
+ if (!LHSV) {
+ Type *LHSPtrTy =
+ IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+ LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
+ }
+ if (!RHSV) {
+ Type *RHSPtrTy =
+ IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+ RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
+ }
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
}
}
- // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
+ // TODO: This is limited to i8 arrays.
StringRef LHSStr, RHSStr;
if (getConstantStringInfo(LHS, LHSStr) &&
getConstantStringInfo(RHS, RHSStr)) {
@@ -1014,6 +1033,35 @@ static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
return B.CreateFPExt(V, B.getDoubleTy());
}
+// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
+Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
+ if (!CI->isFast())
+ return nullptr;
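+  // (A library cabs can avoid intermediate overflow for large components; the
+  // naive sqrt form below does not, hence the 'fast' gate above.)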
+
+ // Propagate fast-math flags from the existing call to new instructions.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Value *Real, *Imag;
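+  // Depending on how the frontend lowers the complex argument, it arrives as
+  // a single {real, imag} aggregate or as two separate scalar arguments.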
+ if (CI->getNumArgOperands() == 1) {
+ Value *Op = CI->getArgOperand(0);
+ assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
+ Real = B.CreateExtractValue(Op, 0, "real");
+ Imag = B.CreateExtractValue(Op, 1, "imag");
+ } else {
+ assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!");
+ Real = CI->getArgOperand(0);
+ Imag = CI->getArgOperand(1);
+ }
+
+ Value *RealReal = B.CreateFMul(Real, Real);
+ Value *ImagImag = B.CreateFMul(Imag, Imag);
+
+ Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt,
+ CI->getType());
+ return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
+}
+
Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
@@ -1055,6 +1103,51 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
return InnerChain[Exp];
}
+/// Use square root in place of pow(x, +/-0.5).
+Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
+ // TODO: There is some subset of 'fast' under which these transforms should
+ // be allowed.
+ if (!Pow->isFast())
+ return nullptr;
+
+ const APFloat *Arg1C;
+ if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C)))
+ return nullptr;
+ if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5))
+ return nullptr;
+
+ // Fast-math flags from the pow() are propagated to all replacement ops.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Pow->getFastMathFlags());
+ Type *Ty = Pow->getType();
+ Value *Sqrt;
+ if (Pow->hasFnAttr(Attribute::ReadNone)) {
+ // We know that errno is never set, so replace with an intrinsic:
+ // pow(x, 0.5) --> llvm.sqrt(x)
+ // llvm.pow(x, 0.5) --> llvm.sqrt(x)
+ auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty);
+ Sqrt = B.CreateCall(F, Pow->getArgOperand(0));
+ } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl)) {
+ // Errno could be set, so we must use a sqrt libcall.
+ // TODO: We also should check that the target can in fact lower the sqrt
+ // libcall. We currently have no way to ask this question, so we ask
+ // whether the target has a sqrt libcall which is not exactly the same.
+ Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0),
+ TLI->getName(LibFunc_sqrt), B,
+ Pow->getCalledFunction()->getAttributes());
+ } else {
+ // We can't replace with an intrinsic or a libcall.
+ return nullptr;
+ }
+
+ // If this is pow(x, -0.5), get the reciprocal.
+ if (Arg1C->isExactlyValue(-0.5))
+ Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt);
+
+ return Sqrt;
+}
+
Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
@@ -1092,7 +1185,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
auto *OpC = dyn_cast<CallInst>(Op1);
- if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) {
+ if (OpC && OpC->isFast() && CI->isFast()) {
LibFunc Func;
Function *OpCCallee = OpC->getCalledFunction();
if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
@@ -1105,6 +1198,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
}
}
+ if (Value *Sqrt = replacePowWithSqrt(CI, B))
+ return Sqrt;
+
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
if (!Op2C)
return Ret;
@@ -1112,42 +1208,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
- if (Op2C->isExactlyValue(-0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl)) {
- // If -ffast-math:
- // pow(x, -0.5) -> 1.0 / sqrt(x)
- if (CI->hasUnsafeAlgebra()) {
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- // TODO: If the pow call is an intrinsic, we should lower to the sqrt
- // intrinsic, so we match errno semantics. We also should check that the
- // target can in fact lower the sqrt intrinsic -- we currently have no way
- // to ask this question other than asking whether the target has a sqrt
- // libcall, which is a sufficient but not necessary condition.
- Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
- Callee->getAttributes());
-
- return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip");
- }
- }
-
+ // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
if (Op2C->isExactlyValue(0.5) &&
hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
LibFunc_sqrtl)) {
-
- // In -ffast-math, pow(x, 0.5) -> sqrt(x).
- if (CI->hasUnsafeAlgebra()) {
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
- // intrinsic, to match errno semantics.
- return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
- Callee->getAttributes());
- }
-
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
// and negative infinity correctly.
@@ -1169,15 +1233,21 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return Sel;
}
- if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ // Propagate fast-math-flags from the call to any created instructions.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+ // pow(x, 1.0) --> x
+ if (Op2C->isExactlyValue(1.0))
return Op1;
- if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ // pow(x, 2.0) --> x * x
+ if (Op2C->isExactlyValue(2.0))
return B.CreateFMul(Op1, Op1, "pow2");
- if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ // pow(x, -1.0) --> 1.0 / x
+ if (Op2C->isExactlyValue(-1.0))
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
// In -ffast-math, generate repeated fmul instead of generating pow(x, n).
- if (CI->hasUnsafeAlgebra()) {
+ if (CI->isFast()) {
APFloat V = abs(Op2C->getValueAPF());
// We limit to a max of 7 fmul(s). Thus max exponent is 32.
// This transformation applies to integer exponents only.
@@ -1185,10 +1255,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
!V.isInteger())
return nullptr;
- // Propagate fast math flags.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
// We will memoize intermediate products of the Addition Chain.
Value *InnerChain[33] = {nullptr};
InnerChain[1] = Op1;
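+  // For example, pow(x, 16) expands to four fmuls by repeated squaring:
+  //   x2 = x*x; x4 = x2*x2; x8 = x4*x4; x16 = x8*x8.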
@@ -1196,8 +1262,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// We cannot readily convert a non-double type (like float) to a double.
// So we first convert V to something which could be converted to double.
- bool ignored;
- V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored);
+ bool Ignored;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
// For negative exponents simply compute the reciprocal.
@@ -1265,9 +1331,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
- if (CI->hasUnsafeAlgebra()) {
- // Unsafe algebra sets all fast-math-flags to true.
- FMF.setUnsafeAlgebra();
+ if (CI->isFast()) {
+ // If the call is 'fast', then anything we create here will also be 'fast'.
+ FMF.setFast();
} else {
// At a minimum, no-nans-fp-math must be true.
if (!CI->hasNoNaNs())
@@ -1298,13 +1364,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
if (UnsafeFPShrink && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- if (!CI->hasUnsafeAlgebra())
+ if (!CI->isFast())
return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
- // The earlier call must also be unsafe in order to do these transforms.
- if (!OpC || !OpC->hasUnsafeAlgebra())
+ // The earlier call must also be 'fast' in order to do these transforms.
+ if (!OpC || !OpC->isFast())
return Ret;
// log(pow(x,y)) -> y*log(x)
@@ -1314,7 +1380,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
- FMF.setUnsafeAlgebra();
+ FMF.setFast();
B.setFastMathFlags(FMF);
LibFunc Func;
@@ -1346,11 +1412,11 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- if (!CI->hasUnsafeAlgebra())
+ if (!CI->isFast())
return Ret;
Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
- if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
return Ret;
// We're looking for a repeated factor in a multiplication tree,
@@ -1372,8 +1438,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Value *OtherMul0, *OtherMul1;
if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
// Pattern: sqrt((x * y) * z)
- if (OtherMul0 == OtherMul1 &&
- cast<Instruction>(Op0)->hasUnsafeAlgebra()) {
+ if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) {
// Matched: sqrt((x * x) * z)
RepeatOp = OtherMul0;
OtherOp = Op1;
@@ -1418,8 +1483,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
if (!OpC)
return Ret;
- // Both calls must allow unsafe optimizations in order to remove them.
- if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra())
+ // Both calls must be 'fast' in order to remove them.
+ if (!CI->isFast() || !OpC->isFast())
return Ret;
// tan(atan(x)) -> x
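// tan(atan(x)) == x is exact only over the reals; in doubles, atan
// saturates toward pi/2 for huge x, so the composition collapses. A
// standalone illustration (plain libm, hypothetical):
#include <cmath>
#include <cstdio>
int main() {
  std::printf("%g\n", std::tan(std::atan(1e308))); // ~1.63312e+16, not 1e308
}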
@@ -2043,13 +2108,107 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return nullptr;
}
+Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
+ LibFunc Func,
+ IRBuilder<> &Builder) {
+ // Don't optimize calls that require strict floating point semantics.
+ if (CI->isStrictFP())
+ return nullptr;
+
+ switch (Func) {
+ case LibFunc_cosf:
+ case LibFunc_cos:
+ case LibFunc_cosl:
+ return optimizeCos(CI, Builder);
+ case LibFunc_sinpif:
+ case LibFunc_sinpi:
+ case LibFunc_cospif:
+ case LibFunc_cospi:
+ return optimizeSinCosPi(CI, Builder);
+ case LibFunc_powf:
+ case LibFunc_pow:
+ case LibFunc_powl:
+ return optimizePow(CI, Builder);
+ case LibFunc_exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ return optimizeExp2(CI, Builder);
+ case LibFunc_fabsf:
+ case LibFunc_fabs:
+ case LibFunc_fabsl:
+ return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
+ case LibFunc_sqrtf:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtl:
+ return optimizeSqrt(CI, Builder);
+ case LibFunc_log:
+ case LibFunc_log10:
+ case LibFunc_log1p:
+ case LibFunc_log2:
+ case LibFunc_logb:
+ return optimizeLog(CI, Builder);
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ return optimizeTan(CI, Builder);
+ case LibFunc_ceil:
+ return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
+ case LibFunc_floor:
+ return replaceUnaryCall(CI, Builder, Intrinsic::floor);
+ case LibFunc_round:
+ return replaceUnaryCall(CI, Builder, Intrinsic::round);
+ case LibFunc_nearbyint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
+ case LibFunc_rint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::rint);
+ case LibFunc_trunc:
+ return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
+ case LibFunc_acos:
+ case LibFunc_acosh:
+ case LibFunc_asin:
+ case LibFunc_asinh:
+ case LibFunc_atan:
+ case LibFunc_atanh:
+ case LibFunc_cbrt:
+ case LibFunc_cosh:
+ case LibFunc_exp:
+ case LibFunc_exp10:
+ case LibFunc_expm1:
+ case LibFunc_sin:
+ case LibFunc_sinh:
+ case LibFunc_tanh:
+ if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName()))
+ return optimizeUnaryDoubleFP(CI, Builder, true);
+ return nullptr;
+ case LibFunc_copysign:
+ if (hasFloatVersion(CI->getCalledFunction()->getName()))
+ return optimizeBinaryDoubleFP(CI, Builder);
+ return nullptr;
+ case LibFunc_fminf:
+ case LibFunc_fmin:
+ case LibFunc_fminl:
+ case LibFunc_fmaxf:
+ case LibFunc_fmax:
+ case LibFunc_fmaxl:
+ return optimizeFMinFMax(CI, Builder);
+ case LibFunc_cabs:
+ case LibFunc_cabsf:
+ case LibFunc_cabsl:
+ return optimizeCAbs(CI, Builder);
+ default:
+ return nullptr;
+ }
+}
+
Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ // TODO: Split out the code below that operates on FP calls so that
+ // we can allow non-FP calls with the StrictFP attribute to be
+ // optimized.
if (CI->isNoBuiltin())
return nullptr;
LibFunc Func;
Function *Callee = CI->getCalledFunction();
- StringRef FuncName = Callee->getName();
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
@@ -2057,15 +2216,19 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
bool isCallingConvC = isCallingConvCCompatible(CI);
// Command-line parameter overrides instruction attribute.
+ // This can't be moved to optimizeFloatingPointLibCall() because it may be
+ // used by the intrinsic optimizations.
if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
UnsafeFPShrink = EnableUnsafeFPShrink;
- else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra())
+ else if (isa<FPMathOperator>(CI) && CI->isFast())
UnsafeFPShrink = true;
// First, check for intrinsics.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
if (!isCallingConvC)
return nullptr;
+ // The FP intrinsics have corresponding constrained versions so we don't
+ // need to check for the StrictFP attribute here.
switch (II->getIntrinsicID()) {
case Intrinsic::pow:
return optimizePow(CI, Builder);
@@ -2106,32 +2269,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
return V;
+ if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
+ return V;
switch (Func) {
- case LibFunc_cosf:
- case LibFunc_cos:
- case LibFunc_cosl:
- return optimizeCos(CI, Builder);
- case LibFunc_sinpif:
- case LibFunc_sinpi:
- case LibFunc_cospif:
- case LibFunc_cospi:
- return optimizeSinCosPi(CI, Builder);
- case LibFunc_powf:
- case LibFunc_pow:
- case LibFunc_powl:
- return optimizePow(CI, Builder);
- case LibFunc_exp2l:
- case LibFunc_exp2:
- case LibFunc_exp2f:
- return optimizeExp2(CI, Builder);
- case LibFunc_fabsf:
- case LibFunc_fabs:
- case LibFunc_fabsl:
- return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
- case LibFunc_sqrtf:
- case LibFunc_sqrt:
- case LibFunc_sqrtl:
- return optimizeSqrt(CI, Builder);
case LibFunc_ffs:
case LibFunc_ffsl:
case LibFunc_ffsll:
@@ -2160,18 +2300,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeFWrite(CI, Builder);
case LibFunc_fputs:
return optimizeFPuts(CI, Builder);
- case LibFunc_log:
- case LibFunc_log10:
- case LibFunc_log1p:
- case LibFunc_log2:
- case LibFunc_logb:
- return optimizeLog(CI, Builder);
case LibFunc_puts:
return optimizePuts(CI, Builder);
- case LibFunc_tan:
- case LibFunc_tanf:
- case LibFunc_tanl:
- return optimizeTan(CI, Builder);
case LibFunc_perror:
return optimizeErrorReporting(CI, Builder);
case LibFunc_vfprintf:
@@ -2179,46 +2309,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeErrorReporting(CI, Builder, 0);
case LibFunc_fputc:
return optimizeErrorReporting(CI, Builder, 1);
- case LibFunc_ceil:
- return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
- case LibFunc_floor:
- return replaceUnaryCall(CI, Builder, Intrinsic::floor);
- case LibFunc_round:
- return replaceUnaryCall(CI, Builder, Intrinsic::round);
- case LibFunc_nearbyint:
- return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
- case LibFunc_rint:
- return replaceUnaryCall(CI, Builder, Intrinsic::rint);
- case LibFunc_trunc:
- return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
- case LibFunc_acos:
- case LibFunc_acosh:
- case LibFunc_asin:
- case LibFunc_asinh:
- case LibFunc_atan:
- case LibFunc_atanh:
- case LibFunc_cbrt:
- case LibFunc_cosh:
- case LibFunc_exp:
- case LibFunc_exp10:
- case LibFunc_expm1:
- case LibFunc_sin:
- case LibFunc_sinh:
- case LibFunc_tanh:
- if (UnsafeFPShrink && hasFloatVersion(FuncName))
- return optimizeUnaryDoubleFP(CI, Builder, true);
- return nullptr;
- case LibFunc_copysign:
- if (hasFloatVersion(FuncName))
- return optimizeBinaryDoubleFP(CI, Builder);
- return nullptr;
- case LibFunc_fminf:
- case LibFunc_fmin:
- case LibFunc_fminl:
- case LibFunc_fmaxf:
- case LibFunc_fmax:
- case LibFunc_fmaxl:
- return optimizeFMinFMax(CI, Builder);
default:
return nullptr;
}
@@ -2228,9 +2318,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibCallSimplifier::LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
+ OptimizationRemarkEmitter &ORE,
function_ref<void(Instruction *, Value *)> Replacer)
- : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false),
- Replacer(Replacer) {}
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
+ UnsafeFPShrink(false), Replacer(Replacer) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// Indirect through the replacer used in this instance.
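// A hedged usage sketch for the widened constructor (hypothetical caller
// code, not part of this patch): clients now thread an
// OptimizationRemarkEmitter through alongside the DataLayout, TLI, and
// replacement callback.
OptimizationRemarkEmitter ORE(&F);
LibCallSimplifier Simplifier(
    F.getParent()->getDataLayout(), &TLI, ORE,
    [](Instruction *I, Value *With) { I->replaceAllUsesWith(With); });
if (Value *V = Simplifier.optimizeCall(CI)) {
  CI->replaceAllUsesWith(V); // how a caller might retire the original call
  CI->eraseFromParent();
}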
diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp
index e9a368f4faa4..968eb0208f43 100644
--- a/lib/Transforms/Utils/SplitModule.cpp
+++ b/lib/Transforms/Utils/SplitModule.cpp
@@ -13,32 +13,51 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "split-module"
-
#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
#include <queue>
+#include <utility>
+#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "split-module"
+
namespace {
-typedef EquivalenceClasses<const GlobalValue *> ClusterMapType;
-typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType;
-typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType;
-}
+
+using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
+using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
+using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
+
+} // end anonymous namespace
static void addNonConstUser(ClusterMapType &GVtoClusterMap,
const GlobalValue *GV, const User *U) {
@@ -125,9 +144,9 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
};
- std::for_each(M->begin(), M->end(), recordGVSet);
- std::for_each(M->global_begin(), M->global_end(), recordGVSet);
- std::for_each(M->alias_begin(), M->alias_end(), recordGVSet);
+ llvm::for_each(M->functions(), recordGVSet);
+ llvm::for_each(M->globals(), recordGVSet);
+ llvm::for_each(M->aliases(), recordGVSet);
// Assign all GVs to merged clusters while balancing the number of objects
// in each.
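// llvm::for_each (ADT/STLExtras.h) is the range-based wrapper around
// std::for_each, so each call above takes M->functions() etc. once instead
// of a begin()/end() pair. Minimal illustration (assumed local values):
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
SmallVector<int, 4> Xs = {1, 2, 3};
llvm::for_each(Xs, [](int &X) { X *= 2; }); // Xs becomes {2, 4, 6}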
@@ -147,7 +166,8 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
for (unsigned i = 0; i < N; ++i)
BalancinQueue.push(std::make_pair(i, 0));
- typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
+ using SortType = std::pair<unsigned, ClusterMapType::iterator>;
+
SmallVector<SortType, 64> Sets;
SmallPtrSet<const GlobalValue *, 32> Visited;
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 20107553665f..3640541e63cc 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -1,4 +1,4 @@
-//===- SymbolRewriter.cpp - Symbol Rewriter ---------------------*- C++ -*-===//
+//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -57,25 +57,41 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "symbol-rewriter"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+#include <vector>
using namespace llvm;
using namespace SymbolRewriter;
+#define DEBUG_TYPE "symbol-rewriter"
+
static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
cl::desc("Symbol Rewrite Map"),
- cl::value_desc("filename"));
+ cl::value_desc("filename"),
+ cl::Hidden);
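// cl::Hidden only removes the option from default -help output; parsing is
// unchanged, and cl::list still accumulates every occurrence in order. A
// hypothetical invocation shape:
//   opt -rewrite-symbols -rewrite-map-file=a.yaml -rewrite-map-file=b.yaml in.ll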
static void rewriteComdat(Module &M, GlobalObject *GO,
const std::string &Source,
@@ -92,8 +108,9 @@ static void rewriteComdat(Module &M, GlobalObject *GO,
}
namespace {
+
template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(llvm::Module::*Get)(StringRef) const>
+ ValueType *(Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {
public:
const std::string Source;
@@ -110,8 +127,10 @@ public:
}
};
+} // end anonymous namespace
+
template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(llvm::Module::*Get)(StringRef) const>
+ ValueType *(Module::*Get)(StringRef) const>
bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
bool Changed = false;
if (ValueType *S = (M.*Get)(Source)) {
@@ -128,10 +147,12 @@ bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
return Changed;
}
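// A standalone analogue (hypothetical types, not from this file) of the
// pattern above: a pointer-to-member lookup function as a non-type
// template parameter lets one function body serve several symbol tables.
#include <map>
#include <string>
struct Registry {
  std::map<std::string, int> Syms;
  const int *getSym(const std::string &K) const {
    auto It = Syms.find(K);
    return It == Syms.end() ? nullptr : &It->second;
  }
};
template <const int *(Registry::*Get)(const std::string &) const>
bool present(const Registry &R, const std::string &Name) {
  return (R.*Get)(Name) != nullptr; // the same (M.*Get)(Source) call shape
}
// Usage: bool Found = present<&Registry::getSym>(R, "main");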
+namespace {
+
template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(llvm::Module::*Get)(StringRef) const,
+ ValueType *(Module::*Get)(StringRef) const,
iterator_range<typename iplist<ValueType>::iterator>
- (llvm::Module::*Iterator)()>
+ (Module::*Iterator)()>
class PatternRewriteDescriptor : public RewriteDescriptor {
public:
const std::string Pattern;
@@ -147,10 +168,12 @@ public:
}
};
+} // end anonymous namespace
+
template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(llvm::Module::*Get)(StringRef) const,
+ ValueType *(Module::*Get)(StringRef) const,
iterator_range<typename iplist<ValueType>::iterator>
- (llvm::Module::*Iterator)()>
+ (Module::*Iterator)()>
bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
performOnModule(Module &M) {
bool Changed = false;
@@ -178,55 +201,52 @@ performOnModule(Module &M) {
return Changed;
}
+namespace {
+
/// Represents a rewrite for an explicitly named (function) symbol. Both the
/// source function name and target function name of the transformation are
/// explicitly spelt out.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function,
- llvm::Function, &llvm::Module::getFunction>
- ExplicitRewriteFunctionDescriptor;
+using ExplicitRewriteFunctionDescriptor =
+ ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+ &Module::getFunction>;
/// Represents a rewrite for an explicitly named (global variable) symbol. Both
/// the source variable name and target variable name are spelt out. This
/// applies only to module level variables.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
- llvm::GlobalVariable,
- &llvm::Module::getGlobalVariable>
- ExplicitRewriteGlobalVariableDescriptor;
+using ExplicitRewriteGlobalVariableDescriptor =
+ ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ GlobalVariable, &Module::getGlobalVariable>;
/// Represents a rewrite for an explicitly named global alias. Both the source
/// and target name are explicitly spelt out.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
- llvm::GlobalAlias,
- &llvm::Module::getNamedAlias>
- ExplicitRewriteNamedAliasDescriptor;
+using ExplicitRewriteNamedAliasDescriptor =
+ ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+ &Module::getNamedAlias>;
/// Represents a rewrite for a regular expression based pattern for functions.
/// A pattern for the function name is provided, and a transformation applied
/// to that pattern determines the target function name, creating the rewrite
/// rule.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::Function,
- llvm::Function, &llvm::Module::getFunction,
- &llvm::Module::functions>
- PatternRewriteFunctionDescriptor;
+using PatternRewriteFunctionDescriptor =
+ PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+ &Module::getFunction, &Module::functions>;
/// Represents a rewrite for a global variable based upon a matching pattern.
/// Each global variable matching the provided pattern will be transformed as
/// described in the transformation pattern for the target. Applies only to
/// module level variables.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
- llvm::GlobalVariable,
- &llvm::Module::getGlobalVariable,
- &llvm::Module::globals>
- PatternRewriteGlobalVariableDescriptor;
+using PatternRewriteGlobalVariableDescriptor =
+ PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ GlobalVariable, &Module::getGlobalVariable,
+ &Module::globals>;
/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
/// aliases which match a given pattern. The provided transformation will be
/// applied to each of the matching names.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
- llvm::GlobalAlias,
- &llvm::Module::getNamedAlias,
- &llvm::Module::aliases>
- PatternRewriteNamedAliasDescriptor;
-} // namespace
+using PatternRewriteNamedAliasDescriptor =
+ PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+ &Module::getNamedAlias, &Module::aliases>;
+
+} // end anonymous namespace
bool RewriteMapParser::parse(const std::string &MapFile,
RewriteDescriptorList *DL) {
@@ -497,6 +517,7 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
}
namespace {
+
class RewriteSymbolsLegacyPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
@@ -510,9 +531,11 @@ private:
RewriteSymbolPass Impl;
};
+} // end anonymous namespace
+
char RewriteSymbolsLegacyPass::ID = 0;
-RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID), Impl() {
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -523,9 +546,7 @@ RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
return Impl.runImpl(M);
}
-}
-namespace llvm {
PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
if (!runImpl(M))
return PreservedAnalyses::all();
@@ -550,7 +571,6 @@ void RewriteSymbolPass::loadAndParseMapFiles() {
for (const auto &MapFile : MapFiles)
Parser.parse(MapFile, &Descriptors);
}
-}
INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols",
false, false)
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 9385f825523c..ed444e4cf43c 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -15,7 +15,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 930972924c3c..8c9ecbc3503e 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,17 +13,36 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
+
using namespace llvm;
// Out of line method to get vtable etc for class.
@@ -85,7 +104,6 @@ struct MappingContext {
: VM(&VM), Materializer(Materializer) {}
};
-class MDNodeMapper;
class Mapper {
friend class MDNodeMapper;
@@ -175,7 +193,7 @@ class MDNodeMapper {
/// Data about a node in \a UniquedGraph.
struct Data {
bool HasChanged = false;
- unsigned ID = ~0u;
+ unsigned ID = std::numeric_limits<unsigned>::max();
TempMDNode Placeholder;
};
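// The new spelling names the same all-ones sentinel that ~0u produced;
// <limits> is pulled in by the include changes above. A quick check:
static_assert(std::numeric_limits<unsigned>::max() == ~0u,
              "all-ones sentinel is unchanged");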
@@ -316,7 +334,7 @@ private:
void remapOperands(MDNode &N, OperandMapper mapOperand);
};
-} // end namespace
+} // end anonymous namespace
Value *Mapper::mapValue(const Value *V) {
ValueToValueMapTy::iterator I = getVM().find(V);
@@ -579,6 +597,7 @@ void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
}
namespace {
+
/// An entry in the worklist for the post-order traversal.
struct POTWorklistEntry {
MDNode *N; ///< Current node.
@@ -590,7 +609,8 @@ struct POTWorklistEntry {
POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {}
};
-} // end namespace
+
+} // end anonymous namespace
bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) {
assert(G.Info.empty() && "Expected a fresh traversal");
@@ -653,7 +673,7 @@ void MDNodeMapper::UniquedGraph::propagateChanges() {
if (D.HasChanged)
continue;
- if (none_of(N->operands(), [&](const Metadata *Op) {
+ if (llvm::none_of(N->operands(), [&](const Metadata *Op) {
auto Where = Info.find(Op);
return Where != Info.end() && Where->second.HasChanged;
}))
@@ -752,10 +772,11 @@ struct MapMetadataDisabler {
MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
VM.disableMapMetadata();
}
+
~MapMetadataDisabler() { VM.enableMapMetadata(); }
};
-} // end namespace
+} // end anonymous namespace
Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
// If the value already exists in the map, use it.
@@ -1037,11 +1058,13 @@ public:
explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) {
assert(!M.hasWorkToDo() && "Expected to be flushed");
}
+
~FlushingMapper() { M.flush(); }
+
Mapper *operator->() const { return &M; }
};
-} // end namespace
+} // end anonymous namespace
ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,