Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp                      1112
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp                         4
-rw-r--r--  lib/Transforms/Scalar/BasicBlockPlacement.cpp          6
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt                   3
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp              36
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp                 6
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp 200
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp                         10
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp         7
-rw-r--r--  lib/Transforms/Scalar/GEPSplitter.cpp                  6
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                         15
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp              18
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp              217
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                       728
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp                 7
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp              12
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp                34
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp         182
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp              21
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp                30
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp                161
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp             21
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                  5
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp                      8
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                        41
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp        49
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp             10
-rw-r--r--  lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp     6
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp            27
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp                         5
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp              4
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp    68
32 files changed, 1310 insertions, 1749 deletions
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
deleted file mode 100644
index dcf14a6860da..000000000000
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ /dev/null
@@ -1,1112 +0,0 @@
-//===------- ABCD.cpp - Removes redundant conditional branches ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass removes redundant branch instructions. The algorithm was
-// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper
-// "ABCD: Eliminating Array Bounds Checks on Demand" (2000). The original
-// algorithm was created to remove array bounds checks for strongly typed
-// languages. This implementation expands on the idea and removes any
-// conditional branch that can be proved redundant, not only those used in
-// array bounds checks. With the SSI representation, each variable has a
-// constraint. By analyzing these constraints we can prove that a branch is
-// redundant. When a branch is proved redundant, one direction will always
-// be taken; thus, we can change the branch into an unconditional jump.
-// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination
-// after ABCD to clean up the code.
-// This implementation is based on the ABCD implementation in the Jitrino
-// compiler.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "abcd"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-
-using namespace llvm;
-
-STATISTIC(NumBranchTested, "Number of conditional branches analyzed");
-STATISTIC(NumBranchRemoved, "Number of conditional branches removed");
-
-namespace {
-
-class ABCD : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
- ABCD() : FunctionPass(&ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SSI>();
- }
-
- bool runOnFunction(Function &F);
-
- private:
- /// Keep track of whether we've modified the program yet.
- bool modified;
-
- enum ProveResult {
- False = 0,
- Reduced = 1,
- True = 2
- };
-
- typedef ProveResult (*meet_function)(ProveResult, ProveResult);
- static ProveResult max(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::max(res1, res2);
- }
- static ProveResult min(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::min(res1, res2);
- }
-
- class Bound {
- public:
- Bound(APInt v, bool upper) : value(v), upper_bound(upper) {}
- Bound(const Bound &b, int cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
- Bound(const Bound &b, const APInt &cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
-
- /// Test if Bound is an upper bound
- bool isUpperBound() const { return upper_bound; }
-
- /// Get the bitwidth of this bound
- int32_t getBitWidth() const { return value.getBitWidth(); }
-
- /// Creates a Bound incrementing the one received
- static Bound createIncrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value+1 : b.value-1,
- b.upper_bound);
- }
-
- /// Creates a Bound decrementing the one received
- static Bound createDecrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value-1 : b.value+1,
- b.upper_bound);
- }
-
- /// Test if two bounds are equal
- static bool eq(const Bound *a, const Bound *b) {
- if (!a || !b) return false;
-
- assert(a->isUpperBound() == b->isUpperBound());
- return a->value == b->value;
- }
-
- /// Test if val is less than or equal to Bound b
- static bool leq(APInt val, const Bound &b) {
- return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value);
- }
-
- /// Test if Bound a is less than or equal to Bound b
- static bool leq(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.sle(b.value) :
- a.value.sge(b.value);
- }
-
- /// Test if Bound a is less than Bound b
- static bool lt(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.slt(b.value) :
- a.value.sgt(b.value);
- }
-
- /// Test if Bound b is greater than or equal to val
- static bool geq(const Bound &b, APInt val) {
- return leq(val, b);
- }
-
- /// Test if Bound a is greater than or equal to Bound b
- static bool geq(const Bound &a, const Bound &b) {
- return leq(b, a);
- }
-
- private:
- APInt value;
- bool upper_bound;
- };
-
- /// This class is used to store results for some parts of the graph,
- /// so the information does not need to be recalculated. The maximum
- /// false, minimum true and minimum reduced results are stored.
- class MemoizedResultChart {
- public:
- MemoizedResultChart() {}
- MemoizedResultChart(const MemoizedResultChart &other) {
- if (other.max_false)
- max_false.reset(new Bound(*other.max_false));
- if (other.min_true)
- min_true.reset(new Bound(*other.min_true));
- if (other.min_reduced)
- min_reduced.reset(new Bound(*other.min_reduced));
- }
-
- /// Returns the max false
- const Bound *getFalse() const { return max_false.get(); }
-
- /// Returns the min true
- const Bound *getTrue() const { return min_true.get(); }
-
- /// Returns the min reduced
- const Bound *getReduced() const { return min_reduced.get(); }
-
- /// Return the stored result for this bound
- ProveResult getResult(const Bound &bound) const;
-
- /// Stores a false result that was found
- void addFalse(const Bound &bound);
-
- /// Stores a true result that was found
- void addTrue(const Bound &bound);
-
- /// Stores a Reduced result that was found
- void addReduced(const Bound &bound);
-
- /// Clears redundant reduced results.
- /// If min_true is smaller than min_reduced then min_reduced is
- /// unnecessary and is removed. The same applies when min_reduced is
- /// smaller than max_false.
- void clearRedundantReduced();
-
- void clear() {
- max_false.reset();
- min_true.reset();
- min_reduced.reset();
- }
-
- private:
- OwningPtr<Bound> max_false, min_true, min_reduced;
- };
-
- /// This class stores the result found for a node of the graph,
- /// so these results do not need to be recalculated, only searched for.
- class MemoizedResult {
- public:
- /// Test if there is a true result stored from b to a
- /// that is less than or equal to the bound
- bool hasTrue(Value *b, const Bound &bound) const {
- const Bound *trueBound = map.lookup(b).getTrue();
- return trueBound && Bound::leq(*trueBound, bound);
- }
-
- /// Test if there is a false result stored from b to a
- /// that is less than or equal to the bound
- bool hasFalse(Value *b, const Bound &bound) const {
- const Bound *falseBound = map.lookup(b).getFalse();
- return falseBound && Bound::leq(*falseBound, bound);
- }
-
- /// Test if there is a reduced result stored from b to a
- /// that is less than or equal to the bound
- bool hasReduced(Value *b, const Bound &bound) const {
- const Bound *reducedBound = map.lookup(b).getReduced();
- return reducedBound && Bound::leq(*reducedBound, bound);
- }
-
- /// Returns the stored result for b and the given bound
- ProveResult getBoundResult(Value *b, const Bound &bound) {
- return map[b].getResult(bound);
- }
-
- /// Clears the map
- void clear() {
- DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
- DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
- for (; begin != end; ++begin) {
- begin->second.clear();
- }
- map.clear();
- }
-
- /// Stores the result found for the given bound
- void updateBound(Value *b, const Bound &bound, const ProveResult res);
-
- private:
- // Maps a node in the graph to the results found for it.
- DenseMap<Value*, MemoizedResultChart> map;
- };
-
- /// This class represents an edge in the inequality graph used by the
- /// ABCD algorithm. An edge connects node v to node u with a value c if
- /// we could infer a constraint v <= u + c in the source program.
- class Edge {
- public:
- Edge(Value *V, APInt val, bool upper)
- : vertex(V), value(val), upper_bound(upper) {}
-
- Value *getVertex() const { return vertex; }
- const APInt &getValue() const { return value; }
- bool isUpperBound() const { return upper_bound; }
-
- private:
- Value *vertex;
- APInt value;
- bool upper_bound;
- };
-
- /// Weighted and Directed graph to represent constraints.
- /// There is one type of constraint, a <= b + X, which will generate an
- /// edge from b to a with weight X.
- class InequalityGraph {
- public:
-
- /// Adds an edge from V_from to V_to with weight value
- void addEdge(Value *V_from, Value *V_to, APInt value, bool upper);
-
- /// Test if there is a node V
- bool hasNode(Value *V) const { return graph.count(V); }
-
- /// Test if there is any edge from V in the given bound direction
- bool hasEdge(Value *V, bool upper) const;
-
- /// Returns all edges leaving vertex V
- SmallVector<Edge, 16> getEdges(Value *V) const {
- return graph.lookup(V);
- }
-
- /// Prints the graph in dot format.
- /// Blue edges represent upper bounds and red edges lower bounds.
- void printGraph(raw_ostream &OS, Function &F) const {
- printHeader(OS, F);
- printBody(OS);
- printFooter(OS);
- }
-
- /// Clear the graph
- void clear() {
- graph.clear();
- }
-
- private:
- DenseMap<Value *, SmallVector<Edge, 16> > graph;
-
- /// Prints the header of the dot file
- void printHeader(raw_ostream &OS, Function &F) const;
-
- /// Prints the footer of the dot file
- void printFooter(raw_ostream &OS) const {
- OS << "}\n";
- }
-
- /// Prints the body of the dot file
- void printBody(raw_ostream &OS) const;
-
- /// Prints vertex source to the dot file
- void printVertex(raw_ostream &OS, Value *source) const;
-
- /// Prints the edge to the dot file
- void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const;
-
- void printName(raw_ostream &OS, Value *info) const;
- };
-
- /// Iterates through all BasicBlocks; if the Terminator Instruction
- /// uses a Comparator Instruction, all operands of that comparator
- /// are sent to be transformed to SSI. Only Instruction operands are
- /// transformed.
- void createSSI(Function &F);
-
- /// Creates the graphs for this function.
- /// It will look for all comparators used in branches, and create
- /// constraints from them. These comparators will create constraints
- /// for any Instruction used as an operand.
- void executeABCD(Function &F);
-
- /// Seeks redundancies in the comparator instruction CI.
- /// If the ABCD algorithm can prove that the comparator CI always
- /// takes one direction, then the Terminator Instruction TI is changed
- /// from a conditional branch to an unconditional one.
- /// This code receives a comparator and verifies which kind of
- /// instruction it is. Depending on the kind of instruction, we use
- /// different strategies to prove its redundancy.
- void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Replaces Terminator Instruction TI, which is a conditional branch,
- /// with an unconditional branch. Succ_edge determines whether the new
- /// unconditional edge will be the first or second edge of the former TI
- /// instruction.
- void removeRedundancy(TerminatorInst *TI, bool Succ_edge);
-
- /// When a conditional branch is removed, the BasicBlock that is no longer
- /// reachable will have problems in its phi functions. This method fixes
- /// these phis by removing the former BasicBlock from the list of incoming
- /// BasicBlocks of all phis. If a phi is left with no predecessor it will
- /// be marked to be removed later.
- void fixPhi(BasicBlock *BB, BasicBlock *Succ);
-
- /// Removes phis that have no predecessor
- void removePhis();
-
- /// Creates constraints for Instructions.
- /// If the constraint for this instruction has already been created
- /// nothing is done.
- void createConstraintInstruction(Instruction *I);
-
- /// Creates constraints for Binary Operators.
- /// It will create constraints only for addition and subtraction,
- /// the other binary operations are not treated by ABCD.
- /// For additions in the form a = b + X and a = X + b, where X is a constant,
- /// the constraint a <= b + X can be obtained. For this constraint, an edge
- /// a->b with weight X is added to the lower bound graph, and an edge
- /// b->a with weight -X is added to the upper bound graph.
- /// Only subtractions of the form a = b - X are used by ABCD.
- /// Edges are created using the same semantics as addition.
- void createConstraintBinaryOperator(BinaryOperator *BO);
-
- /// Creates constraints for Comparator Instructions.
- /// Only comparators that use one of the following operators
- /// are used to create constraints: >=, >, <=, <, and only if
- /// at least one operand is an Instruction. For a Comparator Instruction
- /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f, where
- /// t and f denote the sigmas for the operands in the true and false
- /// branches. The following constraints can be obtained: a_t <= a,
- /// a_f <= a, b_t <= b and b_f <= b. Two more constraints depend on the
- /// operator:
- /// For the operator <= : a_t <= b_t and b_f <= a_f-1
- /// For the operator < : a_t <= b_t-1 and b_f <= a_f
- /// For the operator >= : b_t <= a_t and a_f <= b_f-1
- /// For the operator > : b_t <= a_t-1 and a_f <= b_f
- void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Creates constraints for PHI nodes.
- /// In a PHI node a = phi(b,c) we can create the constraint
- /// a <= max(b,c). With this constraint there will be the edges,
- /// b->a and c->a with weight 0 in the lower bound graph, and the edges
- /// a->b and a->c with weight 0 in the upper bound graph.
- void createConstraintPHINode(PHINode *PN);
-
- /// Given a binary operator, we are only interested in the case
- /// where one operand is an Instruction and the other is a ConstantInt. In
- /// that case the method returns true, otherwise false. It also extracts
- /// the Instruction and ConstantInt from the BinaryOperator and returns
- /// them.
- bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2);
-
- /// This method creates a constraint between a Sigma and an Instruction.
- /// These constraints are created as soon as we find a comparator that uses an
- /// SSI variable.
- void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f);
-
- /// If SIG_op1 and SIG_op2 are both non-NULL, create a constraint
- /// SIG_op2 -> SIG_op1 with value. If either of them is NULL, replace
- /// it with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value);
-
- /// Returns the sigma representing the Instruction I in BasicBlock BB.
- /// Returns NULL if there is no sigma for this Instruction in this
- /// Basic Block. This method assumes that sigmas are the first instructions
- /// in a block, and that there can be at most two sigmas in a block, so it
- /// will only look at the first two instructions of BasicBlock BB.
- PHINode *findSigma(BasicBlock *BB, Instruction *I);
-
- /// Original ABCD algorithm to prove redundant checks.
- /// This implementation works on any kind of inequality branch.
- bool demandProve(Value *a, Value *b, int c, bool upper_bound);
-
- /// Prove that distance between b and a is <= bound
- ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level);
-
- /// Updates the distance value for a and b
- void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level,
- meet_function meet);
-
- InequalityGraph inequality_graph;
- MemoizedResult mem_result;
- DenseMap<Value*, const Bound*> active;
- SmallPtrSet<Value*, 16> created;
- SmallVector<PHINode *, 16> phis_to_remove;
-};
-
-} // end anonymous namespace.
-
-char ABCD::ID = 0;
-static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand");
-
-
-bool ABCD::runOnFunction(Function &F) {
- modified = false;
- createSSI(F);
- executeABCD(F);
- DEBUG(inequality_graph.printGraph(dbgs(), F));
- removePhis();
-
- inequality_graph.clear();
- mem_result.clear();
- active.clear();
- created.clear();
- phis_to_remove.clear();
- return modified;
-}
-
-/// Iterates through all BasicBlocks; if the Terminator Instruction
-/// uses a Comparator Instruction, all operands of that comparator
-/// are sent to be transformed to SSI. Only Instruction operands are
-/// transformed.
-void ABCD::createSSI(Function &F) {
- SSI *ssi = &getAnalysis<SSI>();
-
- SmallVector<Instruction *, 16> Insts;
-
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) {
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) {
- modified = true; // XXX: createSSI might still do nothing
- Insts.push_back(I);
- }
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) {
- modified = true;
- Insts.push_back(I);
- }
- }
- }
- ssi->createSSI(Insts);
-}
-
-/// Creates the graphs for this function.
-/// It will look for all comparators used in branches, and create
-/// constraints from them. These comparators will create constraints
-/// for any Instruction used as an operand.
-void ABCD::executeABCD(Function &F) {
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
- if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy())
- continue;
-
- createConstraintCmpInst(ICI, TI);
- seekRedundancy(ICI, TI);
- }
-}
-
-/// Seeks redundancies in the comparator instruction CI.
-/// If the ABCD algorithm can prove that the comparator CI always
-/// takes one direction, then the Terminator Instruction TI is changed
-/// from a conditional branch to an unconditional one.
-/// This code receives a comparator and verifies which kind of
-/// instruction it is. Depending on the kind of instruction, we use
-/// different strategies to prove its redundancy.
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) {
- CmpInst::Predicate Pred = ICI->getPredicate();
-
- Value *source, *dest;
- int distance1, distance2;
- bool upper;
-
- switch(Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- upper = false;
- distance1 = 1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- upper = false;
- distance1 = 0;
- distance2 = -1;
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- upper = true;
- distance1 = -1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- upper = true;
- distance1 = 0;
- distance2 = 1;
- break;
-
- default:
- return;
- }
-
- ++NumBranchTested;
- source = ICI->getOperand(0);
- dest = ICI->getOperand(1);
- if (demandProve(dest, source, distance1, upper)) {
- removeRedundancy(TI, true);
- } else if (demandProve(dest, source, distance2, !upper)) {
- removeRedundancy(TI, false);
- }
-}
-
-/// Replaces Terminator Instruction TI, which is a conditional branch,
-/// with an unconditional branch. Succ_edge determines whether the new
-/// unconditional edge will be the first or second edge of the former TI
-/// instruction.
-void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) {
- BasicBlock *Succ;
- if (Succ_edge) {
- Succ = TI->getSuccessor(0);
- fixPhi(TI->getParent(), TI->getSuccessor(1));
- } else {
- Succ = TI->getSuccessor(1);
- fixPhi(TI->getParent(), TI->getSuccessor(0));
- }
-
- BranchInst::Create(Succ, TI);
- TI->eraseFromParent(); // XXX: invoke
- ++NumBranchRemoved;
- modified = true;
-}
-
-/// When a conditional branch is removed, the BasicBlock that is no longer
-/// reachable will have problems in its phi functions. This method fixes
-/// these phis by removing the former BasicBlock from the list of incoming
-/// BasicBlocks of all phis. If a phi is left with no predecessor it will
-/// be marked to be removed later.
-void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) {
- BasicBlock::iterator begin = Succ->begin();
- while (PHINode *PN = dyn_cast<PHINode>(begin++)) {
- PN->removeIncomingValue(BB, false);
- if (PN->getNumIncomingValues() == 0)
- phis_to_remove.push_back(PN);
- }
-}
-
-/// Removes phis that have no predecessor
-void ABCD::removePhis() {
- for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) {
- PHINode *PN = phis_to_remove[i];
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- PN->eraseFromParent();
- }
-}
-
-/// Creates constraints for Instructions.
-/// If the constraint for this instruction has already been created
-/// nothing is done.
-void ABCD::createConstraintInstruction(Instruction *I) {
- // Test if this instruction has not been created before
- if (created.insert(I)) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- createConstraintBinaryOperator(BO);
- } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
- createConstraintPHINode(PN);
- }
- }
-}
-
-/// Creates constraints for Binary Operators.
-/// It will create constraints only for addition and subtraction,
-/// the other binary operations are not treated by ABCD.
-/// For additions in the form a = b + X and a = X + b, where X is a constant,
-/// the constraint a <= b + X can be obtained. For this constraint, an edge
-/// a->b with weight X is added to the lower bound graph, and an edge
-/// b->a with weight -X is added to the upper bound graph.
-/// Only subtractions of the form a = b - X are used by ABCD.
-/// Edges are created using the same semantics as addition.
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) {
- Instruction *I1 = NULL, *I2 = NULL;
- ConstantInt *CI1 = NULL, *CI2 = NULL;
-
- // Test if an operand is an Instruction and the other is a Constant
- if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2))
- return;
-
- Instruction *I = 0;
- APInt value;
-
- switch (BO->getOpcode()) {
- case Instruction::Add:
- if (I1) {
- I = I1;
- value = CI2->getValue();
- } else if (I2) {
- I = I2;
- value = CI1->getValue();
- }
- break;
-
- case Instruction::Sub:
- // Instructions like a = X-b, where X is a constant are not represented
- // in the graph.
- if (!I1)
- return;
-
- I = I1;
- value = -CI2->getValue();
- break;
-
- default:
- return;
- }
-
- inequality_graph.addEdge(I, BO, value, true);
- inequality_graph.addEdge(BO, I, -value, false);
- createConstraintInstruction(I);
-}
-
-/// Given a binary operator, we are only interested in the case
-/// where one operand is an Instruction and the other is a ConstantInt. In
-/// that case the method returns true, otherwise false. It also extracts
-/// the Instruction and ConstantInt from the BinaryOperator and returns
-/// them.
-bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2) {
- Value *op1 = BO->getOperand(0);
- Value *op2 = BO->getOperand(1);
-
- if ((*I1 = dyn_cast<Instruction>(op1))) {
- if ((*C2 = dyn_cast<ConstantInt>(op2)))
- return true; // First is Instruction and second ConstantInt
-
- return false; // Both are Instruction
- } else {
- if ((*C1 = dyn_cast<ConstantInt>(op1)) &&
- (*I2 = dyn_cast<Instruction>(op2)))
- return true; // First is ConstantInt and second Instruction
-
- return false; // Both are not Instruction
- }
-}
-
-/// Creates constraints for Comparator Instructions.
-/// Only comparators that use one of the following operators
-/// are used to create constraints: >=, >, <=, <, and only if
-/// at least one operand is an Instruction. For a Comparator Instruction
-/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f, where
-/// t and f denote the sigmas for the operands in the true and false
-/// branches. The following constraints can be obtained: a_t <= a,
-/// a_f <= a, b_t <= b and b_f <= b. Two more constraints depend on the
-/// operator:
-/// For the operator <= : a_t <= b_t and b_f <= a_f-1
-/// For the operator < : a_t <= b_t-1 and b_f <= a_f
-/// For the operator >= : b_t <= a_t and a_f <= b_f-1
-/// For the operator > : b_t <= a_t-1 and a_f <= b_f
-void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
- Value *V_op1 = ICI->getOperand(0);
- Value *V_op2 = ICI->getOperand(1);
-
- if (!V_op1->getType()->isIntegerTy())
- return;
-
- Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
- Instruction *I_op2 = dyn_cast<Instruction>(V_op2);
-
- // Test if at least one operand is an Instruction
- if (!I_op1 && !I_op2)
- return;
-
- BasicBlock *BB_succ_t = TI->getSuccessor(0);
- BasicBlock *BB_succ_f = TI->getSuccessor(1);
-
- PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL,
- *SIG_op2_t = NULL, *SIG_op2_f = NULL;
-
- createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f);
- createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f);
-
- int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth();
- APInt MinusOne = APInt::getAllOnesValue(width);
- APInt Zero = APInt::getNullValue(width);
-
- CmpInst::Predicate Pred = ICI->getPredicate();
- ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
- ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
- switch (Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
- break;
-
- default:
- break;
- }
-
- if (I_op1)
- createConstraintInstruction(I_op1);
- if (I_op2)
- createConstraintInstruction(I_op2);
-}
-
-/// Creates constraints for PHI nodes.
-/// In a PHI node a = phi(b,c) we can create the constraint
-/// a <= max(b,c). With this constraint there will be the edges,
-/// b->a and c->a with weight 0 in the lower bound graph, and the edges
-/// a->b and a->c with weight 0 in the upper bound graph.
-void ABCD::createConstraintPHINode(PHINode *PN) {
- // FIXME: We really want to disallow sigma nodes, but I don't know the best
- // way to detect them other than this.
- if (PN->getNumOperands() == 2) return;
-
- int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = PN->getIncomingValue(i);
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- createConstraintInstruction(I);
- }
- inequality_graph.addEdge(V, PN, APInt(width, 0), true);
- inequality_graph.addEdge(V, PN, APInt(width, 0), false);
- }
-}
-
-/// This method creates a constraint between a Sigma and an Instruction.
-/// These constraints are created as soon as we find a comparator that uses an
-/// SSI variable.
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f) {
- *SIG_op_t = findSigma(BB_succ_t, I_op);
- *SIG_op_f = findSigma(BB_succ_f, I_op);
-
- if (*SIG_op_t) {
- int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- }
- if (*SIG_op_f) {
- int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- }
-}
-
-/// If SIG_op1 and SIG_op2 are both non-NULL, create a constraint
-/// SIG_op2 -> SIG_op1 with value. If either of them is NULL, replace
-/// it with the respective V_op#, if V_op# is a ConstantInt.
-void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value) {
- if (SIG_op1 && SIG_op2) {
- inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
- } else if (SIG_op1 && V_op2) {
- inequality_graph.addEdge(V_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
- } else if (SIG_op2 && V_op1) {
- inequality_graph.addEdge(SIG_op2, V_op1, value, true);
- inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
- }
-}
-
-/// Returns the sigma representing the Instruction I in BasicBlock BB.
-/// Returns NULL if there is no sigma for this Instruction in this
-/// Basic Block. This method assumes that sigmas are the first instructions
-/// in a block, and that there can be at most two sigmas in a block, so it
-/// will only look at the first two instructions of BasicBlock BB.
-PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) {
- // If BB has more than one predecessor, it cannot have sigmas.
- if (I == NULL || BB->getSinglePredecessor() == NULL)
- return NULL;
-
- BasicBlock::iterator begin = BB->begin();
- BasicBlock::iterator end = BB->end();
-
- for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) {
- Instruction *I_succ = begin;
- if (PHINode *PN = dyn_cast<PHINode>(I_succ))
- if (PN->getIncomingValue(0) == I)
- return PN;
- }
-
- return NULL;
-}
-
-/// Original ABCD algorithm to prove redundant checks.
-/// This implementation works on any kind of inequality branch.
-bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
- int32_t width = cast<IntegerType>(a->getType())->getBitWidth();
- Bound bound(APInt(width, c), upper_bound);
-
- mem_result.clear();
- active.clear();
-
- ProveResult res = prove(a, b, bound, 0);
- return res != False;
-}
-
-/// Prove that distance between b and a is <= bound
-ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound,
- unsigned level) {
- // if C[b-a<=e] == True for some e <= bound,
- // the same or a stronger difference was already proven
- if (mem_result.hasTrue(b, bound))
- return True;
-
- // if C[b-a<=e] == False for some e >= bound,
- // the same or a weaker difference was already disproved
- if (mem_result.hasFalse(b, bound))
- return False;
-
- // if C[b-a<=e] == Reduced for some e <= bound,
- // b is on a cycle that was reduced for the same or a stronger difference
- if (mem_result.hasReduced(b, bound))
- return Reduced;
-
- // traversal reached the source vertex
- if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true)))
- return True;
-
- // if b has no predecessor then fail
- if (!inequality_graph.hasEdge(b, bound.isUpperBound()))
- return False;
-
- // a cycle was encountered
- if (active.count(b)) {
- if (Bound::leq(*active.lookup(b), bound))
- return Reduced; // a "harmless" cycle
-
- return False; // an amplifying cycle
- }
-
- active[b] = &bound;
- PHINode *PN = dyn_cast<PHINode>(b);
-
- // Test if b is a phi or a sigma: a PHINode with more than one incoming
- // value is a phi; one with a single incoming value is a sigma.
- if (PN && PN->getNumIncomingValues() > 1)
- updateMemDistance(a, b, bound, level, min);
- else
- updateMemDistance(a, b, bound, level, max);
-
- active.erase(b);
-
- ABCD::ProveResult res = mem_result.getBoundResult(b, bound);
- return res;
-}
-
-/// Updates the distance value for a and b
-void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound,
- unsigned level, meet_function meet) {
- ABCD::ProveResult res = (meet == max) ? False : True;
-
- SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b);
- SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end();
-
- for (; begin != end ; ++begin) {
- if (((res >= Reduced) && (meet == max)) ||
- ((res == False) && (meet == min))) {
- break;
- }
- const Edge &in = *begin;
- if (in.isUpperBound() == bound.isUpperBound()) {
- Value *succ = in.getVertex();
- res = meet(res, prove(a, succ, Bound(bound, in.getValue()),
- level+1));
- }
- }
-
- mem_result.updateBound(b, bound, res);
-}
-
-/// Return the stored result for this bound
-ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{
- if (max_false && Bound::leq(bound, *max_false))
- return False;
- if (min_true && Bound::leq(*min_true, bound))
- return True;
- if (min_reduced && Bound::leq(*min_reduced, bound))
- return Reduced;
- return False;
-}
-
-/// Stores a false result that was found
-void ABCD::MemoizedResultChart::addFalse(const Bound &bound) {
- if (!max_false || Bound::leq(*max_false, bound))
- max_false.reset(new Bound(bound));
-
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced)));
- if (Bound::eq(max_false.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a true result that was found
-void ABCD::MemoizedResultChart::addTrue(const Bound &bound) {
- if (!min_true || Bound::leq(bound, *min_true))
- min_true.reset(new Bound(bound));
-
- if (Bound::eq(min_true.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced)));
- if (Bound::eq(min_true.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a Reduced result that was found
-void ABCD::MemoizedResultChart::addReduced(const Bound &bound) {
- if (!min_reduced || Bound::leq(bound, *min_reduced))
- min_reduced.reset(new Bound(bound));
-
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
-}
-
-/// Clears redundant reduced results.
-/// If min_true is smaller than min_reduced then min_reduced is
-/// unnecessary and is removed. The same applies when min_reduced is
-/// smaller than max_false.
-void ABCD::MemoizedResultChart::clearRedundantReduced() {
- if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced))
- min_reduced.reset();
- if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false))
- min_reduced.reset();
-}
-
-/// Stores the result found for the given bound
-void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound,
- const ProveResult res) {
- if (res == False) {
- map[b].addFalse(bound);
- } else if (res == True) {
- map[b].addTrue(bound);
- } else {
- map[b].addReduced(bound);
- }
-}
-
-/// Adds an edge from V_from to V_to with weight value
-void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from,
- APInt value, bool upper) {
- assert(V_from->getType() == V_to->getType());
- assert(cast<IntegerType>(V_from->getType())->getBitWidth() ==
- value.getBitWidth());
-
- graph[V_from].push_back(Edge(V_to, value, upper));
-}
-
-/// Test if there is any edge from V in the given bound direction
-bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const {
- SmallVector<Edge, 16> it = graph.lookup(V);
-
- SmallVector<Edge, 16>::iterator begin = it.begin();
- SmallVector<Edge, 16>::iterator end = it.end();
- for (; begin != end; ++begin) {
- if (begin->isUpperBound() == upper) {
- return true;
- }
- }
- return false;
-}
-
-/// Prints the header of the dot file
-void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
- OS << "digraph dotgraph {\n";
- OS << "label=\"Inequality Graph for \'";
- OS << F.getNameStr() << "\' function\";\n";
- OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n";
-}
-
-/// Prints the body of the dot file
-void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin =
- graph.begin(), end = graph.end();
-
- for (; begin != end ; ++begin) {
- SmallVector<Edge, 16>::const_iterator begin_par =
- begin->second.begin(), end_par = begin->second.end();
- Value *source = begin->first;
-
- printVertex(OS, source);
-
- for (; begin_par != end_par ; ++begin_par) {
- const Edge &edge = *begin_par;
- printEdge(OS, source, edge);
- }
- }
-}
-
-/// Prints vertex source to the dot file
-///
-void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const {
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " [label=\"{";
- printName(OS, source);
- OS << "}\"];\n";
-}
-
-/// Prints the edge to the dot file
-void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source,
- const Edge &edge) const {
- Value *dest = edge.getVertex();
- APInt value = edge.getValue();
- bool upper = edge.isUpperBound();
-
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " -> ";
- OS << "\"";
- printName(OS, dest);
- OS << "\"";
- OS << " [label=\"" << value << "\"";
- if (upper) {
- OS << "color=\"blue\"";
- } else {
- OS << "color=\"red\"";
- }
- OS << "];\n";
-}
-
-void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) {
- OS << *CI;
- } else {
- if (!info->hasName()) {
- info->setName("V");
- }
- OS << info->getNameStr();
- }
-}
-
-/// createABCDPass - The public interface to this file...
-FunctionPass *llvm::createABCDPass() {
- return new ABCD();
-}
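For context, here is a source-level sketch (hypothetical, not part of this commit) of the kind of redundancy the deleted ABCD pass proved away: the true edge of the loop condition constrains i <= n-1, so the inner check can never fail and its branch is folded to an unconditional one.

  // Hypothetical C++ example of a branch ABCD could remove.
  void copy(int *dst, const int *src, int n) {
    for (int i = 0; i < n; ++i) {
      if (i < n)            // implied by the loop condition; ABCD proves
        dst[i] = src[i];    // this always true and folds the branch
    }
  }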
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 2d19467ce746..ada086e9db76 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
struct ADCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(&ID) {}
+ ADCE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function& F);
@@ -45,7 +45,7 @@ namespace {
}
char ADCE::ID = 0;
-static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination");
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
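The ADCE hunks above show the two mechanical changes that repeat for nearly every pass in this commit: the constructor now passes the pass ID by value (FunctionPass(ID) instead of FunctionPass(&ID)), and the static RegisterPass<> object becomes an INITIALIZE_PASS macro. A minimal sketch of the resulting idiom, assuming the five-argument macro form (class, command-line name, description, CFGOnly, IsAnalysis) used throughout this diff; ExamplePass is a hypothetical name:

  // Assumes llvm/Pass.h and the INITIALIZE_PASS macro at this revision.
  namespace {
    struct ExamplePass : public FunctionPass {
      static char ID;                      // pass identification
      ExamplePass() : FunctionPass(ID) {}  // ID now passed by value
      virtual bool runOnFunction(Function &F);
    };
  }
  char ExamplePass::ID = 0;
  INITIALIZE_PASS(ExamplePass, "example-pass", "Example Pass", false, false);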
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index 54533f50405f..b144678c6a0e 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -41,7 +41,7 @@ STATISTIC(NumMoved, "Number of basic blocks moved");
namespace {
struct BlockPlacement : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(&ID) {}
+ BlockPlacement() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -74,8 +74,8 @@ namespace {
}
char BlockPlacement::ID = 0;
-static RegisterPass<BlockPlacement>
-X("block-placement", "Profile Guided Basic Block Placement");
+INITIALIZE_PASS(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false);
FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 1a3b10cc9baa..b7598eace536 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,9 +1,9 @@
add_llvm_library(LLVMScalarOpts
- ABCD.cpp
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
ConstantProp.cpp
+ CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
GEPSplitter.cpp
@@ -17,6 +17,7 @@ add_llvm_library(LLVMScalarOpts
LoopStrengthReduce.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
+ LowerAtomic.cpp
MemCpyOptimizer.cpp
Reassociate.cpp
Reg2Mem.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 272066c8c0c4..e07b761e589c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -41,6 +42,11 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+static cl::opt<bool>
+CriticalEdgeSplit("cgp-critical-edge-splitting",
+ cl::desc("Split critical edges during codegen prepare"),
+ cl::init(true), cl::Hidden);
+
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -54,7 +60,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(&ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {}
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -82,8 +88,8 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-static RegisterPass<CodeGenPrepare> X("codegenprepare",
- "Optimize for code generation");
+INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false);
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
@@ -427,9 +433,9 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
- if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
@@ -548,9 +554,9 @@ protected:
CI->eraseFromParent();
}
bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
- - CallInst::ArgOffset)))
- return SizeCI->isAllOnesValue();
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+ return SizeCI->isAllOnesValue();
return false;
}
};
@@ -891,12 +897,14 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
// Split all critical edges where the dest block has a PHI.
- TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *SuccBB = BBTI->getSuccessor(i);
- if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, BackEdges, this);
+ if (CriticalEdgeSplit) {
+ TerminatorInst *BBTI = BB.getTerminator();
+ if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
}
}
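The new cl::opt above means critical-edge splitting in CodeGenPrepare can now be toggled from the command line; boolean cl::opt flags accept an explicit value, so a run that disables it might look like this (file names are hypothetical):

  opt -codegenprepare -cgp-critical-edge-splitting=false input.bc -o output.bc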
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index ea208135739d..a0ea369d0cad 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -34,7 +34,7 @@ STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
struct ConstantPropagation : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ConstantPropagation() : FunctionPass(&ID) {}
+ ConstantPropagation() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -45,8 +45,8 @@ namespace {
}
char ConstantPropagation::ID = 0;
-static RegisterPass<ConstantPropagation>
-X("constprop", "Simple constant propagation");
+INITIALIZE_PASS(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false);
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
new file mode 100644
index 000000000000..0d4e45de3466
--- /dev/null
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,200 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPhis, "Number of phis propagated");
+STATISTIC(NumSelects, "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps, "Number of comparisons propagated");
+
+namespace {
+ class CorrelatedValuePropagation : public FunctionPass {
+ LazyValueInfo *LVI;
+
+ bool processSelect(SelectInst *SI);
+ bool processPHI(PHINode *P);
+ bool processMemAccess(Instruction *I);
+ bool processCmp(CmpInst *C);
+
+ public:
+ static char ID;
+ CorrelatedValuePropagation(): FunctionPass(ID) { }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ }
+ };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false);
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+ return new CorrelatedValuePropagation();
+}
+
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+ if (S->getType()->isVectorTy()) return false;
+ if (isa<Constant>(S->getOperand(0))) return false;
+
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ if (!C) return false;
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return false;
+
+ S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+ S->eraseFromParent();
+
+ ++NumSelects;
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+ bool Changed = false;
+
+ BasicBlock *BB = P->getParent();
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ Value *Incoming = P->getIncomingValue(i);
+ if (isa<Constant>(Incoming)) continue;
+
+ Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
+ P->getIncomingBlock(i),
+ BB);
+ if (!C) continue;
+
+ P->setIncomingValue(i, C);
+ Changed = true;
+ }
+
+ if (Value *ConstVal = P->hasConstantValue()) {
+ P->replaceAllUsesWith(ConstVal);
+ P->eraseFromParent();
+ Changed = true;
+ }
+
+ ++NumPhis;
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+ Value *Pointer = 0;
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ Pointer = L->getPointerOperand();
+ else
+ Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+ if (isa<Constant>(Pointer)) return false;
+
+ Constant *C = LVI->getConstant(Pointer, I->getParent());
+ if (!C) return false;
+
+ ++NumMemAccess;
+ I->replaceUsesOfWith(Pointer, C);
+ return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out. Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge. If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+ Value *Op0 = C->getOperand(0);
+ if (isa<Instruction>(Op0) &&
+ cast<Instruction>(Op0)->getParent() == C->getParent())
+ return false;
+
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return false;
+
+ pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+ if (PI == PE) return false;
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Result == LazyValueInfo::Unknown) return false;
+
+ ++PI;
+ while (PI != PE) {
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Res != Result) return false;
+ ++PI;
+ }
+
+ ++NumCmps;
+
+ if (Result == LazyValueInfo::True)
+ C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+ else
+ C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+ C->eraseFromParent();
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ bool FnChanged = false;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool BBChanged = false;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *II = BI++;
+ switch (II->getOpcode()) {
+ case Instruction::Select:
+ BBChanged |= processSelect(cast<SelectInst>(II));
+ break;
+ case Instruction::PHI:
+ BBChanged |= processPHI(cast<PHINode>(II));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ BBChanged |= processCmp(cast<CmpInst>(II));
+ break;
+ case Instruction::Load:
+ case Instruction::Store:
+ BBChanged |= processMemAccess(II);
+ break;
+ }
+ }
+
+ // Propagating correlated values might leave cruft around.
+ // Try to clean it up before we continue.
+ if (BBChanged)
+ SimplifyInstructionsInBlock(FI);
+
+ FnChanged |= BBChanged;
+ }
+
+ return FnChanged;
+}
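A source-level sketch (hypothetical, not from the patch) of the edge-based reasoning processCmp() performs: the comparison cannot be decided locally, but LazyValueInfo can decide it on every incoming edge, so the compare is replaced with a constant and erased.

  int f(int x) {
    if (x > 5) {
      // On the only edge into this block LVI knows x >= 6, so "x > 0"
      // is true on all incoming edges and is folded to true.
      if (x > 0)
        return 1;
    }
    return 0;
  }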
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 39940c35da5d..87ea8038356a 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -35,7 +35,7 @@ namespace {
//
struct DeadInstElimination : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : BasicBlockPass(&ID) {}
+ DeadInstElimination() : BasicBlockPass(ID) {}
virtual bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -56,8 +56,8 @@ namespace {
}
char DeadInstElimination::ID = 0;
-static RegisterPass<DeadInstElimination>
-X("die", "Dead Instruction Elimination");
+INITIALIZE_PASS(DeadInstElimination, "die",
+ "Dead Instruction Elimination", false, false);
Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
@@ -70,7 +70,7 @@ namespace {
//
struct DCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- DCE() : FunctionPass(&ID) {}
+ DCE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -81,7 +81,7 @@ namespace {
}
char DCE::ID = 0;
-static RegisterPass<DCE> Y("dce", "Dead Code Elimination");
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
bool DCE::runOnFunction(Function &F) {
// Start out with all of the instructions in the worklist...
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e047e4ffa151..c8fd9d9fa556 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -40,7 +40,7 @@ namespace {
TargetData *TD;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(&ID) {}
+ DSE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
bool Changed = false;
@@ -82,7 +82,7 @@ namespace {
}
char DSE::ID = 0;
-static RegisterPass<DSE> X("dse", "Dead Store Elimination");
+INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
@@ -401,10 +401,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
continue;
- } else if (CallSite::get(BBI).getInstruction() != 0) {
+ } else if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't
// be undeadifying any of our pointers.
- CallSite CS = CallSite::get(BBI);
if (AA.doesNotAccessMemory(CS))
continue;
diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp
index 610a41dae44b..53dd06d24bb5 100644
--- a/lib/Transforms/Scalar/GEPSplitter.cpp
+++ b/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -27,13 +27,13 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
public:
static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(&ID) {}
+ explicit GEPSplitter() : FunctionPass(ID) {}
};
}
char GEPSplitter::ID = 0;
-static RegisterPass<GEPSplitter> X("split-geps",
- "split complex GEPs into simple GEPs");
+INITIALIZE_PASS(GEPSplitter, "split-geps",
+ "split complex GEPs into simple GEPs", false, false);
FunctionPass *llvm::createGEPSplitterPass() {
return new GEPSplitter();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 88b67768fa5d..c62ce1f27f64 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -165,7 +165,6 @@ namespace {
Expression create_expression(CastInst* C);
Expression create_expression(GetElementPtrInst* G);
Expression create_expression(CallInst* C);
- Expression create_expression(Constant* C);
Expression create_expression(ExtractValueInst* C);
Expression create_expression(InsertValueInst* C);
@@ -665,7 +664,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
+ : FunctionPass(ID), NoLoads(noloads), MD(0) { }
private:
bool NoLoads;
@@ -716,8 +715,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
}
-static RegisterPass<GVN> X("gvn",
- "Global Value Numbering");
+INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
@@ -735,7 +733,7 @@ static bool isSafeReplacement(PHINode* p, Instruction *inst) {
for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
UI != E; ++UI)
- if (PHINode* use_phi = dyn_cast<PHINode>(UI))
+ if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
if (use_phi->getParent() == inst->getParent())
return false;
@@ -1312,7 +1310,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
- SSAUpdate.Initialize(LI);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
const Type *LoadTy = LI->getType();
@@ -2112,6 +2110,11 @@ bool GVN::performPRE(Function &F) {
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
+
+ // We don't currently value number ANY inline asm calls.
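+ // (Inline asm may have side effects and dependencies that value numbering
+ // cannot model, so duplicating or reusing it here would be unsafe.)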
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
uint32_t ValNo = VN.lookup(CurInst);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b5c9dd881df8..af2eafc47cbf 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -77,7 +77,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(&ID) {}
+ IndVarSimplify() : LoopPass(ID) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -102,7 +102,7 @@ namespace {
void RewriteNonIntegerIVs(Loop *L);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- Value *IndVar,
+ PHINode *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter);
@@ -117,8 +117,8 @@ namespace {
}
char IndVarSimplify::ID = 0;
-static RegisterPass<IndVarSimplify>
-X("indvars", "Canonicalize Induction Variables");
+INITIALIZE_PASS(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false);
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
@@ -131,7 +131,7 @@ Pass *llvm::createIndVarSimplifyPass() {
/// is actually a much broader range than just linear tests.
ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
- Value *IndVar,
+ PHINode *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter) {
@@ -181,7 +181,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- CmpIndVar = L->getCanonicalInductionVariableIncrement();
+ CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock);
} else {
// We have to use the preincremented value...
RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
@@ -534,7 +534,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that we know the largest of the induction variable expressions
// in this loop, insert a canonical induction variable of the largest size.
- Value *IndVar = 0;
+ PHINode *IndVar = 0;
if (NeedCannIV) {
// Check to see if the loop already has any canonical-looking induction
// variables. If any are present and wider than the planned canonical
@@ -862,9 +862,9 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
Value::use_iterator IncrUse = Incr->use_begin();
- Instruction *U1 = cast<Instruction>(IncrUse++);
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
if (IncrUse == Incr->use_end()) return;
- Instruction *U2 = cast<Instruction>(IncrUse++);
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
if (IncrUse != Incr->use_end()) return;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index edce14cd92ea..104d5aecbdd3 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -45,7 +46,10 @@ Threshold("jump-threading-threshold",
// Turn on use of LazyValueInfo.
static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+EnableLVI("enable-jump-threading-lvi",
+ cl::desc("Use LVI for jump threading"),
+ cl::init(true),
+ cl::ReallyHidden);
@@ -74,15 +78,32 @@ namespace {
#else
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
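+ // (Value, BasicBlock) pairs currently on the ComputeValueKnownInPredecessors
+ // recursion stack, used to terminate the search when a cycle in the use-def
+ // chain brings us back to a pair we are already processing.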
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(&ID) {}
+ JumpThreading() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (EnableLVI)
+ if (EnableLVI) {
AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
+ }
}
void FindLoopHeaders(Function &F);
@@ -111,8 +132,8 @@ namespace {
}
char JumpThreading::ID = 0;
-static RegisterPass<JumpThreading>
-X("jump-threading", "Jump Threading");
+INITIALIZE_PASS(JumpThreading, "jump-threading",
+ "Jump Threading", false, false);
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -144,6 +165,7 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
+ if (LVI) LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -164,6 +186,11 @@ bool JumpThreading::runOnFunction(Function &F) {
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
BasicBlock *Succ = BI->getSuccessor(0);
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ if (LVI) LVI->eraseBlock(BB);
if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
// If we deleted BB and BB was the header of a loop, then the
@@ -251,6 +278,17 @@ void JumpThreading::FindLoopHeaders(Function &F) {
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
+// Helper method for ComputeValueKnownInPredecessors. If Value is a
+// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
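+// (The null ConstantInt is a sentinel meaning "undef": callers that combine
+// values scan for it and may force it to whichever value is interesting.)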
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > &Result,
+ Constant *Value, BasicBlock *BB) {
+ if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
+ Result.push_back(std::make_pair(FoldedCInt, BB));
+ else if (isa<UndefValue>(Value))
+ Result.push_back(std::make_pair((ConstantInt*)0, BB));
+}
+
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt in any of our
/// predecessors. If so, return the known list of value and pred BB in the
@@ -260,12 +298,24 @@ void JumpThreading::FindLoopHeaders(Function &F) {
///
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them.
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII helper to remove this pair from the recursion set once the recursion
+ // stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
// If V is a constantint, then it is known in all predecessors.
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
ConstantInt *CI = dyn_cast<ConstantInt>(V);
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(CI, *PI));
+
return true;
}
@@ -313,8 +363,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ } else if (LVI) {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ // LVI returns null if no value could be determined.
+ if (!CI) continue;
+ PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
}
}
+
return !Result.empty();
}
@@ -338,29 +395,26 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
else
InterestingVal = ConstantInt::getFalse(I->getContext());
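+ // Blocks for which the LHS scan below records a value, so the RHS scan
+ // doesn't add a second entry for the same predecessor.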
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
Result.push_back(LHSVals[i]);
Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- bool HasValue = false;
- for (unsigned r = 0, e = Result.size(); r != e; ++r)
- if (Result[r].second == RHSVals[i].second) {
- HasValue = true;
- break;
- }
-
- if (!HasValue) {
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
Result.push_back(RHSVals[i]);
Result.back().first = InterestingVal;
}
}
+
return !Result.empty();
}
@@ -377,8 +431,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
if (Result[i].first)
Result[i].first =
cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+
return true;
}
+
+ // Try to simplify binary operators whose right-hand operand is a constant.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(BO->getType()));
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+ }
+
+ return !Result.empty();
}
// Handle compare with phi operand, where the PHI is defined in this block.
@@ -405,10 +478,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
- if (isa<UndefValue>(Res))
- Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
- Result.push_back(std::make_pair(CI, PredBB));
+ if (Constant *ConstRes = dyn_cast<Constant>(Res))
+ PushConstantIntOrUndef(Result, ConstRes, PredBB);
}
return !Result.empty();
@@ -418,28 +489,59 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isIntegerTy() && // Not vector compare.
- (!isa<Instruction>(Cmp->getOperand(0)) ||
- cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
- Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+ Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- LazyValueInfo::Tristate
- Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, P, BB);
- if (Res == LazyValueInfo::Unknown)
- continue;
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ }
- Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ return !Result.empty();
}
-
- return !Result.empty();
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(CmpConst->getType()));
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (LVI) {
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
+ if (CInt) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CInt, *PI));
}
+
+ return !Result.empty();
}
+
return false;
}
@@ -490,6 +592,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ if (LVI) LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
@@ -603,6 +706,44 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
}
+
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Use LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For each predecessor edge, determine whether the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge(
+ CondCmp->getPredicate(),
+ CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we scanned to the end without breaking out, every edge agreed with the
+ // baseline; if we terminated early, one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
+ }
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -1020,6 +1161,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
return false;
+
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1314,6 +1456,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< ", across block:\n "
<< *BB << "\n");
+ if (LVI)
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
@@ -1383,7 +1528,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
@@ -1538,7 +1683,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 73473952912e..2ef85446bd9b 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -26,8 +26,7 @@
// pointer. There are no calls in the loop which mod/ref the pointer.
// If these conditions are true, we can promote the loads and stores in the
// loop of the pointer to use a temporary alloca'd variable. We then use
-// the mem2reg functionality to construct the appropriate SSA form for the
-// variable.
+// the SSAUpdater to construct the appropriate SSA form for the value.
//
//===----------------------------------------------------------------------===//
@@ -37,14 +36,15 @@
#include "llvm/DerivedTypes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -66,7 +66,7 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(&ID) {}
+ LICM() : LoopPass(ID) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -75,39 +75,31 @@ namespace {
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg)
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominanceFrontier>();
AU.addPreservedID(LoopSimplifyID);
}
bool doFinalization() {
- // Free the values stored in the map
- for (std::map<Loop *, AliasSetTracker *>::iterator
- I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I)
- delete I->second;
-
- LoopToAliasMap.clear();
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
return false;
}
private:
- // Various analyses that we use...
AliasAnalysis *AA; // Current AliasAnalysis information
LoopInfo *LI; // Current LoopInfo
- DominatorTree *DT; // Dominator Tree for the current Loop...
- DominanceFrontier *DF; // Current Dominance Frontier
+ DominatorTree *DT; // Dominator Tree for the current Loop.
- // State that is updated as we process loops
+ // State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
- std::map<Loop *, AliasSetTracker *> LoopToAliasMap;
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
@@ -204,25 +196,12 @@ namespace {
bool isLoopInvariantInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- /// PromoteValuesInLoop - Look at the stores in the loop and promote as many
- /// to scalars as we can.
- ///
- void PromoteValuesInLoop();
-
- /// FindPromotableValuesInLoop - Check the current loop for stores to
- /// definite pointers, which are not loaded and stored through may aliases.
- /// If these are found, create an alloca for the value, add it to the
- /// PromotedValues list, and keep track of the mapping from value to
- /// alloca...
- ///
- void FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &Val2AlMap);
+ void PromoteAliasSet(AliasSet &AS);
};
}
char LICM::ID = 0;
-static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion");
+INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -236,19 +215,23 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- DF = &getAnalysis<DominanceFrontier>();
DT = &getAnalysis<DominatorTree>();
CurAST = new AliasSetTracker(*AA);
- // Collect Alias info from subloops
+ // Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
LoopItr != LoopItrE; ++LoopItr) {
Loop *InnerL = *LoopItr;
- AliasSetTracker *InnerAST = LoopToAliasMap[InnerL];
- assert (InnerAST && "Where is my AST?");
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
// What if InnerLoop was modified by other passes?
CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
}
CurLoop = L;
@@ -263,7 +246,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops...
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -283,15 +266,24 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
- // memory references to scalars that we can...
- if (!DisablePromotion && Preheader && L->hasDedicatedExits())
- PromoteValuesInLoop();
-
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I);
+ }
+
// Clear out loops state information for the next iteration
CurLoop = 0;
Preheader = 0;
- LoopToAliasMap[L] = CurAST;
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
return Changed;
}
@@ -308,7 +300,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
- // We are processing blocks in reverse dfo, so process children first...
+ // We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
SinkRegion(Children[i]);
@@ -319,6 +311,17 @@ void LICM::SinkRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
+
+ // If the instruction is dead, we would otherwise try to sink it because it
+ // isn't used in the loop; instead, just delete it.
+ if (isInstructionTriviallyDead(&I)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
// Check to see if we can sink this instruction to the exit blocks
// of the loop. We can do this if the all users of the instruction are
@@ -350,6 +353,18 @@ void LICM::HoistRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
Instruction &I = *II++;
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
@@ -357,7 +372,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
isSafeToExecuteUnconditionally(I))
hoist(I);
- }
+ }
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
@@ -457,10 +472,10 @@ bool LICM::isLoopInvariantInst(Instruction &I) {
/// position, and may either delete it or move it to outside of the loop.
///
void LICM::sink(Instruction &I) {
- DEBUG(dbgs() << "LICM sinking instruction: " << I);
+ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -477,122 +492,101 @@ void LICM::sink(Instruction &I) {
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
- I.removeFromParent();
- BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
- ExitBlocks[0]->getInstList().insert(InsertPt, &I);
+ I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
}
- } else if (ExitBlocks.empty()) {
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
// The instruction is actually dead if there ARE NO exit blocks.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
- } else {
- // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
- // do all of the hard work of inserting PHI nodes as necessary. We convert
- // the value into a stack object to get it to do this.
-
- // Firstly, we create a stack object to hold the value...
- AllocaInst *AI = 0;
-
- if (!I.getType()->isVoidTy()) {
- AI = new AllocaInst(I.getType(), 0, I.getName(),
- I.getParent()->getParent()->getEntryBlock().begin());
- CurAST->add(AI);
- }
-
- // Secondly, insert load instructions for each use of the instruction
- // outside of the loop.
- while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.use_back());
-
- // If the user is a PHI Node, we actually have to insert load instructions
- // in all predecessor blocks, not in the PHI block itself!
- if (PHINode *UPN = dyn_cast<PHINode>(U)) {
- // Only insert into each predecessor once, so that we don't have
- // different incoming values from the same block!
- std::map<BasicBlock*, Value*> InsertedBlocks;
- for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i)
- if (UPN->getIncomingValue(i) == &I) {
- BasicBlock *Pred = UPN->getIncomingBlock(i);
- Value *&PredVal = InsertedBlocks[Pred];
- if (!PredVal) {
- // Insert a new load instruction right before the terminator in
- // the predecessor block.
- PredVal = new LoadInst(AI, "", Pred->getTerminator());
- CurAST->add(cast<LoadInst>(PredVal));
- }
-
- UPN->setIncomingValue(i, PredVal);
- }
-
- } else {
- LoadInst *L = new LoadInst(AI, "", U);
- U->replaceUsesOfWith(&I, L);
- CurAST->add(L);
- }
- }
-
- // Thirdly, insert a copy of the instruction in each exit block of the loop
- // that is dominated by the instruction, storing the result into the memory
- // location. Be careful not to insert the instruction into any particular
- // basic block more than once.
- std::set<BasicBlock*> InsertedBlocks;
- BasicBlock *InstOrigBB = I.getParent();
-
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
-
- if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) {
- // If we haven't already processed this exit block, do so now.
- if (InsertedBlocks.insert(ExitBlock).second) {
- // Insert the code after the last PHI node...
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-
- // If this is the first exit block processed, just move the original
- // instruction, otherwise clone the original instruction and insert
- // the copy.
- Instruction *New;
- if (InsertedBlocks.size() == 1) {
- I.removeFromParent();
- ExitBlock->getInstList().insert(InsertPt, &I);
- New = &I;
- } else {
- New = I.clone();
- CurAST->copyValue(&I, New);
- if (!I.getName().empty())
- New->setName(I.getName()+".le");
- ExitBlock->getInstList().insert(InsertPt, New);
- }
-
- // Now that we have inserted the instruction, store it into the alloca
- if (AI) new StoreInst(New, AI, InsertPt);
- }
- }
- }
-
- // If the instruction doesn't dominate any exit blocks, it must be dead.
- if (InsertedBlocks.empty()) {
- CurAST->deleteValue(&I);
- I.eraseFromParent();
- }
-
- // Finally, promote the fine value to SSA form.
- if (AI) {
- std::vector<AllocaInst*> Allocas;
- Allocas.push_back(AI);
- PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
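+ // Collect any PHI nodes the updater creates so we can add them to the alias
+ // set tracker afterwards.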
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
}
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
}
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
}
/// hoist - When an instruction is found to only use loop invariant operands
@@ -602,12 +596,8 @@ void LICM::hoist(Instruction &I) {
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
<< I << "\n");
- // Remove the instruction from its current basic block... but don't delete the
- // instruction.
- I.removeFromParent();
-
- // Insert the new node in Preheader, before the terminator.
- Preheader->getInstList().insert(Preheader->getTerminator(), &I);
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -647,223 +637,269 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
return true;
}
-
-/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop. We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
-/// which are loop invariant. We promote these memory locations to use allocas
-/// instead. These allocas can easily be raised to register values by the
-/// PromoteMem2Reg functionality.
+/// which are loop invariant.
///
-void LICM::PromoteValuesInLoop() {
- // PromotedValues - List of values that are promoted out of the loop. Each
- // value has an alloca instruction for it, and a canonical version of the
- // pointer.
- std::vector<std::pair<AllocaInst*, Value*> > PromotedValues;
- std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca
-
- FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap);
- if (ValueToAllocaMap.empty()) return; // If there are values to promote.
-
- Changed = true;
- NumPromoted += PromotedValues.size();
-
- std::vector<Value*> PointerValueNumbers;
-
- // Emit a copy from the value into the alloca'd value in the loop preheader
- TerminatorInst *LoopPredInst = Preheader->getTerminator();
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- Value *Ptr = PromotedValues[i].second;
-
- // If we are promoting a pointer value, update alias information for the
- // inserted load.
- Value *LoadValue = 0;
- if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) {
- // Locate a load or store through the pointer, and assign the same value
- // to LI as we are loading or storing. Since we know that the value is
- // stored in this loop, this will always succeed.
- for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- LoadValue = LI;
- break;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(1) == Ptr) {
- LoadValue = SI->getOperand(0);
- break;
- }
- }
- }
- assert(LoadValue && "No store through the pointer found!");
- PointerValueNumbers.push_back(LoadValue); // Remember this for later.
- }
-
- // Load from the memory we are promoting.
- LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst);
-
- if (LoadValue) CurAST->copyValue(LoadValue, LI);
-
- // Store into the temporary alloca.
- new StoreInst(LI, PromotedValues[i].first, LoopPredInst);
- }
+void LICM::PromoteAliasSet(AliasSet &AS) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
- // Scan the basic blocks in the loop, replacing uses of our pointers with
- // uses of the allocas in question.
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
//
- for (Loop::block_iterator I = CurLoop->block_begin(),
- E = CurLoop->block_end(); I != E; ++I) {
- BasicBlock *BB = *I;
- // Rewrite all loads and stores in the block of the pointer...
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(L->getOperand(0));
- if (I != ValueToAllocaMap.end())
- L->setOperand(0, I->second); // Rewrite load instruction...
- } else if (StoreInst *S = dyn_cast<StoreInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(S->getOperand(1));
- if (I != ValueToAllocaMap.end())
- S->setOperand(1, I->second); // Rewrite store instruction...
- }
- }
- }
-
- // Now that the body of the loop uses the allocas instead of the original
- // memory locations, insert code to copy the alloca value back into the
- // original memory location on all exits from the loop. Note that we only
- // want to insert one copy of the code in each exit block, though the loop may
- // exit to the same block more than once.
+ // for () { if (c) *P += 1; }
//
- SmallPtrSet<BasicBlock*, 16> ProcessedBlocks;
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- if (!ProcessedBlocks.insert(ExitBlocks[i]))
- continue;
-
- // Copy all of the allocas into their memory locations.
- BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI();
- Instruction *InsertPos = BI;
- unsigned PVN = 0;
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- // Load from the alloca.
- LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
-
- // If this is a pointer type, update alias info appropriately.
- if (LI->getType()->isPointerTy())
- CurAST->copyValue(PointerValueNumbers[PVN++], LI);
-
- // Store into the memory we promoted.
- new StoreInst(LI, PromotedValues[i].second, InsertPos);
- }
- }
-
- // Now that we have done the deed, use the mem2reg functionality to promote
- // all of the new allocas we just created into real SSA registers.
+ // into:
//
- std::vector<AllocaInst*> PromotedAllocas;
- PromotedAllocas.reserve(PromotedValues.size());
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
- PromotedAllocas.push_back(PromotedValues[i].first);
- PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
-}
-
-/// FindPromotableValuesInLoop - Check the current loop for stores to definite
-/// pointers, which are not loaded and stored through may aliases and are safe
-/// for promotion. If these are found, create an alloca for the value, add it
-/// to the PromotedValues list, and keep track of the mapping from value to
-/// alloca.
-void LICM::FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
- Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
-
- // Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
- // We can promote this alias set if it has a store, if it is a "Must" alias
- // set, if the pointer is loop invariant, and if we are not eliminating any
- // volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
- continue;
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
- assert(!AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
- Value *V = AS.begin()->getValue();
-
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
- {
- bool PointerOk = true;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- if (V->getType() != I->getValue()->getType()) {
- PointerOk = false;
- break;
- }
- if (!PointerOk)
- continue;
- }
-
- // It isn't safe to promote a load/store from the loop if the load/store is
- // conditional. For example, turning:
- //
- // for () { if (c) *P += 1; }
- //
- // into:
- //
- // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
- //
- // is not safe, because *P may only be valid to access if 'c' is true.
- //
- // It is safe to promote P if all uses are direct load/stores and if at
- // least one is guaranteed to be executed.
- bool GuaranteedToExecute = false;
- bool InvalidInst = false;
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
UI != UE; ++UI) {
- // Ignore instructions not in this loop.
+ // Ignore instructions that are outside the loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
if (!Use || !CurLoop->contains(Use))
continue;
-
- if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
- InvalidInst = true;
- break;
- }
+
+ // If there is a non-load/store instruction in the loop, we can't promote
+ // it.
+ if (isa<LoadInst>(Use))
+ assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+ else if (isa<StoreInst>(Use)) {
+ assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ if (Use->getOperand(0) == ASIV) return;
+ } else
+ return; // Not a load or store.
if (!GuaranteedToExecute)
GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+ LoopUses.push_back(Use);
}
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+ // Otherwise, this is safe to promote; let's do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
- // If there is an non-load/store instruction in the loop, we can't promote
- // it. If there isn't a guaranteed-to-execute instruction, we can't
- // promote.
- if (InvalidInst || !GuaranteedToExecute)
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ // It needs a sample value with the type and name of what we'll be inserting.
+ Value *SomeValue;
+ if (isa<LoadInst>(LoopUses[0]))
+ SomeValue = LoopUses[0];
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
+ SSA.Initialize(SomeValue->getType(), SomeValue->getName());
+
+ // First step: bucket up uses of the pointers by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the loop with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ SmallVector<LoadInst*, 32> LiveInLoads;
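+ // Record each load we replace and its replacement so that, when we delete the
+ // loop instructions later, remaining uses can be chased to the final value.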
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
+ Instruction *User = LoopUses[LoopUse];
+ std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (isa<StoreInst>(User)) {
+ SSA.AddAvailableValue(User->getParent(),
+ cast<StoreInst>(User)->getOperand(0));
+ } else {
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ }
+ BlockUses.clear();
continue;
+ }
- const Type *Ty = cast<PointerType>(V->getType())->getElementType();
- AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
- PromotedValues.push_back(std::make_pair(AI, V));
+ // Otherwise, check to see if this block is all loads. If so, we can queue
+ // them all as live-in loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
- // Update the AST and alias analysis.
- CurAST->copyValue(V, AI);
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live-in value. The last store defines
+ // the live-out value. We handle this by doing a linear scan of the block.
+ BasicBlock *BB = User->getParent();
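+ // The value of the most recent store seen in the scan; null until the first
+ // store in the block.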
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(L->getOperand(0))) continue;
+
+ // If we haven't seen a store yet, this is a live-in use; otherwise,
+ // use the stored value.
+ if (StoredValue) {
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(S->getOperand(1))) continue;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
+ // Remember that this is the active value in the block.
+ StoredValue = S->getOperand(0);
+ }
+ }
+
+ // The last value stored in the block is its live-out value.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Now that all the intra-loop values are classified, set up the preheader.
+ // It gets a load of the pointer we're promoting; that load is the live-out
+ // value from the preheader.
+ LoadInst *PreheaderLoad = new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Now that the preheader is good to go, set up the exit blocks. Each exit
+ // block gets a store of the live-out value that feeds it. Since we've
+ // already told the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
- DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n");
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ ALoad->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(ALoad, NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // If the preheader load is itself a pointer, we need to tell alias analysis
+ // about the new pointer we created in the preheader block and about any PHI
+ // nodes that just got inserted.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ CurAST->copyValue(SomeValue, PreheaderLoad);
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(SomeValue, NewPHIs[i]);
}
+
+ // Now that everything is rewritten, delete the old instructions from the body
+ // of the loop. They should all be dead now.
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ User->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(User, NewVal);
+ }
+
+ CurAST->deleteValue(User);
+ User->eraseFromParent();
+ }
+
+ // Phew, we're done!
}
+
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
@@ -873,7 +909,7 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
/// set.
void LICM::deleteAnalysisValue(Value *V, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index e4894e99b68f..543dfc1cba09 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -28,7 +28,7 @@ namespace {
class LoopDeletion : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopDeletion() : LoopPass(&ID) {}
+ LoopDeletion() : LoopPass(ID) {}
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -38,9 +38,9 @@ namespace {
bool &Changed, BasicBlock *Preheader);
virtual void getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -55,7 +55,8 @@ namespace {
}
char LoopDeletion::ID = 0;
-static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops");
+INITIALIZE_PASS(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false);
Pass* llvm::createLoopDeletionPass() {
return new LoopDeletion();
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 31058e5759a4..a4336743a8f0 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -74,7 +74,7 @@ namespace {
class LoopIndexSplit : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopIndexSplit() : LoopPass(&ID) {}
+ LoopIndexSplit() : LoopPass(ID) {}
// Index split Loop L. Return true if loop is split.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -197,8 +197,8 @@ namespace {
}
char LoopIndexSplit::ID = 0;
-static RegisterPass<LoopIndexSplit>
-X("loop-index-split", "Index Split Loops");
+INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
+ "Index Split Loops", false, false);
Pass *llvm::createLoopIndexSplitPass() {
return new LoopIndexSplit();
@@ -677,7 +677,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
PI != PE; ++PI) {
BasicBlock *P = *PI;
- if (P == DeadBB || DT->dominates(DeadBB, P))
+ if (DT->dominates(DeadBB, P))
PredBlocks.push_back(P);
}
@@ -799,7 +799,7 @@ void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
// the dominance frontiers.
for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
I != E; ++I) {
- if (*I == CondBB || !DT->dominates(CondBB, *I)) continue;
+ if (!DT->properlyDominates(CondBB, *I)) continue;
DominanceFrontier::iterator BBDF = DF->find(*I);
DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
@@ -1183,7 +1183,7 @@ bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
bool usedOutsideBB = false;
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
- Instruction *U = cast<Instruction>(UI);
+ Instruction *U = cast<Instruction>(*UI);
if (U->getParent() != BB)
usedOutsideBB = true;
}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 16c4a15d3550..65acc1d9257a 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -35,7 +35,7 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(&ID) {}
+ LoopRotate() : LoopPass(ID) {}
// Rotate Loop L as many times as possible. Return true if
// loop is rotated at least once.
@@ -43,15 +43,15 @@ namespace {
// LCSSA form makes instruction renaming easier.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
// Helper functions
@@ -79,7 +79,7 @@ namespace {
}
char LoopRotate::ID = 0;
-static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
+INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
@@ -221,7 +221,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// The value now exits in two versions: the initial value in the preheader
// and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal);
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
@@ -261,6 +261,26 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
+ // Move the original header to the bottom of the loop, where it now more
+ // naturally belongs. This isn't necessary for correctness, and CodeGen can
+ // usually reorder blocks on its own to fix things like this up, but it's
+ // still nice to keep the IR readable.
+ //
+ // The original header should have only one predecessor at this point, since
+ // we checked that the loop had a proper preheader and unique backedge before
+ // we started.
+ assert(OrigHeader->getSinglePredecessor() &&
+ "Original loop header has too many predecessors after loop rotation!");
+ OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
+
+ // Also, since this original header only has one predecessor, zap its
+ // PHI nodes, which are now trivial.
+ FoldSingleEntryPHINodes(OrigHeader);
+
+ // TODO: We could just go ahead and merge OrigHeader into its predecessor
+ // at this point, if we don't mind updating dominator info.
+
+ // Establish a new preheader, update dominators, etc.
preserveCanonicalLoopForm(LPM);
++NumRotated;
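For reference, FoldSingleEntryPHINodes rewrites each PHI that has exactly one incoming value to use that value directly. A minimal IR sketch of what happens to OrigHeader's PHIs after rotation (names hypothetical):

    ; before: OrigHeader now has a single predecessor %pred
    ;   %x = phi i32 [ %v, %pred ]
    ; after FoldSingleEntryPHINodes: every use of %x is rewritten to %v
    ; and the phi node is erased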
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1f9b4156b9cd..e8dc5d3a640e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -161,9 +161,10 @@ RegUseTracker::DropUse(size_t LUIdx) {
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
- if (!RegUsesMap.count(Reg)) return false;
- const SmallBitVector &UsedByIndices =
- RegUsesMap.find(Reg)->second.UsedByIndices;
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
@@ -441,12 +442,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
- const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
- IgnoreSignificantBits);
- if (!Start) return 0;
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
return 0;
@@ -505,12 +506,14 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -528,12 +531,14 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -965,6 +970,12 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// WidestFixupType - This records the widest use type for any fixup using
+ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
+ /// max fixup widths to be equivalent, because the narrower one may be relying
+ /// on the implicit truncation to truncate away bogus bits.
+ const Type *WidestFixupType;
+
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
/// used to formulate a replacement for OperandValToReplace in UserInst.
@@ -976,15 +987,14 @@ public:
LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true) {}
+ AllFixupsOutsideLoop(true),
+ WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
- void check() const;
-
void print(raw_ostream &OS) const;
void dump() const;
};
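A hedged sketch of why differing fixup widths block formula reuse: a narrow use may depend on an implicit truncation to discard the induction variable's high bits, which a wider use must keep.

    ; use A (i64):  %a = add i64 %iv, %n
    ; use B (i32):  %b = trunc i64 %iv to i32   ; relies on dropping high bits
    ; Treating A and B as equivalent could reintroduce the bits that
    ; B's truncation was discarding.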
@@ -1076,13 +1086,16 @@ void LSRUse::print(raw_ostream &OS) const {
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << ',';
}
OS << '}';
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
+
+ if (WidestFixupType)
+ OS << ", widest fixup type: " << *WidestFixupType;
}
void LSRUse::dump() const {
@@ -1354,6 +1367,10 @@ public:
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1587,7 +1604,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// Add one to the backedge-taken count to get the trip count.
- const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Check for a max calculation that matches the pattern. There's no check
@@ -1919,32 +1936,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) {
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
- // Search all uses for the formula. This could be more clever. Ignore
- // ICmpZero uses because they may contain formulae generated by
- // GenerateICmpZeroScales, in which case adding fixup offsets may
- // be invalid.
+ // Search all uses for the formula. This could be more clever.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
if (&LU != &OrigLU &&
LU.Kind != LSRUse::ICmpZero &&
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
- LU.Kind) {
+ F.AM.Scale == OrigF.AM.Scale) {
if (F.AM.BaseOffs == 0)
return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+ // can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
}
}
+ // Nothing looked good.
return 0;
}
@@ -1976,7 +2002,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- next(I); NewStrideIter != E; ++NewStrideIter) {
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
@@ -2066,6 +2092,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
@@ -2195,6 +2225,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
@@ -2207,14 +2241,13 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
- SmallVectorImpl<const SCEV *> &UninterestingOps,
const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
+ CollectSubexprs(*I, C, Ops, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
@@ -2222,8 +2255,8 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
- C, Ops, UninterestingOps, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
+ C, Ops, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2233,17 +2266,13 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, UninterestingOps, L, SE);
+ Ops, L, SE);
return;
}
}
- // Otherwise use the value itself. Loop-variant "unknown" values are
- // uninteresting; we won't be able to do anything meaningful with them.
- if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
- UninterestingOps.push_back(S);
- else
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ // Otherwise use the value itself, optionally with a scale applied.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
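In effect, CollectSubexprs flattens an add into its leaf terms, distributing any constant multiplier and splitting non-zero bases out of addrecs. Illustrative, in SCEV notation with hypothetical values:

    //   A + B + 2*C      ->  { A, B, 2*C }
    //   {Start,+,Step}   ->  { {0,+,Step}, Start }
    //   3*(A + B)        ->  { 3*A, 3*B }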
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2257,19 +2286,19 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
- SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
- CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
-
- // Add any uninteresting values as one register, as we won't be able to
- // form any interesting reassociation opportunities with them. They'll
- // just have to be added inside the loop no matter what we do.
- if (!UninterestingAddOps.empty())
- AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, L, SE);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ continue;
+
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
@@ -2279,9 +2308,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps
- ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append
- (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -2377,7 +2406,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
// Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
std::swap(F.BaseRegs[i], F.BaseRegs.back());
@@ -2778,6 +2807,10 @@ LSRInstance::GenerateAllReuseFormulae() {
}
GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
}
/// If there are multiple formulae with the same set of registers used
@@ -2876,11 +2909,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
return Power;
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
-void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2938,7 +2971,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
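A small example of the superset pruning, with hypothetical formulae:

    // Use U has:  F1 = reg(A) + reg(B)
    //             F2 = reg(A) + reg(B) + reg(C)
    // F2 references a strict superset of F1's registers, so it cannot
    // lower register pressure and is deleted.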
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2988,7 +3026,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.AM.BaseOffs;
- DEBUG(errs() << "New fixup has offset "
+ DEBUG(dbgs() << "New fixup has offset "
<< Fixup.Offset << '\n');
}
if (Fixup.LUIdx == NumUses-1)
@@ -3009,7 +3047,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
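A sketch of the collapse, assuming unrolled code produced uses wanting A, A+4, and A+8: rather than keeping three base registers, the constants migrate into the fixup offsets of one surviving use:

    // Before:  use0 {reg A}, use1 {reg A, BaseOffs=4}, use2 {reg A, BaseOffs=8}
    // After:   one LSRUse on reg(A), with fixup offsets 0, 4, 8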
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
SmallPtrSet<const SCEV *, 4> Taken;
@@ -3071,6 +3132,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
}
}
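Pseudocode for the winner-register loop that follows, hedged as a summary rather than the exact control flow:

    // Taken = {}
    // while (search space still too complex):
    //   Best = register referenced by the most formulae, not yet in Taken
    //   Taken += Best
    //   for each use with any formula referencing Best:
    //     delete that use's formulae which do not reference Best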
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
/// SolveRecurse - This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
@@ -3614,10 +3686,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// to formulate the values needed for the uses.
GenerateAllReuseFormulae();
- DEBUG(dbgs() << "\n"
- "After generating reuse formulae:\n";
- print_uses(dbgs()));
-
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
@@ -3724,15 +3792,15 @@ private:
}
char LoopStrengthReduce::ID = 0;
-static RegisterPass<LoopStrengthReduce>
-X("loop-reduce", "Loop Strength Reduction");
+INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false);
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(&ID), TLI(tli) {}
+ : LoopPass(ID), TLI(tli) {}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 4ad41ae4b59f..d0edfa220051 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -26,7 +27,7 @@
using namespace llvm;
static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
static cl::opt<unsigned>
@@ -42,7 +43,7 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
+ LoopUnroll() : LoopPass(ID) {}
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
@@ -55,23 +56,24 @@ namespace {
/// loop preheaders be inserted into the CFG...
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopInfo>();
AU.addPreservedID(LCSSAID);
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ScalarEvolution>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
};
}
char LoopUnroll::ID = 0;
-static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
+INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
@@ -145,12 +147,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
// FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
return true;
}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 0c900ffc4027..9afe428ba569 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -77,7 +77,6 @@ namespace {
bool redoLoop;
Loop *currentLoop;
- DominanceFrontier *DF;
DominatorTree *DT;
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
@@ -92,15 +91,15 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
- LoopPass(&ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
loopPreheader(NULL) {}
bool runOnLoop(Loop *L, LPPassManager &LPM);
bool processCurrentLoop();
/// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
+ /// loop preheaders be inserted into the CFG.
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
@@ -110,7 +109,6 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
private:
@@ -160,7 +158,7 @@ namespace {
};
}
char LoopUnswitch::ID = 0;
-static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
+INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
@@ -201,7 +199,6 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;
- DF = getAnalysisIfAvailable<DominanceFrontier>();
DT = getAnalysisIfAvailable<DominatorTree>();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
@@ -216,8 +213,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
if (DT)
DT->runOnFunction(*F);
- if (DF)
- DF->runOnFunction(*F);
}
return Changed;
}
@@ -282,19 +277,18 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
-/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
-/// 1. Exit the loop with no side effects.
-/// 2. Branch to the latch block with no side-effects.
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
///
-/// If these conditions are true, we return true and set ExitBB to the block we
+/// If so, we return true and set ExitBB to the block we
/// exit through.
///
static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
BasicBlock *&ExitBB,
std::set<BasicBlock*> &Visited) {
if (!Visited.insert(BB).second) {
- // Already visited and Ok, end of recursion.
- return true;
+ // Already visited. Without more analysis, this could indicate an infinite loop.
+ return false;
} else if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
@@ -324,7 +318,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
/// process. If so, return the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
- Visited.insert(L->getHeader()); // Branches to header are ok.
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
BasicBlock *ExitBB = 0;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
@@ -356,8 +350,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
if (!BI->isConditional() || BI->getCondition() != Cond)
return false;
- // Check to see if a successor of the branch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
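Trivial unswitching in C-like terms (a hedged sketch): Cond is loop-invariant and one branch arm leaves the loop immediately with no side effects, so it can be tested once, outside the loop, without duplicating the loop body:

    /* before */
    while (test()) {
      if (Inv) break;   /* Inv is loop-invariant */
      work();
    }

    /* after: Inv tested once, outside the loop */
    if (!Inv)
      while (test())
        work();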
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
new file mode 100644
index 000000000000..973ffe7e6a40
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,161 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+bool LowerAtomicIntrinsic(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+
+ Function *Callee = CI->getCalledFunction();
+ if (!Callee)
+ return false;
+
+ unsigned IID = Callee->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::memory_barrier:
+ break;
+
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Delta = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
+ switch (IID) {
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ }
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Builder.CreateStore(Val, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_cmp_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Cmp = CI->getArgOperand(1);
+ Value *Val = CI->getArgOperand(2);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+
+ return true;
+}
+
+struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {}
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++;
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ Changed |= LowerAtomicIntrinsic(CI);
+ }
+ return Changed;
+ }
+
+};
+
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form",
+ false, false);
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
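What the lowering does to an atomic add, under the pass's stated non-preemptible, single-thread assumption (a sketch using the era's intrinsic naming):

    ; before:
    ;   %old = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %v)
    ; after:
    ;   %old = load i32* %p
    ;   %sum = add i32 %old, %v
    ;   store i32 %sum, i32* %p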
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0e566c5bd9be..24fae423d2f7 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -304,7 +304,7 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass(&ID) {}
+ MemCpyOpt() : FunctionPass(ID) {}
private:
// This transformation requires dominator postdominator info
@@ -331,8 +331,7 @@ namespace {
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
-static RegisterPass<MemCpyOpt> X("memcpyopt",
- "MemCpy Optimization");
+INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
@@ -374,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// If the call is readnone, ignore it, otherwise bail out. We don't even
// allow readonly here because we don't want something like:
// A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
- if (AA.getModRefBehavior(CallSite::get(BI)) ==
+ if (AA.getModRefBehavior(CallSite(BI)) ==
AliasAnalysis::DoesNotAccessMemory)
continue;
@@ -509,7 +508,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// because we'll need to do type comparisons based on the underlying type.
Value *cpyDest = cpy->getDest();
Value *cpySrc = cpy->getSource();
- CallSite CS = CallSite::get(C);
+ CallSite CS(C);
// We need to be able to reason about the size of the memcpy, so we require
// that it be a constant.
@@ -637,10 +636,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
return true;
}
-/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
-/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
-/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
-/// This allows later passes to remove the first memcpy altogether.
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
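The rewrite described above, in C terms (a hedged illustration):

    memcpy(Y, X, N);   /* memcpy A */
    memcpy(Z, Y, N);   /* memcpy B: becomes memcpy(Z, X, N), after which
                          a later pass can remove memcpy A if Y is dead */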
@@ -744,7 +744,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
const Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
+ ArgTys, 3));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 98452f5d82c4..b8afcc12d927 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -77,7 +77,7 @@ namespace {
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
- Reassociate() : FunctionPass(&ID) {}
+ Reassociate() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -103,7 +103,8 @@ namespace {
}
char Reassociate::ID = 0;
-static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions");
+INITIALIZE_PASS(Reassociate, "reassociate",
+ "Reassociate expressions", false, false);
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 13222ac22004..506b72ac34e0 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -36,7 +36,7 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
namespace {
struct RegToMem : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- RegToMem() : FunctionPass(&ID) {}
+ RegToMem() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,8 +59,8 @@ namespace {
}
char RegToMem::ID = 0;
-static RegisterPass<RegToMem>
-X("reg2mem", "Demote all values to stack slots");
+INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false);
bool RegToMem::runOnFunction(Function &F) {
@@ -124,7 +124,7 @@ bool RegToMem::runOnFunction(Function &F) {
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
//
-const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
return new RegToMem();
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 907ece8fcce9..6115c05c20ac 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -275,12 +275,12 @@ public:
return I->second;
}
- LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
StructValueState.find(std::make_pair(V, i));
assert(I != StructValueState.end() && "V is not in valuemap!");
return I->second;
- }
+ }*/
/// getTrackedRetVals - Get the inferred return value map.
///
@@ -508,17 +508,16 @@ private:
void visitLoadInst (LoadInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitCallInst (CallInst &I) {
- visitCallSite(CallSite::get(&I));
+ visitCallSite(&I);
}
void visitInvokeInst (InvokeInst &II) {
- visitCallSite(CallSite::get(&II));
+ visitCallSite(&II);
visitTerminatorInst(II);
}
void visitCallSite (CallSite CS);
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
- void visitVANextInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
void visitInstruction(Instruction &I) {
@@ -1586,7 +1585,7 @@ namespace {
///
struct SCCP : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- SCCP() : FunctionPass(&ID) {}
+ SCCP() : FunctionPass(ID) {}
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
@@ -1600,8 +1599,8 @@ namespace {
} // end anonymous namespace
char SCCP::ID = 0;
-static RegisterPass<SCCP>
-X("sccp", "Sparse Conditional Constant Propagation");
+INITIALIZE_PASS(SCCP, "sccp",
+ "Sparse Conditional Constant Propagation", false, false);
// createSCCPPass - This is the public interface to this file.
FunctionPass *llvm::createSCCPPass() {
@@ -1702,14 +1701,15 @@ namespace {
///
struct IPSCCP : public ModulePass {
static char ID;
- IPSCCP() : ModulePass(&ID) {}
+ IPSCCP() : ModulePass(ID) {}
bool runOnModule(Module &M);
};
} // end anonymous namespace
char IPSCCP::ID = 0;
-static RegisterPass<IPSCCP>
-Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation");
+INITIALIZE_PASS(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false);
// createIPSCCPPass - This is the public interface to this file.
ModulePass *llvm::createIPSCCPPass() {
@@ -1748,6 +1748,13 @@ static bool AddressIsTaken(const GlobalValue *GV) {
bool IPSCCP::runOnModule(Module &M) {
SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+ // address-taken-ness. Because of this, we keep track of their addresses from
+ // the first pass so we can use them for the later simplification pass.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
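For reference, "address taken" here means the function escapes through something other than a direct call, e.g. (hypothetical C):

    static int f(int x) { return x + 1; }
    int (*fp)(int) = f;   /* f escapes: not every call site is visible,
                             so its arguments cannot be tracked */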
@@ -1763,9 +1770,13 @@ bool IPSCCP::runOnModule(Module &M) {
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
- if (F->hasLocalLinkage() && !AddressIsTaken(F)) {
- Solver.AddArgumentTrackedFunction(F);
- continue;
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
}
// Assume the function is called.
@@ -1950,7 +1961,7 @@ bool IPSCCP::runOnModule(Module &M) {
continue;
// We can only do this if we know that nothing else can call the function.
- if (!F->hasLocalLinkage() || AddressIsTaken(F))
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
continue;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index dd445f63320a..fee317dbd9ab 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -28,6 +28,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
@@ -51,7 +52,7 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- explicit SROA(signed T = -1) : FunctionPass(&ID) {
+ explicit SROA(signed T = -1) : FunctionPass(ID) {
if (T == -1)
SRThreshold = 128;
else
@@ -114,8 +115,7 @@ namespace {
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
- AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
-
+
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -135,7 +135,8 @@ namespace {
}
char SROA::ID = 0;
-static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
+INITIALIZE_PASS(SROA, "scalarrepl",
+ "Scalar Replacement of Aggregates", false, false);
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
@@ -193,6 +194,27 @@ private:
};
} // end anonymous namespace.
+
+/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
+/// allowed to form. We do this to avoid MMX types, which is a complete hack,
+/// but is required until the backend is fixed.
+static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
+ StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
+ if (!Triple.startswith("i386") &&
+ !Triple.startswith("x86_64"))
+ return false;
+
+ // Reject all the MMX vector types.
+ switch (VTy->getNumElements()) {
+ default: return false;
+ case 1: return VTy->getElementType()->isIntegerTy(64);
+ case 2: return VTy->getElementType()->isIntegerTy(32);
+ case 4: return VTy->getElementType()->isIntegerTy(16);
+ case 8: return VTy->getElementType()->isIntegerTy(8);
+ }
+}
+
+
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
@@ -209,7 +231,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
+ !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
@@ -969,7 +992,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length)
isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == CallInst::ArgOffset, Info);
+ UI.getOperandNo() == 0, Info);
else
MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1662,6 +1685,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ return HasPadding(ATy->getElementType(), TD);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return HasPadding(VTy->getElementType(), TD);
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
@@ -1691,12 +1720,8 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
if (PrevFieldEnd < SL->getSizeInBits())
return true;
}
-
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- return HasPadding(ATy->getElementType(), TD);
- } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- return HasPadding(VTy->getElementType(), TD);
}
+
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
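Examples of what HasPadding now reports, with hypothetical C types under the usual x86 layout rules:

    struct A { char c; int i; };   /* 3 padding bytes between c and i: true */
    struct A arr[2];               /* array case now recurses into A: true  */
    struct B { int i[4]; };        /* no padding anywhere: false            */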
@@ -1787,7 +1812,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != CallInst::ArgOffset) return false;
+ if (UI.getOperandNo() != 0) return false;
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 49d93a2fcc27..360749caf111 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,14 +42,15 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(&ID) {}
+ CFGSimplifyPass() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
};
}
char CFGSimplifyPass::ID = 0;
-static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG");
+INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
+ "Simplify the CFG", false, false);
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
@@ -284,10 +285,9 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
while (LocalChange) {
LocalChange = false;
- // Loop over all of the basic blocks (except the first one) and remove them
- // if they are unneeded...
+ // Loop over all of the basic blocks and remove them if they are unneeded...
//
- for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (SimplifyCFG(BBIt++, TD)) {
LocalChange = true;
++NumSimpl;
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index c3408e77807f..3ec70ec2e024 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -32,7 +32,7 @@ namespace {
const TargetData *TD;
public:
static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -46,8 +46,8 @@ namespace {
char SimplifyHalfPowrLibCalls::ID = 0;
} // end anonymous namespace.
-static RegisterPass<SimplifyHalfPowrLibCalls>
-X("simplify-libcalls-halfpowr", "Simplify half_powr library calls");
+INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
+ "Simplify half_powr library calls", false, false);
// Public interface to the Simplify HalfPowr LibCalls pass.
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index b1c619125c35..d7ce53f36715 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -532,7 +532,7 @@ struct StrStrOpt : public LibCallOptimization {
StrLen, B, TD);
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
- ICmpInst *Old = cast<ICmpInst>(UI++);
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
ConstantInt::getNullValue(StrNCmp->getType()),
"cmp");
@@ -566,8 +566,8 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD),
- CI->getType());
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
+ ToFindStr[0], B, TD), CI->getType());
return 0;
}
};
@@ -681,8 +681,8 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context),
- false);
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt8Ty(*Context), false);
EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
return CI->getArgOperand(0);
}
@@ -1042,9 +1042,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte.
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()+1), 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the
+ ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
+ FormatStr.size() + 1), 1, false, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1080,7 +1080,8 @@ struct SPrintFOpt : public LibCallOptimization {
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
+ IncLen, 1, false, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1236,7 +1237,7 @@ namespace {
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {}
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {}
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1253,8 +1254,8 @@ namespace {
char SimplifyLibCalls::ID = 0;
} // end anonymous namespace.
-static RegisterPass<SimplifyLibCalls>
-X("simplify-libcalls", "Simplify well-known library calls");
+INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false);
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -2155,7 +2156,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * pow(pow(x,y),z)-> pow(x,y*z)
//
// puts:
-// * puts("") -> putchar("\n")
+// * puts("") -> putchar('\n')
//
// round, roundf, roundl:
// * round(cnst) -> cnst'
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index b88ba4850509..95d3dedfb62d 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -35,7 +35,7 @@ namespace {
public:
static char ID; // Pass identification
- Sinking() : FunctionPass(&ID) {}
+ Sinking() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -56,8 +56,7 @@ namespace {
} // end anonymous namespace
char Sinking::ID = 0;
-static RegisterPass<Sinking>
-X("sink", "Code sinking");
+INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 9208238f4ba5..2e437ac778c8 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -49,7 +49,7 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(&ID) {}
+ TailDup() : FunctionPass(ID) {}
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +59,7 @@ namespace {
}
char TailDup::ID = 0;
-static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication");
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
// Public interface to the Tail Duplication pass
FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 01c8e5d6fcf4..371725467a24 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -72,7 +72,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- TailCallElim() : FunctionPass(&ID) {}
+ TailCallElim() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -87,7 +87,8 @@ namespace {
}
char TailCallElim::ID = 0;
-static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination");
+INITIALIZE_PASS(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false);
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
@@ -277,22 +278,22 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = 0;
- for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
- if (RI != IgnoreRI) {
- Value *RetOp = RI->getOperand(0);
-
- // We can only perform this transformation if the value returned is
- // evaluatable at the start of the initial invocation of the function,
- // instead of at the end of the evaluation.
- //
- if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
-
- if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
- ReturnedValue = RetOp;
- }
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
return ReturnedValue;
}
@@ -306,7 +307,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
- // Exactly one operand should be the result of the call instruction...
+ // Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
return 0;
@@ -386,21 +387,22 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
- if (!CanMoveAboveCall(BBI, CI)) {
- // If we can't move the instruction above the call, it might be because it
- // is an associative and commutative operation that could be tranformed
- // using accumulator recursion elimination. Check to see if this is the
- // case, and if so, remember the initial accumulator value for later.
- if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(BBI, CI))) {
- // Yes, this is accumulator recursion. Remember which instruction
- // accumulates.
- AccumulatorRecursionInstr = BBI;
- } else {
- return false; // Otherwise, we cannot eliminate the tail recursion!
- }
+ for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+ // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
}
+ }
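Accumulator recursion elimination on a textbook case (a C sketch, not from this patch): the multiply keeps the recursive call from being a tail call, but because '*' is associative and commutative it can be rewritten with an accumulator:

    int fac(int n) {                /* n * fac(n - 1): not a tail call */
      return n <= 1 ? 1 : n * fac(n - 1);
    }

    int fac_elim(int n) {           /* after accumulator-based elimination */
      int acc = 1;
      for (; n > 1; --n)
        acc *= n;
      return acc;
    }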
// We can only transform call/return pairs that either ignore the return value
// of the call and return void, ignore the value of the call and return a