diff options
Diffstat (limited to 'lib/Transforms/Scalar')
32 files changed, 1310 insertions, 1749 deletions
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp deleted file mode 100644 index dcf14a6860da..000000000000 --- a/lib/Transforms/Scalar/ABCD.cpp +++ /dev/null @@ -1,1112 +0,0 @@ -//===------- ABCD.cpp - Removes redundant conditional branches ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass removes redundant branch instructions. This algorithm was -// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper -// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original -// Algorithm was created to remove array bound checks for strongly typed -// languages. This implementation expands the idea and removes any conditional -// branches that can be proved redundant, not only those used in array bound -// checks. With the SSI representation, each variable has a -// constraint. By analyzing these constraints we can prove that a branch is -// redundant. When a branch is proved redundant it means that -// one direction will always be taken; thus, we can change this branch into an -// unconditional jump. -// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination -// after ABCD to clean up the code. -// This implementation was created based on the implementation of the ABCD -// algorithm implemented for the compiler Jitrino. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "abcd" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/SSI.h" - -using namespace llvm; - -STATISTIC(NumBranchTested, "Number of conditional branches analyzed"); -STATISTIC(NumBranchRemoved, "Number of conditional branches removed"); - -namespace { - -class ABCD : public FunctionPass { - public: - static char ID; // Pass identification, replacement for typeid. - ABCD() : FunctionPass(&ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<SSI>(); - } - - bool runOnFunction(Function &F); - - private: - /// Keep track of whether we've modified the program yet. 
- bool modified; - - enum ProveResult { - False = 0, - Reduced = 1, - True = 2 - }; - - typedef ProveResult (*meet_function)(ProveResult, ProveResult); - static ProveResult max(ProveResult res1, ProveResult res2) { - return (ProveResult) std::max(res1, res2); - } - static ProveResult min(ProveResult res1, ProveResult res2) { - return (ProveResult) std::min(res1, res2); - } - - class Bound { - public: - Bound(APInt v, bool upper) : value(v), upper_bound(upper) {} - Bound(const Bound &b, int cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - Bound(const Bound &b, const APInt &cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - - /// Test if Bound is an upper bound - bool isUpperBound() const { return upper_bound; } - - /// Get the bitwidth of this bound - int32_t getBitWidth() const { return value.getBitWidth(); } - - /// Creates a Bound incrementing the one received - static Bound createIncrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value+1 : b.value-1, - b.upper_bound); - } - - /// Creates a Bound decrementing the one received - static Bound createDecrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value-1 : b.value+1, - b.upper_bound); - } - - /// Test if two bounds are equal - static bool eq(const Bound *a, const Bound *b) { - if (!a || !b) return false; - - assert(a->isUpperBound() == b->isUpperBound()); - return a->value == b->value; - } - - /// Test if val is less than or equal to Bound b - static bool leq(APInt val, const Bound &b) { - return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value); - } - - /// Test if Bound a is less then or equal to Bound - static bool leq(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? a.value.sle(b.value) : - a.value.sge(b.value); - } - - /// Test if Bound a is less then Bound b - static bool lt(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? 
a.value.slt(b.value) : - a.value.sgt(b.value); - } - - /// Test if Bound b is greater then or equal val - static bool geq(const Bound &b, APInt val) { - return leq(val, b); - } - - /// Test if Bound a is greater then or equal Bound b - static bool geq(const Bound &a, const Bound &b) { - return leq(b, a); - } - - private: - APInt value; - bool upper_bound; - }; - - /// This class is used to store results some parts of the graph, - /// so information does not need to be recalculated. The maximum false, - /// minimum true and minimum reduced results are stored - class MemoizedResultChart { - public: - MemoizedResultChart() {} - MemoizedResultChart(const MemoizedResultChart &other) { - if (other.max_false) - max_false.reset(new Bound(*other.max_false)); - if (other.min_true) - min_true.reset(new Bound(*other.min_true)); - if (other.min_reduced) - min_reduced.reset(new Bound(*other.min_reduced)); - } - - /// Returns the max false - const Bound *getFalse() const { return max_false.get(); } - - /// Returns the min true - const Bound *getTrue() const { return min_true.get(); } - - /// Returns the min reduced - const Bound *getReduced() const { return min_reduced.get(); } - - /// Return the stored result for this bound - ProveResult getResult(const Bound &bound) const; - - /// Stores a false found - void addFalse(const Bound &bound); - - /// Stores a true found - void addTrue(const Bound &bound); - - /// Stores a Reduced found - void addReduced(const Bound &bound); - - /// Clears redundant reduced - /// If a min_true is smaller than a min_reduced then the min_reduced - /// is unnecessary and then removed. It also works for min_reduced - /// begin smaller than max_false. 
- void clearRedundantReduced(); - - void clear() { - max_false.reset(); - min_true.reset(); - min_reduced.reset(); - } - - private: - OwningPtr<Bound> max_false, min_true, min_reduced; - }; - - /// This class stores the result found for a node of the graph, - /// so these results do not need to be recalculated, only searched for. - class MemoizedResult { - public: - /// Test if there is true result stored from b to a - /// that is less then the bound - bool hasTrue(Value *b, const Bound &bound) const { - const Bound *trueBound = map.lookup(b).getTrue(); - return trueBound && Bound::leq(*trueBound, bound); - } - - /// Test if there is false result stored from b to a - /// that is less then the bound - bool hasFalse(Value *b, const Bound &bound) const { - const Bound *falseBound = map.lookup(b).getFalse(); - return falseBound && Bound::leq(*falseBound, bound); - } - - /// Test if there is reduced result stored from b to a - /// that is less then the bound - bool hasReduced(Value *b, const Bound &bound) const { - const Bound *reducedBound = map.lookup(b).getReduced(); - return reducedBound && Bound::leq(*reducedBound, bound); - } - - /// Returns the stored bound for b - ProveResult getBoundResult(Value *b, const Bound &bound) { - return map[b].getResult(bound); - } - - /// Clears the map - void clear() { - DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin(); - DenseMapIterator<Value*, MemoizedResultChart> end = map.end(); - for (; begin != end; ++begin) { - begin->second.clear(); - } - map.clear(); - } - - /// Stores the bound found - void updateBound(Value *b, const Bound &bound, const ProveResult res); - - private: - // Maps a nod in the graph with its results found. - DenseMap<Value*, MemoizedResultChart> map; - }; - - /// This class represents an edge in the inequality graph used by the - /// ABCD algorithm. An edge connects node v to node u with a value c if - /// we could infer a constraint v <= u + c in the source program. 
- class Edge { - public: - Edge(Value *V, APInt val, bool upper) - : vertex(V), value(val), upper_bound(upper) {} - - Value *getVertex() const { return vertex; } - const APInt &getValue() const { return value; } - bool isUpperBound() const { return upper_bound; } - - private: - Value *vertex; - APInt value; - bool upper_bound; - }; - - /// Weighted and Directed graph to represent constraints. - /// There is one type of constraint, a <= b + X, which will generate an - /// edge from b to a with weight X. - class InequalityGraph { - public: - - /// Adds an edge from V_from to V_to with weight value - void addEdge(Value *V_from, Value *V_to, APInt value, bool upper); - - /// Test if there is a node V - bool hasNode(Value *V) const { return graph.count(V); } - - /// Test if there is any edge from V in the upper direction - bool hasEdge(Value *V, bool upper) const; - - /// Returns all edges pointed by vertex V - SmallVector<Edge, 16> getEdges(Value *V) const { - return graph.lookup(V); - } - - /// Prints the graph in dot format. - /// Blue edges represent upper bound and Red lower bound. 
- void printGraph(raw_ostream &OS, Function &F) const { - printHeader(OS, F); - printBody(OS); - printFooter(OS); - } - - /// Clear the graph - void clear() { - graph.clear(); - } - - private: - DenseMap<Value *, SmallVector<Edge, 16> > graph; - - /// Prints the header of the dot file - void printHeader(raw_ostream &OS, Function &F) const; - - /// Prints the footer of the dot file - void printFooter(raw_ostream &OS) const { - OS << "}\n"; - } - - /// Prints the body of the dot file - void printBody(raw_ostream &OS) const; - - /// Prints vertex source to the dot file - void printVertex(raw_ostream &OS, Value *source) const; - - /// Prints the edge to the dot file - void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const; - - void printName(raw_ostream &OS, Value *info) const; - }; - - /// Iterates through all BasicBlocks, if the Terminator Instruction - /// uses an Comparator Instruction, all operands of this comparator - /// are sent to be transformed to SSI. Only Instruction operands are - /// transformed. - void createSSI(Function &F); - - /// Creates the graphs for this function. - /// It will look for all comparators used in branches, and create them. - /// These comparators will create constraints for any instruction as an - /// operand. - void executeABCD(Function &F); - - /// Seeks redundancies in the comparator instruction CI. - /// If the ABCD algorithm can prove that the comparator CI always - /// takes one way, then the Terminator Instruction TI is substituted from - /// a conditional branch to a unconditional one. - /// This code basically receives a comparator, and verifies which kind of - /// instruction it is. Depending on the kind of instruction, we use different - /// strategies to prove its redundancy. - void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI); - - /// Substitutes Terminator Instruction TI, that is a conditional branch, - /// with one unconditional branch. 
Succ_edge determines if the new - /// unconditional edge will be the first or second edge of the former TI - /// instruction. - void removeRedundancy(TerminatorInst *TI, bool Succ_edge); - - /// When an conditional branch is removed, the BasicBlock that is no longer - /// reachable will have problems in phi functions. This method fixes these - /// phis removing the former BasicBlock from the list of incoming BasicBlocks - /// of all phis. In case the phi remains with no predecessor it will be - /// marked to be removed later. - void fixPhi(BasicBlock *BB, BasicBlock *Succ); - - /// Removes phis that have no predecessor - void removePhis(); - - /// Creates constraints for Instructions. - /// If the constraint for this instruction has already been created - /// nothing is done. - void createConstraintInstruction(Instruction *I); - - /// Creates constraints for Binary Operators. - /// It will create constraints only for addition and subtraction, - /// the other binary operations are not treated by ABCD. - /// For additions in the form a = b + X and a = X + b, where X is a constant, - /// the constraint a <= b + X can be obtained. For this constraint, an edge - /// a->b with weight X is added to the lower bound graph, and an edge - /// b->a with weight -X is added to the upper bound graph. - /// Only subtractions in the format a = b - X is used by ABCD. - /// Edges are created using the same semantic as addition. - void createConstraintBinaryOperator(BinaryOperator *BO); - - /// Creates constraints for Comparator Instructions. - /// Only comparators that have any of the following operators - /// are used to create constraints: >=, >, <=, <. And only if - /// at least one operand is an Instruction. In a Comparator Instruction - /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where - /// t and f represent sigma for operands in true and false branches. The - /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and - /// b_f <= b. 
There are two more constraints that depend on the operator. - /// For the operator <= : a_t <= b_t and b_f <= a_f-1 - /// For the operator < : a_t <= b_t-1 and b_f <= a_f - /// For the operator >= : b_t <= a_t and a_f <= b_f-1 - /// For the operator > : b_t <= a_t-1 and a_f <= b_f - void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI); - - /// Creates constraints for PHI nodes. - /// In a PHI node a = phi(b,c) we can create the constraint - /// a<= max(b,c). With this constraint there will be the edges, - /// b->a and c->a with weight 0 in the lower bound graph, and the edges - /// a->b and a->c with weight 0 in the upper bound graph. - void createConstraintPHINode(PHINode *PN); - - /// Given a binary operator, we are only interest in the case - /// that one operand is an Instruction and the other is a ConstantInt. In - /// this case the method returns true, otherwise false. It also obtains the - /// Instruction and ConstantInt from the BinaryOperator and returns it. - bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2); - - /// This method creates a constraint between a Sigma and an Instruction. - /// These constraints are created as soon as we find a comparator that uses a - /// SSI variable. - void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f); - - /// If PN_op1 and PN_o2 are different from NULL, create a constraint - /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace - /// with the respective V_op#, if V_op# is a ConstantInt. - void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value); - - /// Returns the sigma representing the Instruction I in BasicBlock BB. - /// Returns NULL in case there is no sigma for this Instruction in this - /// Basic Block. 
This methods assume that sigmas are the first instructions - /// in a block, and that there can be only two sigmas in a block. So it will - /// only look on the first two instructions of BasicBlock BB. - PHINode *findSigma(BasicBlock *BB, Instruction *I); - - /// Original ABCD algorithm to prove redundant checks. - /// This implementation works on any kind of inequality branch. - bool demandProve(Value *a, Value *b, int c, bool upper_bound); - - /// Prove that distance between b and a is <= bound - ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level); - - /// Updates the distance value for a and b - void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level, - meet_function meet); - - InequalityGraph inequality_graph; - MemoizedResult mem_result; - DenseMap<Value*, const Bound*> active; - SmallPtrSet<Value*, 16> created; - SmallVector<PHINode *, 16> phis_to_remove; -}; - -} // end anonymous namespace. - -char ABCD::ID = 0; -static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand"); - - -bool ABCD::runOnFunction(Function &F) { - modified = false; - createSSI(F); - executeABCD(F); - DEBUG(inequality_graph.printGraph(dbgs(), F)); - removePhis(); - - inequality_graph.clear(); - mem_result.clear(); - active.clear(); - created.clear(); - phis_to_remove.clear(); - return modified; -} - -/// Iterates through all BasicBlocks, if the Terminator Instruction -/// uses an Comparator Instruction, all operands of this comparator -/// are sent to be transformed to SSI. Only Instruction operands are -/// transformed. 
-void ABCD::createSSI(Function &F) { - SSI *ssi = &getAnalysis<SSI>(); - - SmallVector<Instruction *, 16> Insts; - - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) { - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) { - modified = true; // XXX: but yet createSSI might do nothing - Insts.push_back(I); - } - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) { - modified = true; - Insts.push_back(I); - } - } - } - ssi->createSSI(Insts); -} - -/// Creates the graphs for this function. -/// It will look for all comparators used in branches, and create them. -/// These comparators will create constraints for any instruction as an -/// operand. -void ABCD::executeABCD(Function &F) { - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0)); - if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy()) - continue; - - createConstraintCmpInst(ICI, TI); - seekRedundancy(ICI, TI); - } -} - -/// Seeks redundancies in the comparator instruction CI. -/// If the ABCD algorithm can prove that the comparator CI always -/// takes one way, then the Terminator Instruction TI is substituted from -/// a conditional branch to a unconditional one. -/// This code basically receives a comparator, and verifies which kind of -/// instruction it is. Depending on the kind of instruction, we use different -/// strategies to prove its redundancy. 
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) { - CmpInst::Predicate Pred = ICI->getPredicate(); - - Value *source, *dest; - int distance1, distance2; - bool upper; - - switch(Pred) { - case CmpInst::ICMP_SGT: // signed greater than - upper = false; - distance1 = 1; - distance2 = 0; - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - upper = false; - distance1 = 0; - distance2 = -1; - break; - - case CmpInst::ICMP_SLT: // signed less than - upper = true; - distance1 = -1; - distance2 = 0; - break; - - case CmpInst::ICMP_SLE: // signed less or equal - upper = true; - distance1 = 0; - distance2 = 1; - break; - - default: - return; - } - - ++NumBranchTested; - source = ICI->getOperand(0); - dest = ICI->getOperand(1); - if (demandProve(dest, source, distance1, upper)) { - removeRedundancy(TI, true); - } else if (demandProve(dest, source, distance2, !upper)) { - removeRedundancy(TI, false); - } -} - -/// Substitutes Terminator Instruction TI, that is a conditional branch, -/// with one unconditional branch. Succ_edge determines if the new -/// unconditional edge will be the first or second edge of the former TI -/// instruction. -void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) { - BasicBlock *Succ; - if (Succ_edge) { - Succ = TI->getSuccessor(0); - fixPhi(TI->getParent(), TI->getSuccessor(1)); - } else { - Succ = TI->getSuccessor(1); - fixPhi(TI->getParent(), TI->getSuccessor(0)); - } - - BranchInst::Create(Succ, TI); - TI->eraseFromParent(); // XXX: invoke - ++NumBranchRemoved; - modified = true; -} - -/// When an conditional branch is removed, the BasicBlock that is no longer -/// reachable will have problems in phi functions. This method fixes these -/// phis removing the former BasicBlock from the list of incoming BasicBlocks -/// of all phis. In case the phi remains with no predecessor it will be -/// marked to be removed later. 
-void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) { - BasicBlock::iterator begin = Succ->begin(); - while (PHINode *PN = dyn_cast<PHINode>(begin++)) { - PN->removeIncomingValue(BB, false); - if (PN->getNumIncomingValues() == 0) - phis_to_remove.push_back(PN); - } -} - -/// Removes phis that have no predecessor -void ABCD::removePhis() { - for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) { - PHINode *PN = phis_to_remove[i]; - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - PN->eraseFromParent(); - } -} - -/// Creates constraints for Instructions. -/// If the constraint for this instruction has already been created -/// nothing is done. -void ABCD::createConstraintInstruction(Instruction *I) { - // Test if this instruction has not been created before - if (created.insert(I)) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - createConstraintBinaryOperator(BO); - } else if (PHINode *PN = dyn_cast<PHINode>(I)) { - createConstraintPHINode(PN); - } - } -} - -/// Creates constraints for Binary Operators. -/// It will create constraints only for addition and subtraction, -/// the other binary operations are not treated by ABCD. -/// For additions in the form a = b + X and a = X + b, where X is a constant, -/// the constraint a <= b + X can be obtained. For this constraint, an edge -/// a->b with weight X is added to the lower bound graph, and an edge -/// b->a with weight -X is added to the upper bound graph. -/// Only subtractions in the format a = b - X is used by ABCD. -/// Edges are created using the same semantic as addition. 
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) { - Instruction *I1 = NULL, *I2 = NULL; - ConstantInt *CI1 = NULL, *CI2 = NULL; - - // Test if an operand is an Instruction and the other is a Constant - if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2)) - return; - - Instruction *I = 0; - APInt value; - - switch (BO->getOpcode()) { - case Instruction::Add: - if (I1) { - I = I1; - value = CI2->getValue(); - } else if (I2) { - I = I2; - value = CI1->getValue(); - } - break; - - case Instruction::Sub: - // Instructions like a = X-b, where X is a constant are not represented - // in the graph. - if (!I1) - return; - - I = I1; - value = -CI2->getValue(); - break; - - default: - return; - } - - inequality_graph.addEdge(I, BO, value, true); - inequality_graph.addEdge(BO, I, -value, false); - createConstraintInstruction(I); -} - -/// Given a binary operator, we are only interest in the case -/// that one operand is an Instruction and the other is a ConstantInt. In -/// this case the method returns true, otherwise false. It also obtains the -/// Instruction and ConstantInt from the BinaryOperator and returns it. -bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2) { - Value *op1 = BO->getOperand(0); - Value *op2 = BO->getOperand(1); - - if ((*I1 = dyn_cast<Instruction>(op1))) { - if ((*C2 = dyn_cast<ConstantInt>(op2))) - return true; // First is Instruction and second ConstantInt - - return false; // Both are Instruction - } else { - if ((*C1 = dyn_cast<ConstantInt>(op1)) && - (*I2 = dyn_cast<Instruction>(op2))) - return true; // First is ConstantInt and second Instruction - - return false; // Both are not Instruction - } -} - -/// Creates constraints for Comparator Instructions. -/// Only comparators that have any of the following operators -/// are used to create constraints: >=, >, <=, <. And only if -/// at least one operand is an Instruction. 
In a Comparator Instruction -/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where -/// t and f represent sigma for operands in true and false branches. The -/// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and -/// b_f <= b. There are two more constraints that depend on the operator. -/// For the operator <= : a_t <= b_t and b_f <= a_f-1 -/// For the operator < : a_t <= b_t-1 and b_f <= a_f -/// For the operator >= : b_t <= a_t and a_f <= b_f-1 -/// For the operator > : b_t <= a_t-1 and a_f <= b_f -void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { - Value *V_op1 = ICI->getOperand(0); - Value *V_op2 = ICI->getOperand(1); - - if (!V_op1->getType()->isIntegerTy()) - return; - - Instruction *I_op1 = dyn_cast<Instruction>(V_op1); - Instruction *I_op2 = dyn_cast<Instruction>(V_op2); - - // Test if at least one operand is an Instruction - if (!I_op1 && !I_op2) - return; - - BasicBlock *BB_succ_t = TI->getSuccessor(0); - BasicBlock *BB_succ_f = TI->getSuccessor(1); - - PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL, - *SIG_op2_t = NULL, *SIG_op2_f = NULL; - - createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f); - createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f); - - int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth(); - APInt MinusOne = APInt::getAllOnesValue(width); - APInt Zero = APInt::getNullValue(width); - - CmpInst::Predicate Pred = ICI->getPredicate(); - ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1); - ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2); - switch (Pred) { - case CmpInst::ICMP_SGT: // signed greater than - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero); - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, 
MinusOne); - break; - - case CmpInst::ICMP_SLT: // signed less than - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero); - break; - - case CmpInst::ICMP_SLE: // signed less or equal - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne); - break; - - default: - break; - } - - if (I_op1) - createConstraintInstruction(I_op1); - if (I_op2) - createConstraintInstruction(I_op2); -} - -/// Creates constraints for PHI nodes. -/// In a PHI node a = phi(b,c) we can create the constraint -/// a<= max(b,c). With this constraint there will be the edges, -/// b->a and c->a with weight 0 in the lower bound graph, and the edges -/// a->b and a->c with weight 0 in the upper bound graph. -void ABCD::createConstraintPHINode(PHINode *PN) { - // FIXME: We really want to disallow sigma nodes, but I don't know the best - // way to detect the other than this. - if (PN->getNumOperands() == 2) return; - - int32_t width = cast<IntegerType>(PN->getType())->getBitWidth(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - if (Instruction *I = dyn_cast<Instruction>(V)) { - createConstraintInstruction(I); - } - inequality_graph.addEdge(V, PN, APInt(width, 0), true); - inequality_graph.addEdge(V, PN, APInt(width, 0), false); - } -} - -/// This method creates a constraint between a Sigma and an Instruction. -/// These constraints are created as soon as we find a comparator that uses a -/// SSI variable. 
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f) { - *SIG_op_t = findSigma(BB_succ_t, I_op); - *SIG_op_f = findSigma(BB_succ_f, I_op); - - if (*SIG_op_t) { - int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false); - } - if (*SIG_op_f) { - int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false); - } -} - -/// If PN_op1 and PN_o2 are different from NULL, create a constraint -/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace -/// with the respective V_op#, if V_op# is a ConstantInt. -void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value) { - if (SIG_op1 && SIG_op2) { - inequality_graph.addEdge(SIG_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false); - } else if (SIG_op1 && V_op2) { - inequality_graph.addEdge(V_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, V_op2, -value, false); - } else if (SIG_op2 && V_op1) { - inequality_graph.addEdge(SIG_op2, V_op1, value, true); - inequality_graph.addEdge(V_op1, SIG_op2, -value, false); - } -} - -/// Returns the sigma representing the Instruction I in BasicBlock BB. -/// Returns NULL in case there is no sigma for this Instruction in this -/// Basic Block. This methods assume that sigmas are the first instructions -/// in a block, and that there can be only two sigmas in a block. So it will -/// only look on the first two instructions of BasicBlock BB. -PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) { - // BB has more than one predecessor, BB cannot have sigmas. 
- if (I == NULL || BB->getSinglePredecessor() == NULL) - return NULL; - - BasicBlock::iterator begin = BB->begin(); - BasicBlock::iterator end = BB->end(); - - for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) { - Instruction *I_succ = begin; - if (PHINode *PN = dyn_cast<PHINode>(I_succ)) - if (PN->getIncomingValue(0) == I) - return PN; - } - - return NULL; -} - -/// Original ABCD algorithm to prove redundant checks. -/// This implementation works on any kind of inequality branch. -bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) { - int32_t width = cast<IntegerType>(a->getType())->getBitWidth(); - Bound bound(APInt(width, c), upper_bound); - - mem_result.clear(); - active.clear(); - - ProveResult res = prove(a, b, bound, 0); - return res != False; -} - -/// Prove that distance between b and a is <= bound -ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound, - unsigned level) { - // if (C[b-a<=e] == True for some e <= bound - // Same or stronger difference was already proven - if (mem_result.hasTrue(b, bound)) - return True; - - // if (C[b-a<=e] == False for some e >= bound - // Same or weaker difference was already disproved - if (mem_result.hasFalse(b, bound)) - return False; - - // if (C[b-a<=e] == Reduced for some e <= bound - // b is on a cycle that was reduced for same or stronger difference - if (mem_result.hasReduced(b, bound)) - return Reduced; - - // traversal reached the source vertex - if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true))) - return True; - - // if b has no predecessor then fail - if (!inequality_graph.hasEdge(b, bound.isUpperBound())) - return False; - - // a cycle was encountered - if (active.count(b)) { - if (Bound::leq(*active.lookup(b), bound)) - return Reduced; // a "harmless" cycle - - return False; // an amplifying cycle - } - - active[b] = &bound; - PHINode *PN = dyn_cast<PHINode>(b); - - // Test if a Value is a Phi. 
If it is a PHINode with more than 1 incoming - // value, then it is a phi, if it has 1 incoming value it is a sigma. - if (PN && PN->getNumIncomingValues() > 1) - updateMemDistance(a, b, bound, level, min); - else - updateMemDistance(a, b, bound, level, max); - - active.erase(b); - - ABCD::ProveResult res = mem_result.getBoundResult(b, bound); - return res; -} - -/// Updates the distance value for a and b -void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound, - unsigned level, meet_function meet) { - ABCD::ProveResult res = (meet == max) ? False : True; - - SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b); - SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end(); - - for (; begin != end ; ++begin) { - if (((res >= Reduced) && (meet == max)) || - ((res == False) && (meet == min))) { - break; - } - const Edge &in = *begin; - if (in.isUpperBound() == bound.isUpperBound()) { - Value *succ = in.getVertex(); - res = meet(res, prove(a, succ, Bound(bound, in.getValue()), - level+1)); - } - } - - mem_result.updateBound(b, bound, res); -} - -/// Return the stored result for this bound -ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{ - if (max_false && Bound::leq(bound, *max_false)) - return False; - if (min_true && Bound::leq(*min_true, bound)) - return True; - if (min_reduced && Bound::leq(*min_reduced, bound)) - return Reduced; - return False; -} - -/// Stores a false found -void ABCD::MemoizedResultChart::addFalse(const Bound &bound) { - if (!max_false || Bound::leq(*max_false, bound)) - max_false.reset(new Bound(bound)); - - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced))); - if (Bound::eq(max_false.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), min_true.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a true found -void 
ABCD::MemoizedResultChart::addTrue(const Bound &bound) { - if (!min_true || Bound::leq(bound, *min_true)) - min_true.reset(new Bound(bound)); - - if (Bound::eq(min_true.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced))); - if (Bound::eq(min_true.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a Reduced found -void ABCD::MemoizedResultChart::addReduced(const Bound &bound) { - if (!min_reduced || Bound::leq(bound, *min_reduced)) - min_reduced.reset(new Bound(bound)); - - if (Bound::eq(min_reduced.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); -} - -/// Clears redundant reduced -/// If a min_true is smaller than a min_reduced then the min_reduced -/// is unnecessary and then removed. It also works for min_reduced -/// begin smaller than max_false. 
-void ABCD::MemoizedResultChart::clearRedundantReduced() { - if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced)) - min_reduced.reset(); - if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false)) - min_reduced.reset(); -} - -/// Stores the bound found -void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound, - const ProveResult res) { - if (res == False) { - map[b].addFalse(bound); - } else if (res == True) { - map[b].addTrue(bound); - } else { - map[b].addReduced(bound); - } -} - -/// Adds an edge from V_from to V_to with weight value -void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from, - APInt value, bool upper) { - assert(V_from->getType() == V_to->getType()); - assert(cast<IntegerType>(V_from->getType())->getBitWidth() == - value.getBitWidth()); - - graph[V_from].push_back(Edge(V_to, value, upper)); -} - -/// Test if there is any edge from V in the upper direction -bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const { - SmallVector<Edge, 16> it = graph.lookup(V); - - SmallVector<Edge, 16>::iterator begin = it.begin(); - SmallVector<Edge, 16>::iterator end = it.end(); - for (; begin != end; ++begin) { - if (begin->isUpperBound() == upper) { - return true; - } - } - return false; -} - -/// Prints the header of the dot file -void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const { - OS << "digraph dotgraph {\n"; - OS << "label=\"Inequality Graph for \'"; - OS << F.getNameStr() << "\' function\";\n"; - OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n"; -} - -/// Prints the body of the dot file -void ABCD::InequalityGraph::printBody(raw_ostream &OS) const { - DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin = - graph.begin(), end = graph.end(); - - for (; begin != end ; ++begin) { - SmallVector<Edge, 16>::const_iterator begin_par = - begin->second.begin(), end_par = begin->second.end(); - Value *source = begin->first; - - printVertex(OS, source); - - 
for (; begin_par != end_par ; ++begin_par) { - const Edge &edge = *begin_par; - printEdge(OS, source, edge); - } - } -} - -/// Prints vertex source to the dot file -/// -void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const { - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " [label=\"{"; - printName(OS, source); - OS << "}\"];\n"; -} - -/// Prints the edge to the dot file -void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source, - const Edge &edge) const { - Value *dest = edge.getVertex(); - APInt value = edge.getValue(); - bool upper = edge.isUpperBound(); - - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " -> "; - OS << "\""; - printName(OS, dest); - OS << "\""; - OS << " [label=\"" << value << "\""; - if (upper) { - OS << "color=\"blue\""; - } else { - OS << "color=\"red\""; - } - OS << "];\n"; -} - -void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const { - if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) { - OS << *CI; - } else { - if (!info->hasName()) { - info->setName("V"); - } - OS << info->getNameStr(); - } -} - -/// createABCDPass - The public interface to this file... 
-FunctionPass *llvm::createABCDPass() { - return new ABCD(); -} diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 2d19467ce746..ada086e9db76 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -33,7 +33,7 @@ STATISTIC(NumRemoved, "Number of instructions removed"); namespace { struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(&ID) {} + ADCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function& F); @@ -45,7 +45,7 @@ namespace { } char ADCE::ID = 0; -static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination"); +INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false); bool ADCE::runOnFunction(Function& F) { SmallPtrSet<Instruction*, 128> alive; diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index 54533f50405f..b144678c6a0e 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -41,7 +41,7 @@ STATISTIC(NumMoved, "Number of basic blocks moved"); namespace { struct BlockPlacement : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BlockPlacement() : FunctionPass(&ID) {} + BlockPlacement() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -74,8 +74,8 @@ namespace { } char BlockPlacement::ID = 0; -static RegisterPass<BlockPlacement> -X("block-placement", "Profile Guided Basic Block Placement"); +INITIALIZE_PASS(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false); FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 1a3b10cc9baa..b7598eace536 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,9 +1,9 @@ 
add_llvm_library(LLVMScalarOpts - ABCD.cpp ADCE.cpp BasicBlockPlacement.cpp CodeGenPrepare.cpp ConstantProp.cpp + CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp GEPSplitter.cpp @@ -17,6 +17,7 @@ add_llvm_library(LLVMScalarOpts LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LowerAtomic.cpp MemCpyOptimizer.cpp Reassociate.cpp Reg2Mem.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 272066c8c0c4..e07b761e589c 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -41,6 +42,11 @@ using namespace llvm; using namespace llvm::PatternMatch; +static cl::opt<bool> +CriticalEdgeSplit("cgp-critical-edge-splitting", + cl::desc("Split critical edges during codegen prepare"), + cl::init(true), cl::Hidden); + namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -54,7 +60,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) - : FunctionPass(&ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -82,8 +88,8 @@ namespace { } char CodeGenPrepare::ID = 0; -static RegisterPass<CodeGenPrepare> X("codegenprepare", - "Optimize for code generation"); +INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false); FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); @@ -427,9 +433,9 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, 
const TargetLowering &TLI){ // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) + if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) + if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. @@ -548,9 +554,9 @@ protected: CI->eraseFromParent(); } bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - - CallInst::ArgOffset))) - return SizeCI->isAllOnesValue(); + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) + return SizeCI->isAllOnesValue(); return false; } }; @@ -891,12 +897,14 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { bool MadeChange = false; // Split all critical edges where the dest block has a PHI. 
- TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); + if (CriticalEdgeSplit) { + TerminatorInst *BBTI = BB.getTerminator(); + if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { + for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { + BasicBlock *SuccBB = BBTI->getSuccessor(i); + if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) + SplitEdgeNicely(BBTI, i, BackEdges, this); + } } } diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index ea208135739d..a0ea369d0cad 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -34,7 +34,7 @@ STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { struct ConstantPropagation : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ConstantPropagation() : FunctionPass(&ID) {} + ConstantPropagation() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -45,8 +45,8 @@ namespace { } char ConstantPropagation::ID = 0; -static RegisterPass<ConstantPropagation> -X("constprop", "Simple constant propagation"); +INITIALIZE_PASS(ConstantPropagation, "constprop", + "Simple constant propagation", false, false); FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp new file mode 100644 index 000000000000..0d4e45de3466 --- /dev/null +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -0,0 +1,200 @@ +//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===// +// +// The LLVM Compiler 
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Correlated Value Propagation pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "correlated-value-propagation" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPhis, "Number of phis propagated"); +STATISTIC(NumSelects, "Number of selects propagated"); +STATISTIC(NumMemAccess, "Number of memory access targets propagated"); +STATISTIC(NumCmps, "Number of comparisons propagated"); + +namespace { + class CorrelatedValuePropagation : public FunctionPass { + LazyValueInfo *LVI; + + bool processSelect(SelectInst *SI); + bool processPHI(PHINode *P); + bool processMemAccess(Instruction *I); + bool processCmp(CmpInst *C); + + public: + static char ID; + CorrelatedValuePropagation(): FunctionPass(ID) { } + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LazyValueInfo>(); + } + }; +} + +char CorrelatedValuePropagation::ID = 0; +INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation", + "Value Propagation", false, false); + +// Public interface to the Value Propagation pass +Pass *llvm::createCorrelatedValuePropagationPass() { + return new CorrelatedValuePropagation(); +} + +bool CorrelatedValuePropagation::processSelect(SelectInst *S) { + if (S->getType()->isVectorTy()) return false; + if (isa<Constant>(S->getOperand(0))) return false; + + Constant *C = LVI->getConstant(S->getOperand(0), S->getParent()); + if (!C) return false; + + 
ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return false; + + S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2)); + S->eraseFromParent(); + + ++NumSelects; + + return true; +} + +bool CorrelatedValuePropagation::processPHI(PHINode *P) { + bool Changed = false; + + BasicBlock *BB = P->getParent(); + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + Value *Incoming = P->getIncomingValue(i); + if (isa<Constant>(Incoming)) continue; + + Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), + P->getIncomingBlock(i), + BB); + if (!C) continue; + + P->setIncomingValue(i, C); + Changed = true; + } + + if (Value *ConstVal = P->hasConstantValue()) { + P->replaceAllUsesWith(ConstVal); + P->eraseFromParent(); + Changed = true; + } + + ++NumPhis; + + return Changed; +} + +bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { + Value *Pointer = 0; + if (LoadInst *L = dyn_cast<LoadInst>(I)) + Pointer = L->getPointerOperand(); + else + Pointer = cast<StoreInst>(I)->getPointerOperand(); + + if (isa<Constant>(Pointer)) return false; + + Constant *C = LVI->getConstant(Pointer, I->getParent()); + if (!C) return false; + + ++NumMemAccess; + I->replaceUsesOfWith(Pointer, C); + return true; +} + +/// processCmp - If the value of this comparison could be determined locally, +/// constant propagation would already have figured it out. Instead, walk +/// the predecessors and statically evaluate the comparison based on information +/// available on that edge. If a given static evaluation is true on ALL +/// incoming edges, then it's true universally and we can simplify the compare. 
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) { + Value *Op0 = C->getOperand(0); + if (isa<Instruction>(Op0) && + cast<Instruction>(Op0)->getParent() == C->getParent()) + return false; + + Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); + if (!Op1) return false; + + pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent()); + if (PI == PE) return false; + + LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Result == LazyValueInfo::Unknown) return false; + + ++PI; + while (PI != PE) { + LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Res != Result) return false; + ++PI; + } + + ++NumCmps; + + if (Result == LazyValueInfo::True) + C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext())); + else + C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext())); + + C->eraseFromParent(); + + return true; +} + +bool CorrelatedValuePropagation::runOnFunction(Function &F) { + LVI = &getAnalysis<LazyValueInfo>(); + + bool FnChanged = false; + + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + bool BBChanged = false; + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { + Instruction *II = BI++; + switch (II->getOpcode()) { + case Instruction::Select: + BBChanged |= processSelect(cast<SelectInst>(II)); + break; + case Instruction::PHI: + BBChanged |= processPHI(cast<PHINode>(II)); + break; + case Instruction::ICmp: + case Instruction::FCmp: + BBChanged |= processCmp(cast<CmpInst>(II)); + break; + case Instruction::Load: + case Instruction::Store: + BBChanged |= processMemAccess(II); + break; + } + } + + // Propagating correlated values might leave cruft around. + // Try to clean it up before we continue. 
+ if (BBChanged) + SimplifyInstructionsInBlock(FI); + + FnChanged |= BBChanged; + } + + return FnChanged; +} diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 39940c35da5d..87ea8038356a 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -35,7 +35,7 @@ namespace { // struct DeadInstElimination : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid - DeadInstElimination() : BasicBlockPass(&ID) {} + DeadInstElimination() : BasicBlockPass(ID) {} virtual bool runOnBasicBlock(BasicBlock &BB) { bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { @@ -56,8 +56,8 @@ namespace { } char DeadInstElimination::ID = 0; -static RegisterPass<DeadInstElimination> -X("die", "Dead Instruction Elimination"); +INITIALIZE_PASS(DeadInstElimination, "die", + "Dead Instruction Elimination", false, false); Pass *llvm::createDeadInstEliminationPass() { return new DeadInstElimination(); @@ -70,7 +70,7 @@ namespace { // struct DCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - DCE() : FunctionPass(&ID) {} + DCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -81,7 +81,7 @@ namespace { } char DCE::ID = 0; -static RegisterPass<DCE> Y("dce", "Dead Code Elimination"); +INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false); bool DCE::runOnFunction(Function &F) { // Start out with all of the instructions in the worklist... 
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index e047e4ffa151..c8fd9d9fa556 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -40,7 +40,7 @@ namespace { TargetData *TD; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(&ID) {} + DSE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { bool Changed = false; @@ -82,7 +82,7 @@ namespace { } char DSE::ID = 0; -static RegisterPass<DSE> X("dse", "Dead Store Elimination"); +INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false); FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } @@ -401,10 +401,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } continue; - } else if (CallSite::get(BBI).getInstruction() != 0) { + } else if (CallSite CS = cast<Value>(BBI)) { // If this call does not access memory, it can't // be undeadifying any of our pointers. 
- CallSite CS = CallSite::get(BBI); if (AA.doesNotAccessMemory(CS)) continue; diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp index 610a41dae44b..53dd06d24bb5 100644 --- a/lib/Transforms/Scalar/GEPSplitter.cpp +++ b/lib/Transforms/Scalar/GEPSplitter.cpp @@ -27,13 +27,13 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const; public: static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(&ID) {} + explicit GEPSplitter() : FunctionPass(ID) {} }; } char GEPSplitter::ID = 0; -static RegisterPass<GEPSplitter> X("split-geps", - "split complex GEPs into simple GEPs"); +INITIALIZE_PASS(GEPSplitter, "split-geps", + "split complex GEPs into simple GEPs", false, false); FunctionPass *llvm::createGEPSplitterPass() { return new GEPSplitter(); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 88b67768fa5d..c62ce1f27f64 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -165,7 +165,6 @@ namespace { Expression create_expression(CastInst* C); Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); Expression create_expression(ExtractValueInst* C); Expression create_expression(InsertValueInst* C); @@ -665,7 +664,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(&ID), NoLoads(noloads), MD(0) { } + : FunctionPass(ID), NoLoads(noloads), MD(0) { } private: bool NoLoads; @@ -716,8 +715,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { return new GVN(NoLoads); } -static RegisterPass<GVN> X("gvn", - "Global Value Numbering"); +INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false); void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; @@ -735,7 +733,7 @@ static bool isSafeReplacement(PHINode* p, Instruction *inst) { for 
(Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) - if (PHINode* use_phi = dyn_cast<PHINode>(UI)) + if (PHINode* use_phi = dyn_cast<PHINode>(*UI)) if (use_phi->getParent() == inst->getParent()) return false; @@ -1312,7 +1310,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // Otherwise, we have to construct SSA form. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); - SSAUpdate.Initialize(LI); + SSAUpdate.Initialize(LI->getType(), LI->getName()); const Type *LoadTy = LI->getType(); @@ -2112,6 +2110,11 @@ bool GVN::performPRE(Function &F) { CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa<DbgInfoIntrinsic>(CurInst)) continue; + + // We don't currently value number ANY inline asm calls. + if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) + if (CallI->isInlineAsm()) + continue; uint32_t ValNo = VN.lookup(CurInst); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b5c9dd881df8..af2eafc47cbf 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -77,7 +77,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(&ID) {} + IndVarSimplify() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -102,7 +102,7 @@ namespace { void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); @@ -117,8 +117,8 @@ namespace { } char IndVarSimplify::ID = 0; -static RegisterPass<IndVarSimplify> -X("indvars", "Canonicalize Induction Variables"); +INITIALIZE_PASS(IndVarSimplify, "indvars", + "Canonicalize Induction Variables", false, false); Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); @@ -131,7 +131,7 @@ Pass *llvm::createIndVarSimplifyPass() { /// is 
actually a much broader range than just linear tests. ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter) { @@ -181,7 +181,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = L->getCanonicalInductionVariableIncrement(); + CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock); } else { // We have to use the preincremented value... RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, @@ -534,7 +534,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. - Value *IndVar = 0; + PHINode *IndVar = 0; if (NeedCannIV) { // Check to see if the loop already has any canonical-looking induction // variables. If any are present and wider than the planned canonical @@ -862,9 +862,9 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Check Incr uses. One user is PN and the other user is an exit condition // used by the conditional terminator. Value::use_iterator IncrUse = Incr->use_begin(); - Instruction *U1 = cast<Instruction>(IncrUse++); + Instruction *U1 = cast<Instruction>(*IncrUse++); if (IncrUse == Incr->use_end()) return; - Instruction *U2 = cast<Instruction>(IncrUse++); + Instruction *U2 = cast<Instruction>(*IncrUse++); if (IncrUse != Incr->use_end()) return; // Find exit condition, which is an fcmp. 
If it doesn't exist, or if it isn't diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index edce14cd92ea..104d5aecbdd3 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -45,7 +46,10 @@ Threshold("jump-threading-threshold", // Turn on use of LazyValueInfo. static cl::opt<bool> -EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden); +EnableLVI("enable-jump-threading-lvi", + cl::desc("Use LVI for jump threading"), + cl::init(true), + cl::ReallyHidden); @@ -74,15 +78,32 @@ namespace { #else SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; #endif + DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; + + // RAII helper for updating the recursion stack. 
+ struct RecursionSetRemover { + DenseSet<std::pair<Value*, BasicBlock*> > &TheSet; + std::pair<Value*, BasicBlock*> ThePair; + + RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S, + std::pair<Value*, BasicBlock*> P) + : TheSet(S), ThePair(P) { } + + ~RecursionSetRemover() { + TheSet.erase(ThePair); + } + }; public: static char ID; // Pass identification - JumpThreading() : FunctionPass(&ID) {} + JumpThreading() : FunctionPass(ID) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - if (EnableLVI) + if (EnableLVI) { AU.addRequired<LazyValueInfo>(); + AU.addPreserved<LazyValueInfo>(); + } } void FindLoopHeaders(Function &F); @@ -111,8 +132,8 @@ namespace { } char JumpThreading::ID = 0; -static RegisterPass<JumpThreading> -X("jump-threading", "Jump Threading"); +INITIALIZE_PASS(JumpThreading, "jump-threading", + "Jump Threading", false, false); // Public interface to the Jump Threading pass FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } @@ -144,6 +165,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); + if (LVI) LVI->eraseBlock(BB); DeleteDeadBlock(BB); Changed = true; } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -164,6 +186,11 @@ bool JumpThreading::runOnFunction(Function &F) { bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); BasicBlock *Succ = BI->getSuccessor(0); + // FIXME: It is always conservatively correct to drop the info + // for a block even if it doesn't get erased. This isn't totally + // awesome, but it allows us to use AssertingVH to prevent nasty + // dangling pointer issues within LazyValueInfo. 
+ if (LVI) LVI->eraseBlock(BB); if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { Changed = true; // If we deleted BB and BB was the header of a loop, then the @@ -251,6 +278,17 @@ void JumpThreading::FindLoopHeaders(Function &F) { LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); } +// Helper method for ComputeValueKnownInPredecessors. If Value is a +// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing. +static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*, + BasicBlock*> > &Result, + Constant *Value, BasicBlock* BB){ + if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value)) + Result.push_back(std::make_pair(FoldedCInt, BB)); + else if (isa<UndefValue>(Value)) + Result.push_back(std::make_pair((ConstantInt*)0, BB)); +} + /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see /// if we can infer that the value is a known ConstantInt in any of our /// predecessors. If so, return the known list of value and pred BB in the @@ -260,12 +298,24 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// bool JumpThreading:: ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ + // This method walks up use-def chains recursively. Because of this, we could + // get into an infinite loop going around loops in the use-def chain. To + // prevent this, keep track of what (value, block) pairs we've already visited + // and terminate the search if we loop back to them + if (!RecursionSet.insert(std::make_pair(V, BB)).second) + return false; + + // An RAII helper to remove this pair from the recursion set once the recursion + // stack pops back out again. + RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); + // If V is a constantint, then it is known in all predecessors. 
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) { ConstantInt *CI = dyn_cast<ConstantInt>(V); for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) Result.push_back(std::make_pair(CI, *PI)); + return true; } @@ -313,8 +363,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) { ConstantInt *CI = dyn_cast<ConstantInt>(InVal); Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i))); + } else if (LVI) { + Constant *CI = LVI->getConstantOnEdge(InVal, + PN->getIncomingBlock(i), BB); + // LVI returns null if no value could be determined. + if (!CI) continue; + PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i)); } } + return !Result.empty(); } @@ -338,29 +395,26 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); + SmallPtrSet<BasicBlock*, 4> LHSKnownBBs; + // Scan for the sentinel. If we find an undef, force it to the // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); Result.back().first = InterestingVal; + LHSKnownBBs.insert(LHSVals[i].second); } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { // If we already inferred a value for this block on the LHS, don't // re-add it. 
- bool HasValue = false; - for (unsigned r = 0, e = Result.size(); r != e; ++r) - if (Result[r].second == RHSVals[i].second) { - HasValue = true; - break; - } - - if (!HasValue) { + if (!LHSKnownBBs.count(RHSVals[i].second)) { Result.push_back(RHSVals[i]); Result.back().first = InterestingVal; } } + return !Result.empty(); } @@ -377,8 +431,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ if (Result[i].first) Result[i].first = cast<ConstantInt>(ConstantExpr::getNot(Result[i].first)); + return true; } + + // Try to simplify some other binary operator values. + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals); + + // Try to use constant folding to simplify the binary operator. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? LHSVals[i].first : + cast<Constant>(UndefValue::get(BO->getType())); + Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); + + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + } + + return !Result.empty(); } // Handle compare with phi operand, where the PHI is defined in this block. 
@@ -405,10 +478,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); } - if (isa<UndefValue>(Res)) - Result.push_back(std::make_pair((ConstantInt*)0, PredBB)); - else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res)) - Result.push_back(std::make_pair(CI, PredBB)); + if (Constant *ConstRes = dyn_cast<Constant>(Res)) + PushConstantIntOrUndef(Result, ConstRes, PredBB); } return !Result.empty(); @@ -418,28 +489,59 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isIntegerTy() && // Not vector compare. - (!isa<Instruction>(Cmp->getOperand(0)) || - cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { - Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + Cmp->getType()->isIntegerTy()) { + if (!isa<Instruction>(Cmp->getOperand(0)) || + cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { + Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + LazyValueInfo::Tristate Res = + LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + RHSCst, P, BB); + if (Res == LazyValueInfo::Unknown) + continue; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - // If the value is known by LazyValueInfo to be a constant in a - // predecessor, use that information to try to thread this block. 
- LazyValueInfo::Tristate - Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, P, BB); - if (Res == LazyValueInfo::Unknown) - continue; + Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + } - Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + return !Result.empty(); } - - return !Result.empty(); + + // Try to find a constant value for the LHS of a comparison, + // and evaluate it statically if we can. + if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); + + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? LHSVals[i].first : + cast<Constant>(UndefValue::get(CmpConst->getType())); + Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + V, CmpConst); + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + + return !Result.empty(); + } + } + } + + if (LVI) { + // If all else fails, see if LVI can figure out a constant value for us. + Constant *CI = LVI->getConstant(V, BB); + ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI); + if (CInt) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(CInt, *PI)); } + + return !Result.empty(); } + return false; } @@ -490,6 +592,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. 
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + if (LVI) LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); if (isEntry && BB != &BB->getParent()->getEntryBlock()) @@ -603,6 +706,44 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } } + + // For a comparison where the LHS is outside this block, it's possible + // that we've branched on it before. Used LVI to see if we can simplify + // the branch based on that. + BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); + Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE && + (!isa<Instruction>(CondCmp->getOperand(0)) || + cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) { + // For predecessor edge, determine if the comparison is true or false + // on that edge. If they're all true or all false, we can simplify the + // branch. + // FIXME: We could handle mixed true/false by duplicating code. + LazyValueInfo::Tristate Baseline = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Baseline != LazyValueInfo::Unknown) { + // Check that all remaining incoming values match the first one. + while (++PI != PE) { + LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge( + CondCmp->getPredicate(), + CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Ret != Baseline) break; + } + + // If we terminated early, then one of the values didn't match. + if (PI == PE) { + unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0; + unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1; + RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD); + BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); + CondBr->eraseFromParent(); + return true; + } + } + } } // Check for some cases that are worth simplifying. 
Right now we want to look @@ -1020,6 +1161,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues; if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) return false; + assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); @@ -1314,6 +1456,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, << ", across block:\n " << *BB << "\n"); + if (LVI) + LVI->threadEdge(PredBB, BB, SuccBB); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. @@ -1383,7 +1528,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); @@ -1538,7 +1683,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 73473952912e..2ef85446bd9b 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -26,8 +26,7 @@ // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. 
We then use -// the mem2reg functionality to construct the appropriate SSA form for the -// variable. +// the SSAUpdater to construct the appropriate SSA form for the value. // //===----------------------------------------------------------------------===// @@ -37,14 +36,15 @@ #include "llvm/DerivedTypes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -66,7 +66,7 @@ DisablePromotion("disable-licm-promotion", cl::Hidden, namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid - LICM() : LoopPass(&ID) {} + LICM() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -75,39 +75,31 @@ namespace { /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg) + AU.addRequired<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); - AU.addPreserved<DominanceFrontier>(); AU.addPreservedID(LoopSimplifyID); } bool doFinalization() { - // Free the values stored in the map - for (std::map<Loop *, AliasSetTracker *>::iterator - I = LoopToAliasMap.begin(), E = 
LoopToAliasMap.end(); I != E; ++I) - delete I->second; - - LoopToAliasMap.clear(); + assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); return false; } private: - // Various analyses that we use... AliasAnalysis *AA; // Current AliasAnalysis information LoopInfo *LI; // Current LoopInfo - DominatorTree *DT; // Dominator Tree for the current Loop... - DominanceFrontier *DF; // Current Dominance Frontier + DominatorTree *DT; // Dominator Tree for the current Loop. - // State that is updated as we process loops + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... - std::map<Loop *, AliasSetTracker *> LoopToAliasMap; + DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); @@ -204,25 +196,12 @@ namespace { bool isLoopInvariantInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - /// PromoteValuesInLoop - Look at the stores in the loop and promote as many - /// to scalars as we can. - /// - void PromoteValuesInLoop(); - - /// FindPromotableValuesInLoop - Check the current loop for stores to - /// definite pointers, which are not loaded and stored through may aliases. - /// If these are found, create an alloca for the value, add it to the - /// PromotedValues list, and keep track of the mapping from value to - /// alloca... 
- /// - void FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &Val2AlMap); + void PromoteAliasSet(AliasSet &AS); }; } char LICM::ID = 0; -static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion"); +INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false); Pass *llvm::createLICMPass() { return new LICM(); } @@ -236,19 +215,23 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get our Loop and Alias Analysis information... LI = &getAnalysis<LoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); - DF = &getAnalysis<DominanceFrontier>(); DT = &getAnalysis<DominatorTree>(); CurAST = new AliasSetTracker(*AA); - // Collect Alias info from subloops + // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); LoopItr != LoopItrE; ++LoopItr) { Loop *InnerL = *LoopItr; - AliasSetTracker *InnerAST = LoopToAliasMap[InnerL]; - assert (InnerAST && "Where is my AST?"); + AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; + assert(InnerAST && "Where is my AST?"); // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); + + // Once we've incorporated the inner loop's AST into ours, we don't need the + // subloop's anymore. + delete InnerAST; + LoopToAliasSetMap.erase(InnerL); } CurLoop = L; @@ -263,7 +246,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops... + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -283,15 +266,24 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { HoistRegion(DT->getNode(L->getHeader())); // Now that all loop invariants have been removed from the loop, promote any - // memory references to scalars that we can... 
- if (!DisablePromotion && Preheader && L->hasDedicatedExits()) - PromoteValuesInLoop(); - + // memory references to scalars that we can. + if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + // Loop over all of the alias sets in the tracker object. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); + I != E; ++I) + PromoteAliasSet(*I); + } + // Clear out loops state information for the next iteration CurLoop = 0; Preheader = 0; - LoopToAliasMap[L] = CurAST; + // If this loop is nested inside of another one, save the alias information + // for when we process the outer loop. + if (L->getParentLoop()) + LoopToAliasSetMap[L] = CurAST; + else + delete CurAST; return Changed; } @@ -308,7 +300,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) return; - // We are processing blocks in reverse dfo, so process children first... + // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) SinkRegion(Children[i]); @@ -319,6 +311,17 @@ void LICM::SinkRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; + + // If the instruction is dead, we would try to sink it because it isn't used + // in the loop, instead, just delete it. + if (isInstructionTriviallyDead(&I)) { + DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); + Changed = true; + continue; + } // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if the all users of the instruction are @@ -350,6 +353,18 @@ void LICM::HoistRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { Instruction &I = *II++; + // Try constant folding this instruction. 
If all the operands are + // constants, it is technically hoistable, but it would be better to just + // fold it. + if (Constant *C = ConstantFoldInstruction(&I)) { + DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); + CurAST->copyValue(&I, C); + CurAST->deleteValue(&I); + I.replaceAllUsesWith(C); + I.eraseFromParent(); + continue; + } + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. @@ -357,7 +372,7 @@ void LICM::HoistRegion(DomTreeNode *N) { if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && isSafeToExecuteUnconditionally(I)) hoist(I); - } + } const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) @@ -457,10 +472,10 @@ bool LICM::isLoopInvariantInst(Instruction &I) { /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { - DEBUG(dbgs() << "LICM sinking instruction: " << I); + DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); + CurLoop->getUniqueExitBlocks(ExitBlocks); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -477,122 +492,101 @@ void LICM::sink(Instruction &I) { // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. 
- I.removeFromParent(); - BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); - ExitBlocks[0]->getInstList().insert(InsertPt, &I); + I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); + + // This instruction is no longer in the AST for the current loop, because + // we just sunk it out of the loop. If we just sunk it into an outer + // loop, we will rediscover the operation when we process it. + CurAST->deleteValue(&I); } - } else if (ExitBlocks.empty()) { + return; + } + + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); - } else { - // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to - // do all of the hard work of inserting PHI nodes as necessary. We convert - // the value into a stack object to get it to do this. - - // Firstly, we create a stack object to hold the value... - AllocaInst *AI = 0; - - if (!I.getType()->isVoidTy()) { - AI = new AllocaInst(I.getType(), 0, I.getName(), - I.getParent()->getParent()->getEntryBlock().begin()); - CurAST->add(AI); - } - - // Secondly, insert load instructions for each use of the instruction - // outside of the loop. - while (!I.use_empty()) { - Instruction *U = cast<Instruction>(I.use_back()); - - // If the user is a PHI Node, we actually have to insert load instructions - // in all predecessor blocks, not in the PHI block itself! - if (PHINode *UPN = dyn_cast<PHINode>(U)) { - // Only insert into each predecessor once, so that we don't have - // different incoming values from the same block! 
- std::map<BasicBlock*, Value*> InsertedBlocks; - for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i) - if (UPN->getIncomingValue(i) == &I) { - BasicBlock *Pred = UPN->getIncomingBlock(i); - Value *&PredVal = InsertedBlocks[Pred]; - if (!PredVal) { - // Insert a new load instruction right before the terminator in - // the predecessor block. - PredVal = new LoadInst(AI, "", Pred->getTerminator()); - CurAST->add(cast<LoadInst>(PredVal)); - } - - UPN->setIncomingValue(i, PredVal); - } - - } else { - LoadInst *L = new LoadInst(AI, "", U); - U->replaceUsesOfWith(&I, L); - CurAST->add(L); - } - } - - // Thirdly, insert a copy of the instruction in each exit block of the loop - // that is dominated by the instruction, storing the result into the memory - // location. Be careful not to insert the instruction into any particular - // basic block more than once. - std::set<BasicBlock*> InsertedBlocks; - BasicBlock *InstOrigBB = I.getParent(); - - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = ExitBlocks[i]; - - if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) { - // If we haven't already processed this exit block, do so now. - if (InsertedBlocks.insert(ExitBlock).second) { - // Insert the code after the last PHI node... - BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - - // If this is the first exit block processed, just move the original - // instruction, otherwise clone the original instruction and insert - // the copy. 
- Instruction *New; - if (InsertedBlocks.size() == 1) { - I.removeFromParent(); - ExitBlock->getInstList().insert(InsertPt, &I); - New = &I; - } else { - New = I.clone(); - CurAST->copyValue(&I, New); - if (!I.getName().empty()) - New->setName(I.getName()+".le"); - ExitBlock->getInstList().insert(InsertPt, New); - } - - // Now that we have inserted the instruction, store it into the alloca - if (AI) new StoreInst(New, AI, InsertPt); - } - } - } - - // If the instruction doesn't dominate any exit blocks, it must be dead. - if (InsertedBlocks.empty()) { - CurAST->deleteValue(&I); - I.eraseFromParent(); - } - - // Finally, promote the fine value to SSA form. - if (AI) { - std::vector<AllocaInst*> Allocas; - Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, CurAST); + return; + } + + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the + // hard work of inserting PHI nodes as necessary. + SmallVector<PHINode*, 8> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + if (!I.use_empty()) + SSA.Initialize(I.getType(), I.getName()); + + // Insert a copy of the instruction in each exit block of the loop that is + // dominated by the instruction. Each exit block is known to only be in the + // ExitBlocks list once. + BasicBlock *InstOrigBB = I.getParent(); + unsigned NumInserted = 0; + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + + if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) + continue; + + // Insert the code after the last PHI node. + BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); + + // If this is the first exit block processed, just move the original + // instruction, otherwise clone the original instruction and insert + // the copy. 
+ Instruction *New; + if (NumInserted++ == 0) { + I.moveBefore(InsertPt); + New = &I; + } else { + New = I.clone(); + if (!I.getName().empty()) + New->setName(I.getName()+".le"); + ExitBlock->getInstList().insert(InsertPt, New); } + + // Now that we have inserted the instruction, inform SSAUpdater. + if (!I.use_empty()) + SSA.AddAvailableValue(ExitBlock, New); } + + // If the instruction doesn't dominate any exit blocks, it must be dead. + if (NumInserted == 0) { + CurAST->deleteValue(&I); + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.eraseFromParent(); + return; + } + + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { + // Grab the use before incrementing the iterator. + Use &U = UI.getUse(); + // Increment the iterator before removing the use from the list. + ++UI; + SSA.RewriteUseAfterInsertions(U); + } + + // Update CurAST for NewPHIs if I had pointer type. + if (I.getType()->isPointerTy()) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(&I, NewPHIs[i]); + + // Finally, remove the instruction from CurAST. It is no longer in the loop. + CurAST->deleteValue(&I); } /// hoist - When an instruction is found to only use loop invariant operands @@ -602,12 +596,8 @@ void LICM::hoist(Instruction &I) { DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); - // Remove the instruction from its current basic block... but don't delete the - // instruction. - I.removeFromParent(); - - // Insert the new node in Preheader, before the terminator. - Preheader->getInstList().insert(Preheader->getTerminator(), &I); + // Move the new node to the Preheader, before its terminator. 
+ I.moveBefore(Preheader->getTerminator()); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -647,223 +637,269 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { return true; } - -/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking +/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers -/// which are loop invariant. We promote these memory locations to use allocas -/// instead. These allocas can easily be raised to register values by the -/// PromoteMem2Reg functionality. +/// which are loop invariant. /// -void LICM::PromoteValuesInLoop() { - // PromotedValues - List of values that are promoted out of the loop. Each - // value has an alloca instruction for it, and a canonical version of the - // pointer. - std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; - std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca - - FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); - if (ValueToAllocaMap.empty()) return; // If there are values to promote. - - Changed = true; - NumPromoted += PromotedValues.size(); - - std::vector<Value*> PointerValueNumbers; - - // Emit a copy from the value into the alloca'd value in the loop preheader - TerminatorInst *LoopPredInst = Preheader->getTerminator(); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - Value *Ptr = PromotedValues[i].second; - - // If we are promoting a pointer value, update alias information for the - // inserted load. - Value *LoadValue = 0; - if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { - // Locate a load or store through the pointer, and assign the same value - // to LI as we are loading or storing. Since we know that the value is - // stored in this loop, this will always succeed. 
- for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - LoadValue = LI; - break; - } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(1) == Ptr) { - LoadValue = SI->getOperand(0); - break; - } - } - } - assert(LoadValue && "No store through the pointer found!"); - PointerValueNumbers.push_back(LoadValue); // Remember this for later. - } - - // Load from the memory we are promoting. - LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst); - - if (LoadValue) CurAST->copyValue(LoadValue, LI); - - // Store into the temporary alloca. - new StoreInst(LI, PromotedValues[i].first, LoopPredInst); - } +void LICM::PromoteAliasSet(AliasSet &AS) { + // We can promote this alias set if it has a store, if it is a "Must" alias + // set, if the pointer is loop invariant, and if we are not eliminating any + // volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) + return; + + assert(!AS.empty() && + "Must alias set should have at least one pointer element in it!"); + Value *SomePtr = AS.begin()->getValue(); - // Scan the basic blocks in the loop, replacing uses of our pointers with - // uses of the allocas in question. + // It isn't safe to promote a load/store from the loop if the load/store is + // conditional. For example, turning: // - for (Loop::block_iterator I = CurLoop->block_begin(), - E = CurLoop->block_end(); I != E; ++I) { - BasicBlock *BB = *I; - // Rewrite all loads and stores in the block of the pointer... - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(L->getOperand(0)); - if (I != ValueToAllocaMap.end()) - L->setOperand(0, I->second); // Rewrite load instruction... 
- } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(S->getOperand(1)); - if (I != ValueToAllocaMap.end()) - S->setOperand(1, I->second); // Rewrite store instruction... - } - } - } - - // Now that the body of the loop uses the allocas instead of the original - // memory locations, insert code to copy the alloca value back into the - // original memory location on all exits from the loop. Note that we only - // want to insert one copy of the code in each exit block, though the loop may - // exit to the same block more than once. + // for () { if (c) *P += 1; } // - SmallPtrSet<BasicBlock*, 16> ProcessedBlocks; - - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - if (!ProcessedBlocks.insert(ExitBlocks[i])) - continue; - - // Copy all of the allocas into their memory locations. - BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI(); - Instruction *InsertPos = BI; - unsigned PVN = 0; - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - // Load from the alloca. - LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); - - // If this is a pointer type, update alias info appropriately. - if (LI->getType()->isPointerTy()) - CurAST->copyValue(PointerValueNumbers[PVN++], LI); - - // Store into the memory we promoted. - new StoreInst(LI, PromotedValues[i].second, InsertPos); - } - } - - // Now that we have done the deed, use the mem2reg functionality to promote - // all of the new allocas we just created into real SSA registers. 
+ // into: // - std::vector<AllocaInst*> PromotedAllocas; - PromotedAllocas.reserve(PromotedValues.size()); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) - PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); -} - -/// FindPromotableValuesInLoop - Check the current loop for stores to definite -/// pointers, which are not loaded and stored through may aliases and are safe -/// for promotion. If these are found, create an alloca for the value, add it -/// to the PromotedValues list, and keep track of the mapping from value to -/// alloca. -void LICM::FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &ValueToAllocaMap) { - Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin(); - - // Loop over all of the alias sets in the tracker object. - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) { - AliasSet &AS = *I; - // We can promote this alias set if it has a store, if it is a "Must" alias - // set, if the pointer is loop invariant, and if we are not eliminating any - // volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) - continue; + // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; + // + // is not safe, because *P may only be valid to access if 'c' is true. + // + // It is safe to promote P if all uses are direct load/stores and if at + // least one is guaranteed to be executed. + bool GuaranteedToExecute = false; + + SmallVector<Instruction*, 64> LoopUses; + SmallPtrSet<Value*, 4> PointerMustAliases; + + // Check that all of the pointers in the alias set have the same type. We + // cannot (yet) promote a memory location that is loaded and stored in + // different sizes. 
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + Value *ASIV = ASI->getValue(); + PointerMustAliases.insert(ASIV); - assert(!AS.empty() && - "Must alias set should have at least one pointer element in it!"); - Value *V = AS.begin()->getValue(); - // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. - { - bool PointerOk = true; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - if (V->getType() != I->getValue()->getType()) { - PointerOk = false; - break; - } - if (!PointerOk) - continue; - } - - // It isn't safe to promote a load/store from the loop if the load/store is - // conditional. For example, turning: - // - // for () { if (c) *P += 1; } - // - // into: - // - // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; - // - // is not safe, because *P may only be valid to access if 'c' is true. - // - // It is safe to promote P if all uses are direct load/stores and if at - // least one is guaranteed to be executed. - bool GuaranteedToExecute = false; - bool InvalidInst = false; - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + if (SomePtr->getType() != ASIV->getType()) + return; + + for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { - // Ignore instructions not in this loop. + // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; - - if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) { - InvalidInst = true; - break; - } + + // If there is an non-load/store instruction in the loop, we can't promote + // it. 
+ if (isa<LoadInst>(Use)) + assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); + else if (isa<StoreInst>(Use)) { + assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); + if (Use->getOperand(0) == ASIV) return; + } else + return; // Not a load or store. if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); + + LoopUses.push_back(Use); } + } + + // If there isn't a guaranteed-to-execute instruction, we can't promote. + if (!GuaranteedToExecute) + return; + + // Otherwise, this is safe to promote, lets do it! + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + Changed = true; + ++NumPromoted; - // If there is an non-load/store instruction in the loop, we can't promote - // it. If there isn't a guaranteed-to-execute instruction, we can't - // promote. - if (InvalidInst || !GuaranteedToExecute) + // We use the SSAUpdater interface to insert phi nodes as required. + SmallVector<PHINode*, 16> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + // It wants to know some value of the same type as what we'll be inserting. + Value *SomeValue; + if (isa<LoadInst>(LoopUses[0])) + SomeValue = LoopUses[0]; + else + SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); + SSA.Initialize(SomeValue->getType(), SomeValue->getName()); + + // First step: bucket up uses of the pointers by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. + DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; + for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { + Instruction *User = LoopUses[i]; + UsesByBlock[User->getParent()].push_back(User); + } + + // Okay, now we can iterate over all the blocks in the loop with uses, + // processing them. Keep track of which loads are loading a live-in value. 
+ SmallVector<LoadInst*, 32> LiveInLoads; + DenseMap<Value*, Value*> ReplacedLoads; + + for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { + Instruction *User = LoopUses[LoopUse]; + std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (isa<StoreInst>(User)) { + SSA.AddAvailableValue(User->getParent(), + cast<StoreInst>(User)->getOperand(0)); + } else { + // Otherwise it is a load, queue it to rewrite as a live-in load. + LiveInLoads.push_back(cast<LoadInst>(User)); + } + BlockUses.clear(); continue; + } - const Type *Ty = cast<PointerType>(V->getType())->getElementType(); - AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart); - PromotedValues.push_back(std::make_pair(AI, V)); + // Otherwise, check to see if this block is all loads. If so, we can queue + // them all as live in loads. + bool HasStore = false; + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { + if (isa<StoreInst>(BlockUses[i])) { + HasStore = true; + break; + } + } + + if (!HasStore) { + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) + LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + BlockUses.clear(); + continue; + } - // Update the AST and alias analysis. - CurAST->copyValue(V, AI); + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live in value. The last store defines + // the live out value. We handle this by doing a linear scan of the block. 
+ BasicBlock *BB = User->getParent(); + Value *StoredValue = 0; + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast<LoadInst>(II)) { + // If this is a load from an unrelated pointer, ignore it. + if (!PointerMustAliases.count(L->getOperand(0))) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *S = dyn_cast<StoreInst>(II)) { + // If this is a store to an unrelated pointer, ignore it. + if (!PointerMustAliases.count(S->getOperand(1))) continue; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); + // Remember that this is the active value in the block. + StoredValue = S->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Now that all the intra-loop values are classified, set up the preheader. + // It gets a load of the pointer we're promoting, and it is the live-out value + // from the preheader. + LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", + Preheader->getTerminator()); + SSA.AddAvailableValue(Preheader, PreheaderLoad); + + // Now that the preheader is good to go, set up the exit blocks. Each exit + // block gets a store of the live-out values that feed them. Since we've + // already told the SSA updater about the defs in the loop and the preheader + // definition, it is all set and we can start using it. 
+ SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Instruction *InsertPos = ExitBlock->getFirstNonPHI(); + new StoreInst(LiveInValue, SomePtr, InsertPos); + } - DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n"); + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { + LoadInst *ALoad = LiveInLoads[i]; + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + ALoad->replaceAllUsesWith(NewVal); + CurAST->copyValue(ALoad, NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // If the preheader load is itself a pointer, we need to tell alias analysis + // about the new pointer we created in the preheader block and about any PHI + // nodes that just got inserted. + if (PreheaderLoad->getType()->isPointerTy()) { + // Copy any value stored to or loaded from a must-alias of the pointer. + CurAST->copyValue(SomeValue, PreheaderLoad); + + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(SomeValue, NewPHIs[i]); } + + // Now that everything is rewritten, delete the old instructions from the body + // of the loop. They should all be dead now. + for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { + Instruction *User = LoopUses[i]; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. 
The intermediately loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. + DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + User->replaceAllUsesWith(NewVal); + CurAST->copyValue(User, NewVal); + } + + CurAST->deleteValue(User); + User->eraseFromParent(); + } + + // fwew, we're done! } + /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return; @@ -873,7 +909,7 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias /// set. void LICM::deleteAnalysisValue(Value *V, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return; diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index e4894e99b68f..543dfc1cba09 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -28,7 +28,7 @@ namespace { class LoopDeletion : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopDeletion() : LoopPass(&ID) {} + LoopDeletion() : LoopPass(ID) {} // Possibly eliminate loop L if it is dead. 
bool runOnLoop(Loop* L, LPPassManager& LPM); @@ -38,9 +38,9 @@ namespace { bool &Changed, BasicBlock *Preheader); virtual void getAnalysisUsage(AnalysisUsage& AU) const { - AU.addRequired<ScalarEvolution>(); AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); @@ -55,7 +55,8 @@ namespace { } char LoopDeletion::ID = 0; -static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops"); +INITIALIZE_PASS(LoopDeletion, "loop-deletion", + "Delete dead loops", false, false); Pass* llvm::createLoopDeletionPass() { return new LoopDeletion(); diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 31058e5759a4..a4336743a8f0 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -74,7 +74,7 @@ namespace { class LoopIndexSplit : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopIndexSplit() : LoopPass(&ID) {} + LoopIndexSplit() : LoopPass(ID) {} // Index split Loop L. Return true if loop is split. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -197,8 +197,8 @@ namespace { } char LoopIndexSplit::ID = 0; -static RegisterPass<LoopIndexSplit> -X("loop-index-split", "Index Split Loops"); +INITIALIZE_PASS(LoopIndexSplit, "loop-index-split", + "Index Split Loops", false, false); Pass *llvm::createLoopIndexSplitPass() { return new LoopIndexSplit(); @@ -677,7 +677,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB); PI != PE; ++PI) { BasicBlock *P = *PI; - if (P == DeadBB || DT->dominates(DeadBB, P)) + if (DT->dominates(DeadBB, P)) PredBlocks.push_back(P); } @@ -799,7 +799,7 @@ void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, // the dominance frontiers. 
for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end(); I != E; ++I) { - if (*I == CondBB || !DT->dominates(CondBB, *I)) continue; + if (!DT->properlyDominates(CondBB, *I)) continue; DominanceFrontier::iterator BBDF = DF->find(*I); DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); @@ -1183,7 +1183,7 @@ bool LoopIndexSplit::cleanBlock(BasicBlock *BB) { bool usedOutsideBB = false; for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) { - Instruction *U = cast<Instruction>(UI); + Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB) usedOutsideBB = true; } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 16c4a15d3550..65acc1d9257a 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -35,7 +35,7 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(&ID) {} + LoopRotate() : LoopPass(ID) {} // Rotate Loop L as many times as possible. Return true if // loop is rotated at least once. @@ -43,15 +43,15 @@ namespace { // LCSSA form makes instruction renaming easier. 
virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<DominanceFrontier>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } // Helper functions @@ -79,7 +79,7 @@ namespace { } char LoopRotate::ID = 0; -static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops"); +INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false); Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } @@ -221,7 +221,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // The value now exits in two versions: the initial value in the preheader // and the loop "next" value in the original header. - SSA.Initialize(OrigHeaderVal); + SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal); @@ -261,6 +261,26 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // NewHeader is now the header of the loop. L->moveToHeader(NewHeader); + // Move the original header to the bottom of the loop, where it now more + // naturally belongs. This isn't necessary for correctness, and CodeGen can + // usually reorder blocks on its own to fix things like this up, but it's + // still nice to keep the IR readable. + // + // The original header should have only one predecessor at this point, since + // we checked that the loop had a proper preheader and unique backedge before + // we started. 
+ assert(OrigHeader->getSinglePredecessor() && + "Original loop header has too many predecessors after loop rotation!"); + OrigHeader->moveAfter(OrigHeader->getSinglePredecessor()); + + // Also, since this original header only has one predecessor, zap its + // PHI nodes, which are now trivial. + FoldSingleEntryPHINodes(OrigHeader); + + // TODO: We could just go ahead and merge OrigHeader into its predecessor + // at this point, if we don't mind updating dominator info. + + // Establish a new preheader, update dominators, etc. preserveCanonicalLoopForm(LPM); ++NumRotated; diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 1f9b4156b9cd..e8dc5d3a640e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -161,9 +161,10 @@ RegUseTracker::DropUse(size_t LUIdx) { bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUsesMap.count(Reg)) return false; - const SmallBitVector &UsedByIndices = - RegUsesMap.find(Reg)->second.UsedByIndices; + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + if (I == RegUsesMap.end()) + return false; + const SmallBitVector &UsedByIndices = I->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -441,12 +442,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. 
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { - const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, - IgnoreSignificantBits); - if (!Start) return 0; const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); if (!Step) return 0; + const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; return SE.getAddRecExpr(Start, Step, AR->getLoop()); } return 0; @@ -505,12 +506,14 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddExpr(NewOps); + if (Result != 0) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result != 0) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -528,12 +531,14 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); - S = SE.getAddExpr(NewOps); + if (Result) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -965,6 +970,12 @@ public: /// may be used. 
bool AllFixupsOutsideLoop; + /// WidestFixupType - This records the widest use type for any fixup using + /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different + /// max fixup widths to be equivalent, because the narrower one may be relying + /// on the implicit truncation to truncate away bogus bits. + const Type *WidestFixupType; + /// Formulae - A list of ways to build a value that can satisfy this user. /// After the list is populated, one of these is selected heuristically and /// used to formulate a replacement for OperandValToReplace in UserInst. @@ -976,15 +987,14 @@ public: LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true) {} + AllFixupsOutsideLoop(true), + WidestFixupType(0) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); void DeleteFormula(Formula &F); void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); - void check() const; - void print(raw_ostream &OS) const; void dump() const; }; @@ -1076,13 +1086,16 @@ void LSRUse::print(raw_ostream &OS) const { for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), E = Offsets.end(); I != E; ++I) { OS << *I; - if (next(I) != E) + if (llvm::next(I) != E) OS << ','; } OS << '}'; if (AllFixupsOutsideLoop) OS << ", all-fixups-outside-loop"; + + if (WidestFixupType) + OS << ", widest fixup type: " << *WidestFixupType; } void LSRUse::dump() const { @@ -1354,6 +1367,10 @@ public: void FilterOutUndesirableDedicatedRegisters(); size_t EstimateSearchSpaceComplexity() const; + void NarrowSearchSpaceByDetectingSupersets(); + void NarrowSearchSpaceByCollapsingUnrolledCode(); + void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1587,7 +1604,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, 
IVStrideUse* &CondUse) { const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); // Add one to the backedge-taken count to get the trip count. - const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount); if (IterationCount != SE.getSCEV(Sel)) return Cond; // Check for a max calculation that matches the pattern. There's no check @@ -1919,32 +1936,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) { LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { - // Search all uses for the formula. This could be more clever. Ignore - // ICmpZero uses because they may contain formulae generated by - // GenerateICmpZeroScales, in which case adding fixup offsets may - // be invalid. + // Search all uses for the formula. This could be more clever. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; + // Check whether this use is close enough to OrigLU, to see whether it's + // worthwhile looking through its formulae. + // Ignore ICmpZero uses because they may contain formulae generated by + // GenerateICmpZeroScales, in which case adding fixup offsets may + // be invalid. if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero && LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && + LU.WidestFixupType == OrigLU.WidestFixupType && LU.HasFormulaWithSameRegs(OrigF)) { + // Scan through this use's formulae. for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; + // Check to see if this formula has the same registers and symbols + // as OrigF. 
if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale && - LU.Kind) { + F.AM.Scale == OrigF.AM.Scale) { if (F.AM.BaseOffs == 0) return &LU; + // This is the formula where all the registers and symbols matched; + // there aren't going to be any others. Since we declined it, we + // can skip the rest of the formulae and procede to the next LSRUse. break; } } } } + // Nothing looked good. return 0; } @@ -1976,7 +2002,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { for (SmallSetVector<const SCEV *, 4>::const_iterator I = Strides.begin(), E = Strides.end(); I != E; ++I) for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = - next(I); NewStrideIter != E; ++NewStrideIter) { + llvm::next(I); NewStrideIter != E; ++NewStrideIter) { const SCEV *OldStride = *I; const SCEV *NewStride = *NewStrideIter; @@ -2066,6 +2092,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -2195,6 +2225,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -2207,14 +2241,13 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// separate registers. 
If C is non-null, multiply each subexpression by C. static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, - SmallVectorImpl<const SCEV *> &UninterestingOps, const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); + CollectSubexprs(*I, C, Ops, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. @@ -2222,8 +2255,8 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), - C, Ops, UninterestingOps, L, SE); - CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); + C, Ops, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { @@ -2233,17 +2266,13 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, dyn_cast<SCEVConstant>(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, UninterestingOps, L, SE); + Ops, L, SE); return; } } - // Otherwise use the value itself. Loop-variant "unknown" values are - // uninteresting; we won't be able to do anything meaningful with them. - if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L)) - UninterestingOps.push_back(S); - else - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself, optionally with a scale applied. + Ops.push_back(C ? 
SE.getMulExpr(C, S) : S); } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -2257,19 +2286,19 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *BaseReg = Base.BaseRegs[i]; - SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps; - CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE); - - // Add any uninteresting values as one register, as we won't be able to - // form any interesting reassociation opportunities with them. They'll - // just have to be added inside the loop no matter what we do. - if (!UninterestingAddOps.empty()) - AddOps.push_back(SE.getAddExpr(UninterestingAddOps)); + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, L, SE); if (AddOps.size() == 1) continue; for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), JE = AddOps.end(); J != JE; ++J) { + + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L)) + continue; + // Don't pull a constant into a register if the constant could be folded // into an immediate field. if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, @@ -2279,9 +2308,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Collect all operands except *J. SmallVector<const SCEV *, 8> InnerAddOps - ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); + (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); InnerAddOps.append - (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); + (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. 
@@ -2377,7 +2406,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. if (NewG->isZero()) { std::swap(F.BaseRegs[i], F.BaseRegs.back()); @@ -2778,6 +2807,10 @@ LSRInstance::GenerateAllReuseFormulae() { } GenerateCrossUseConstantOffsets(); + + DEBUG(dbgs() << "\n" + "After generating reuse formulae:\n"; + print_uses(dbgs())); } /// If their are multiple formulae with the same set of registers used @@ -2876,11 +2909,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const { return Power; } -/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of -/// formulae to choose from, use some rough heuristics to prune down the number -/// of formulae. This keeps the main solver from taking an extraordinary amount -/// of time in some worst-case scenarios. -void LSRInstance::NarrowSearchSpaceUsingHeuristics() { +/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset +/// of the registers of another formula, it won't help reduce register +/// pressure (though it may not necessarily hurt register pressure); remove +/// it to simplify the system. +void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2938,7 +2971,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} +/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers +/// for expressions like A, A+1, A+2, etc., allocate a single register for +/// them. 
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2988,7 +3026,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { if (Fixup.LUIdx == LUIdx) { Fixup.LUIdx = LUThatHas - &Uses.front(); Fixup.Offset += F.AM.BaseOffs; - DEBUG(errs() << "New fixup has offset " + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } if (Fixup.LUIdx == NumUses-1) @@ -3009,7 +3047,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} + +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// we've done more filtering, as it may be able to find more formulae to +/// eliminate. +void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by re-filtering out " + "undesirable dedicated registers.\n"); + + FilterOutUndesirableDedicatedRegisters(); + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } +} +/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely +/// to be profitable, and then in any use which has any reference to that +/// register, delete all formulae which do not reference that register. +void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle. SmallPtrSet<const SCEV *, 4> Taken; @@ -3071,6 +3132,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { } } +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. 
This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. +void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + NarrowSearchSpaceByDetectingSupersets(); + NarrowSearchSpaceByCollapsingUnrolledCode(); + NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + NarrowSearchSpaceByPickingWinnerRegs(); +} + /// SolveRecurse - This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, Cost &SolutionCost, @@ -3614,10 +3686,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // to formulate the values needed for the uses. GenerateAllReuseFormulae(); - DEBUG(dbgs() << "\n" - "After generating reuse formulae:\n"; - print_uses(dbgs())); - FilterOutUndesirableDedicatedRegisters(); NarrowSearchSpaceUsingHeuristics(); @@ -3724,15 +3792,15 @@ private: } char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false); Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(&ID), TLI(tli) {} + : LoopPass(ID), TLI(tli) {} void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. 
However, we do update diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 4ad41ae4b59f..d0edfa220051 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -26,7 +27,7 @@ using namespace llvm; static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, +UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden, cl::desc("The cut-off point for automatic loop unrolling")); static cl::opt<unsigned> @@ -42,7 +43,7 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(&ID) {} + LoopUnroll() : LoopPass(ID) {} /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much @@ -55,23 +56,24 @@ namespace { /// loop preheaders be inserted into the CFG... /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired<LoopInfo>(); AU.addPreservedID(LCSSAID); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<ScalarEvolution>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. // For now, recreate dom info, if loop is unrolled. 
AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } }; } char LoopUnroll::ID = 0; -static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); +INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false); Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } @@ -145,12 +147,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; // FIXME: Reconstruct dom info, because it is not preserved properly. - DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - if (DT) { + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) DT->runOnFunction(*F); - DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); - if (DF) - DF->runOnFunction(*F); - } return true; } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 0c900ffc4027..9afe428ba569 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -77,7 +77,6 @@ namespace { bool redoLoop; Loop *currentLoop; - DominanceFrontier *DF; DominatorTree *DT; BasicBlock *loopHeader; BasicBlock *loopPreheader; @@ -92,15 +91,15 @@ namespace { public: static char ID; // Pass ID, replacement for typeid explicit LoopUnswitch(bool Os = false) : - LoopPass(&ID), OptimizeForSize(Os), redoLoop(false), - currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL), + LoopPass(ID), OptimizeForSize(Os), redoLoop(false), + currentLoop(NULL), DT(NULL), loopHeader(NULL), loopPreheader(NULL) {} bool runOnLoop(Loop *L, LPPassManager &LPM); bool processCurrentLoop(); /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... + /// loop preheaders be inserted into the CFG. 
/// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); @@ -110,7 +109,6 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } private: @@ -160,7 +158,7 @@ namespace { }; } char LoopUnswitch::ID = 0; -static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops"); +INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false); Pass *llvm::createLoopUnswitchPass(bool Os) { return new LoopUnswitch(Os); @@ -201,7 +199,6 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { LI = &getAnalysis<LoopInfo>(); LPM = &LPM_Ref; - DF = getAnalysisIfAvailable<DominanceFrontier>(); DT = getAnalysisIfAvailable<DominatorTree>(); currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); @@ -216,8 +213,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { // FIXME: Reconstruct dom info, because it is not preserved properly. if (DT) DT->runOnFunction(*F); - if (DF) - DF->runOnFunction(*F); } return Changed; } @@ -282,19 +277,18 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } -/// isTrivialLoopExitBlock - Check to see if all paths from BB either: -/// 1. Exit the loop with no side effects. -/// 2. Branch to the latch block with no side-effects. +/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the +/// loop with no side effects (including infinite loops). /// -/// If these conditions are true, we return true and set ExitBB to the block we +/// If true, we return true and set ExitBB to the block we /// exit through. /// static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, BasicBlock *&ExitBB, std::set<BasicBlock*> &Visited) { if (!Visited.insert(BB).second) { - // Already visited and Ok, end of recursion. - return true; + // Already visited. 
Without more analysis, this could indicate an infinite loop. + return false; } else if (!L->contains(BB)) { // Otherwise, this is a loop exit, this is fine so long as this is the // first exit. @@ -324,7 +318,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, /// process. If so, return the block that is exited to, otherwise return null. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set<BasicBlock*> Visited; - Visited.insert(L->getHeader()); // Branches to header are ok. + Visited.insert(L->getHeader()); // Branches to header make infinite loops. BasicBlock *ExitBB = 0; if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) return ExitBB; @@ -356,8 +350,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, if (!BI->isConditional() || BI->getCondition() != Cond) return false; - // Check to see if a successor of the branch is guaranteed to go to the - // latch block or exit through a one exit block without having any + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any // side-effects. If so, determine the value of Cond that causes it to do // this. if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp new file mode 100644 index 000000000000..973ffe7e6a40 --- /dev/null +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -0,0 +1,161 @@ +//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers atomic intrinsics to non-atomic form for use in a known +// non-preemptible environment.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loweratomic" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +namespace { + +bool LowerAtomicIntrinsic(CallInst *CI) { + IRBuilder<> Builder(CI->getParent(), CI); + + Function *Callee = CI->getCalledFunction(); + if (!Callee) + return false; + + unsigned IID = Callee->getIntrinsicID(); + switch (IID) { + case Intrinsic::memory_barrier: + break; + + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: { + Value *Ptr = CI->getArgOperand(0); + Value *Delta = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Res = NULL; + switch (IID) { + default: assert(0 && "Unrecognized atomic modify operation"); + case Intrinsic::atomic_load_add: + Res = Builder.CreateAdd(Orig, Delta); + break; + case Intrinsic::atomic_load_sub: + Res = Builder.CreateSub(Orig, Delta); + break; + case Intrinsic::atomic_load_and: + Res = Builder.CreateAnd(Orig, Delta); + break; + case Intrinsic::atomic_load_nand: + Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); + break; + case Intrinsic::atomic_load_or: + Res = Builder.CreateOr(Orig, Delta); + break; + case Intrinsic::atomic_load_xor: + Res = Builder.CreateXor(Orig, Delta); + break; + case Intrinsic::atomic_load_max: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Delta, + Orig); + break; + case Intrinsic::atomic_load_min: + Res = 
Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Orig, + Delta); + break; + case Intrinsic::atomic_load_umax: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Delta, + Orig); + break; + case Intrinsic::atomic_load_umin: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Orig, + Delta); + break; + } + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Val = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Builder.CreateStore(Val, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_cmp_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Cmp = CI->getArgOperand(1); + Value *Val = CI->getArgOperand(2); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + default: + return false; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); + + return true; +} + +struct LowerAtomic : public BasicBlockPass { + static char ID; + LowerAtomic() : BasicBlockPass(ID) {} + bool runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { + Instruction *Inst = DI++; + if (CallInst *CI = dyn_cast<CallInst>(Inst)) + Changed |= LowerAtomicIntrinsic(CI); + } + return Changed; + } + +}; + +} + +char LowerAtomic::ID = 0; +INITIALIZE_PASS(LowerAtomic, "loweratomic", + "Lower atomic intrinsics to non-atomic form", + false, false); + +Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0e566c5bd9be..24fae423d2f7 100644 --- 
a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -304,7 +304,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - MemCpyOpt() : FunctionPass(&ID) {} + MemCpyOpt() : FunctionPass(ID) {} private: // This transformation requires dominator postdominator info @@ -331,8 +331,7 @@ namespace { // createMemCpyOptPass - The public interface to this file... FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } -static RegisterPass<MemCpyOpt> X("memcpyopt", - "MemCpy Optimization"); +INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false); @@ -374,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // If the call is readnone, ignore it, otherwise bail out. We don't even // allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). - if (AA.getModRefBehavior(CallSite::get(BI)) == + if (AA.getModRefBehavior(CallSite(BI)) == AliasAnalysis::DoesNotAccessMemory) continue; @@ -509,7 +508,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // because we'll need to do type comparisons based on the underlying type. Value *cpyDest = cpy->getDest(); Value *cpySrc = cpy->getSource(); - CallSite CS = CallSite::get(C); + CallSite CS(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. @@ -637,10 +636,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { return true; } -/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which -/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be -/// a memcpy from X to Z (or potentially a memmove, depending on circumstances). -/// This allows later passes to remove the first memcpy altogether. +/// processMemCpy - perform simplification of memcpy's. 
If we have memcpy A +/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite +/// B to be a memcpy from X to Z (or potentially a memmove, depending on +/// circumstances). This allows later passes to remove the first memcpy +/// altogether. bool MemCpyOpt::processMemCpy(MemCpyInst *M) { MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); @@ -744,7 +744,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { const Type *ArgTys[3] = { M->getRawDest()->getType(), M->getRawSource()->getType(), M->getLength()->getType() }; - M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); + M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, + ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 98452f5d82c4..b8afcc12d927 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -77,7 +77,7 @@ namespace { bool MadeChange; public: static char ID; // Pass identification, replacement for typeid - Reassociate() : FunctionPass(&ID) {} + Reassociate() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -103,7 +103,8 @@ namespace { } char Reassociate::ID = 0; -static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions"); +INITIALIZE_PASS(Reassociate, "reassociate", + "Reassociate expressions", false, false); // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 13222ac22004..506b72ac34e0 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -36,7 +36,7 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); namespace { struct RegToMem : public FunctionPass { static char ID; 
// Pass identification, replacement for typeid - RegToMem() : FunctionPass(&ID) {} + RegToMem() : FunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); @@ -59,8 +59,8 @@ namespace { } char RegToMem::ID = 0; -static RegisterPass<RegToMem> -X("reg2mem", "Demote all values to stack slots"); +INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false); bool RegToMem::runOnFunction(Function &F) { @@ -124,7 +124,7 @@ bool RegToMem::runOnFunction(Function &F) { // createDemoteRegisterToMemory - Provide an entry point to create this pass. // -const PassInfo *const llvm::DemoteRegisterToMemoryID = &X; +char &llvm::DemoteRegisterToMemoryID = RegToMem::ID; FunctionPass *llvm::createDemoteRegisterToMemoryPass() { return new RegToMem(); } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 907ece8fcce9..6115c05c20ac 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -275,12 +275,12 @@ public: return I->second; } - LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { + /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = StructValueState.find(std::make_pair(V, i)); assert(I != StructValueState.end() && "V is not in valuemap!"); return I->second; - } + }*/ /// getTrackedRetVals - Get the inferred return value map. 
/// @@ -508,17 +508,16 @@ private: void visitLoadInst (LoadInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); void visitCallInst (CallInst &I) { - visitCallSite(CallSite::get(&I)); + visitCallSite(&I); } void visitInvokeInst (InvokeInst &II) { - visitCallSite(CallSite::get(&II)); + visitCallSite(&II); visitTerminatorInst(II); } void visitCallSite (CallSite CS); void visitUnwindInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVANextInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } void visitInstruction(Instruction &I) { @@ -1586,7 +1585,7 @@ namespace { /// struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid - SCCP() : FunctionPass(&ID) {} + SCCP() : FunctionPass(ID) {} // runOnFunction - Run the Sparse Conditional Constant Propagation // algorithm, and return true if the function was modified. @@ -1600,8 +1599,8 @@ namespace { } // end anonymous namespace char SCCP::ID = 0; -static RegisterPass<SCCP> -X("sccp", "Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(SCCP, "sccp", + "Sparse Conditional Constant Propagation", false, false); // createSCCPPass - This is the public interface to this file. FunctionPass *llvm::createSCCPPass() { @@ -1702,14 +1701,15 @@ namespace { /// struct IPSCCP : public ModulePass { static char ID; - IPSCCP() : ModulePass(&ID) {} + IPSCCP() : ModulePass(ID) {} bool runOnModule(Module &M); }; } // end anonymous namespace char IPSCCP::ID = 0; -static RegisterPass<IPSCCP> -Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(IPSCCP, "ipsccp", + "Interprocedural Sparse Conditional Constant Propagation", + false, false); // createIPSCCPPass - This is the public interface to this file. 
ModulePass *llvm::createIPSCCPPass() { @@ -1748,6 +1748,13 @@ static bool AddressIsTaken(const GlobalValue *GV) { bool IPSCCP::runOnModule(Module &M) { SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + // AddressTakenFunctions - This set keeps track of the address-taken functions + // that are in the input. As IPSCCP runs through and simplifies code, + // functions that were address taken can end up losing their + // address-taken-ness. Because of this, we keep track of their addresses from + // the first pass so we can use them for the later simplification pass. + SmallPtrSet<Function*, 32> AddressTakenFunctions; + // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. // @@ -1763,9 +1770,13 @@ bool IPSCCP::runOnModule(Module &M) { // If this function only has direct calls that we can see, we can track its // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. - if (F->hasLocalLinkage() && !AddressIsTaken(F)) { - Solver.AddArgumentTrackedFunction(F); - continue; + if (F->hasLocalLinkage()) { + if (AddressIsTaken(F)) + AddressTakenFunctions.insert(F); + else { + Solver.AddArgumentTrackedFunction(F); + continue; + } } // Assume the function is called. @@ -1950,7 +1961,7 @@ bool IPSCCP::runOnModule(Module &M) { continue; // We can only do this if we know that nothing else can call the function. 
- if (!F->hasLocalLinkage() || AddressIsTaken(F)) + if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F)) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index dd445f63320a..fee317dbd9ab 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -28,6 +28,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" @@ -51,7 +52,7 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { static char ID; // Pass identification, replacement for typeid - explicit SROA(signed T = -1) : FunctionPass(&ID) { + explicit SROA(signed T = -1) : FunctionPass(ID) { if (T == -1) SRThreshold = 128; else @@ -114,8 +115,7 @@ namespace { void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); void DeleteDeadInstructions(); - AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, @@ -135,7 +135,8 @@ namespace { } char SROA::ID = 0; -static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); +INITIALIZE_PASS(SROA, "scalarrepl", + "Scalar Replacement of Aggregates", false, false); // Public interface to the ScalarReplAggregates pass FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { @@ -193,6 +194,27 @@ private: }; } // end anonymous namespace. + +/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't +/// allowed to form. 
We do this to avoid MMX types, which is a complete hack, +/// but is required until the backend is fixed. +static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) { + StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple()); + if (!Triple.startswith("i386") && + !Triple.startswith("x86_64")) + return false; + + // Reject all the MMX vector types. + switch (VTy->getNumElements()) { + default: return false; + case 1: return VTy->getElementType()->isIntegerTy(64); + case 2: return VTy->getElementType()->isIntegerTy(32); + case 4: return VTy->getElementType()->isIntegerTy(16); + case 8: return VTy->getElementType()->isIntegerTy(8); + } +} + + /// TryConvert - Analyze the specified alloca, and if it is safe to do so, /// rewrite it to be a new alloca which is mem2reg'able. This returns the new /// alloca if possible or null if not. @@ -209,7 +231,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + if (VectorTy && VectorTy->isVectorTy() && HadAVector && + !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) { DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. 
@@ -969,7 +992,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); if (Length) isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == CallInst::ArgOffset, Info); + UI.getOperandNo() == 0, Info); else MarkUnsafe(Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1662,6 +1685,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) + return HasPadding(ATy->getElementType(), TD); + + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return HasPadding(VTy->getElementType(), TD); + if (const StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned PrevFieldBitOffset = 0; @@ -1691,12 +1720,8 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { if (PrevFieldEnd < SL->getSizeInBits()) return true; } - - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - return HasPadding(ATy->getElementType(), TD); - } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - return HasPadding(VTy->getElementType(), TD); } + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } @@ -1787,7 +1812,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != CallInst::ArgOffset) return false; + if (UI.getOperandNo() != 0) return false; // If the source of the memcpy/move is not a constant global, reject it. 
if (!PointsToConstantGlobal(MI->getSource())) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 49d93a2fcc27..360749caf111 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -42,14 +42,15 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGSimplifyPass() : FunctionPass(&ID) {} + CFGSimplifyPass() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); }; } char CFGSimplifyPass::ID = 0; -static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG"); +INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg", + "Simplify the CFG", false, false); // Public interface to the CFGSimplification pass FunctionPass *llvm::createCFGSimplificationPass() { @@ -284,10 +285,9 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { while (LocalChange) { LocalChange = false; - // Loop over all of the basic blocks (except the first one) and remove them - // if they are unneeded... + // Loop over all of the basic blocks and remove them if they are unneeded... 
// - for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) { + for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { if (SimplifyCFG(BBIt++, TD)) { LocalChange = true; ++NumSimpl; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index c3408e77807f..3ec70ec2e024 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -32,7 +32,7 @@ namespace { const TargetData *TD; public: static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {} + SimplifyHalfPowrLibCalls() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -46,8 +46,8 @@ namespace { char SimplifyHalfPowrLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyHalfPowrLibCalls> -X("simplify-libcalls-halfpowr", "Simplify half_powr library calls"); +INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", + "Simplify half_powr library calls", false, false); // Public interface to the Simplify HalfPowr LibCalls pass. FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index b1c619125c35..d7ce53f36715 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -532,7 +532,7 @@ struct StrStrOpt : public LibCallOptimization { StrLen, B, TD); for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { - ICmpInst *Old = cast<ICmpInst>(UI++); + ICmpInst *Old = cast<ICmpInst>(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); @@ -566,8 +566,8 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). 
if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), - CI->getType()); + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), + ToFindStr[0], B, TD), CI->getType()); return 0; } }; @@ -681,8 +681,8 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), - false); + Value *Val = B.CreateIntCast(CI->getArgOperand(1), + Type::getInt8Ty(*Context), false); EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); return CI->getArgOperand(0); } @@ -1042,9 +1042,9 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()+1), 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the + ConstantInt::get(TD->getIntPtrType(*Context), // nul byte. + FormatStr.size() + 1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1080,7 +1080,8 @@ struct SPrintFOpt : public LibCallOptimization { Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), + IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1236,7 +1237,7 @@ namespace { bool Modified; // This is only used by doInitialization. 
public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {} + SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {} void InitOptimizations(); bool runOnFunction(Function &F); @@ -1253,8 +1254,8 @@ namespace { char SimplifyLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyLibCalls> -X("simplify-libcalls", "Simplify well-known library calls"); +INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false); // Public interface to the Simplify LibCalls pass. FunctionPass *llvm::createSimplifyLibCallsPass() { @@ -2155,7 +2156,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * pow(pow(x,y),z)-> pow(x,y*z) // // puts: -// * puts("") -> putchar("\n") +// * puts("") -> putchar('\n') // // round, roundf, roundl: // * round(cnst) -> cnst' diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index b88ba4850509..95d3dedfb62d 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -35,7 +35,7 @@ namespace { public: static char ID; // Pass identification - Sinking() : FunctionPass(&ID) {} + Sinking() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -56,8 +56,7 @@ namespace { } // end anonymous namespace char Sinking::ID = 0; -static RegisterPass<Sinking> -X("sink", "Code sinking"); +INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false); FunctionPass *llvm::createSinkingPass() { return new Sinking(); } diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 9208238f4ba5..2e437ac778c8 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -49,7 +49,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - TailDup() : FunctionPass(&ID) {} + TailDup() : FunctionPass(ID) {} private: 
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); @@ -59,7 +59,7 @@ namespace { } char TailDup::ID = 0; -static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication"); +INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false); // Public interface to the Tail Duplication pass FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 01c8e5d6fcf4..371725467a24 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -72,7 +72,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid - TailCallElim() : FunctionPass(&ID) {} + TailCallElim() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -87,7 +87,8 @@ namespace { } char TailCallElim::ID = 0; -static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination"); +INITIALIZE_PASS(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false); // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { @@ -277,22 +278,22 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; - for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) - if (RI != IgnoreRI) { - Value *RetOp = RI->getOperand(0); - - // We can only perform this transformation if the value returned is - // evaluatable at the start of the initial invocation of the function, - // instead of at the end of the evaluation. 
- // - if (!isDynamicConstant(RetOp, CI, RI)) - return 0; - - if (ReturnedValue && RetOp != ReturnedValue) - return 0; // Cannot transform if differing values are returned. - ReturnedValue = RetOp; - } + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { + ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); + if (RI == 0 || RI == IgnoreRI) continue; + + // We can only perform this transformation if the value returned is + // evaluatable at the start of the initial invocation of the function, + // instead of at the end of the evaluation. + // + Value *RetOp = RI->getOperand(0); + if (!isDynamicConstant(RetOp, CI, RI)) + return 0; + + if (ReturnedValue && RetOp != ReturnedValue) + return 0; // Cannot transform if differing values are returned. + ReturnedValue = RetOp; + } return ReturnedValue; } @@ -306,7 +307,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, assert(I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"); - // Exactly one operand should be the result of the call instruction... + // Exactly one operand should be the result of the call instruction. if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || (I->getOperand(0) != CI && I->getOperand(1) != CI)) return 0; @@ -386,21 +387,22 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) - if (!CanMoveAboveCall(BBI, CI)) { - // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed - // using accumulator recursion elimination. Check to see if this is the - // case, and if so, remember the initial accumulator value for later. 
- if ((AccumulatorRecursionEliminationInitVal = - CanTransformAccumulatorRecursion(BBI, CI))) { - // Yes, this is accumulator recursion. Remember which instruction - // accumulates. - AccumulatorRecursionInstr = BBI; - } else { - return false; // Otherwise, we cannot eliminate the tail recursion! - } + for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) { + if (CanMoveAboveCall(BBI, CI)) continue; + + // If we can't move the instruction above the call, it might be because it + // is an associative and commutative operation that could be tranformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. + if ((AccumulatorRecursionEliminationInitVal = + CanTransformAccumulatorRecursion(BBI, CI))) { + // Yes, this is accumulator recursion. Remember which instruction + // accumulates. + AccumulatorRecursionInstr = BBI; + } else { + return false; // Otherwise, we cannot eliminate the tail recursion! } + } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a |