diff options
Diffstat (limited to 'lib/Transforms/Scalar')
32 files changed, 1310 insertions, 1749 deletions
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp deleted file mode 100644 index dcf14a6860da..000000000000 --- a/lib/Transforms/Scalar/ABCD.cpp +++ /dev/null @@ -1,1112 +0,0 @@ -//===------- ABCD.cpp - Removes redundant conditional branches ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass removes redundant branch instructions. This algorithm was -// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper -// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original -// Algorithm was created to remove array bound checks for strongly typed -// languages. This implementation expands the idea and removes any conditional -// branches that can be proved redundant, not only those used in array bound -// checks. With the SSI representation, each variable has a -// constraint. By analyzing these constraints we can prove that a branch is -// redundant. When a branch is proved redundant it means that -// one direction will always be taken; thus, we can change this branch into an -// unconditional jump. -// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination -// after ABCD to clean up the code. -// This implementation was created based on the implementation of the ABCD -// algorithm implemented for the compiler Jitrino. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "abcd" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/SSI.h" - -using namespace llvm; - -STATISTIC(NumBranchTested, "Number of conditional branches analyzed"); -STATISTIC(NumBranchRemoved, "Number of conditional branches removed"); - -namespace { - -class ABCD : public FunctionPass { - public: - static char ID; // Pass identification, replacement for typeid. - ABCD() : FunctionPass(&ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<SSI>(); - } - - bool runOnFunction(Function &F); - - private: - /// Keep track of whether we've modified the program yet. 
- bool modified; - - enum ProveResult { - False = 0, - Reduced = 1, - True = 2 - }; - - typedef ProveResult (*meet_function)(ProveResult, ProveResult); - static ProveResult max(ProveResult res1, ProveResult res2) { - return (ProveResult) std::max(res1, res2); - } - static ProveResult min(ProveResult res1, ProveResult res2) { - return (ProveResult) std::min(res1, res2); - } - - class Bound { - public: - Bound(APInt v, bool upper) : value(v), upper_bound(upper) {} - Bound(const Bound &b, int cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - Bound(const Bound &b, const APInt &cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - - /// Test if Bound is an upper bound - bool isUpperBound() const { return upper_bound; } - - /// Get the bitwidth of this bound - int32_t getBitWidth() const { return value.getBitWidth(); } - - /// Creates a Bound incrementing the one received - static Bound createIncrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value+1 : b.value-1, - b.upper_bound); - } - - /// Creates a Bound decrementing the one received - static Bound createDecrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value-1 : b.value+1, - b.upper_bound); - } - - /// Test if two bounds are equal - static bool eq(const Bound *a, const Bound *b) { - if (!a || !b) return false; - - assert(a->isUpperBound() == b->isUpperBound()); - return a->value == b->value; - } - - /// Test if val is less than or equal to Bound b - static bool leq(APInt val, const Bound &b) { - return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value); - } - - /// Test if Bound a is less then or equal to Bound - static bool leq(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? a.value.sle(b.value) : - a.value.sge(b.value); - } - - /// Test if Bound a is less then Bound b - static bool lt(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? 
a.value.slt(b.value) : - a.value.sgt(b.value); - } - - /// Test if Bound b is greater then or equal val - static bool geq(const Bound &b, APInt val) { - return leq(val, b); - } - - /// Test if Bound a is greater then or equal Bound b - static bool geq(const Bound &a, const Bound &b) { - return leq(b, a); - } - - private: - APInt value; - bool upper_bound; - }; - - /// This class is used to store results some parts of the graph, - /// so information does not need to be recalculated. The maximum false, - /// minimum true and minimum reduced results are stored - class MemoizedResultChart { - public: - MemoizedResultChart() {} - MemoizedResultChart(const MemoizedResultChart &other) { - if (other.max_false) - max_false.reset(new Bound(*other.max_false)); - if (other.min_true) - min_true.reset(new Bound(*other.min_true)); - if (other.min_reduced) - min_reduced.reset(new Bound(*other.min_reduced)); - } - - /// Returns the max false - const Bound *getFalse() const { return max_false.get(); } - - /// Returns the min true - const Bound *getTrue() const { return min_true.get(); } - - /// Returns the min reduced - const Bound *getReduced() const { return min_reduced.get(); } - - /// Return the stored result for this bound - ProveResult getResult(const Bound &bound) const; - - /// Stores a false found - void addFalse(const Bound &bound); - - /// Stores a true found - void addTrue(const Bound &bound); - - /// Stores a Reduced found - void addReduced(const Bound &bound); - - /// Clears redundant reduced - /// If a min_true is smaller than a min_reduced then the min_reduced - /// is unnecessary and then removed. It also works for min_reduced - /// begin smaller than max_false. 
- void clearRedundantReduced(); - - void clear() { - max_false.reset(); - min_true.reset(); - min_reduced.reset(); - } - - private: - OwningPtr<Bound> max_false, min_true, min_reduced; - }; - - /// This class stores the result found for a node of the graph, - /// so these results do not need to be recalculated, only searched for. - class MemoizedResult { - public: - /// Test if there is true result stored from b to a - /// that is less then the bound - bool hasTrue(Value *b, const Bound &bound) const { - const Bound *trueBound = map.lookup(b).getTrue(); - return trueBound && Bound::leq(*trueBound, bound); - } - - /// Test if there is false result stored from b to a - /// that is less then the bound - bool hasFalse(Value *b, const Bound &bound) const { - const Bound *falseBound = map.lookup(b).getFalse(); - return falseBound && Bound::leq(*falseBound, bound); - } - - /// Test if there is reduced result stored from b to a - /// that is less then the bound - bool hasReduced(Value *b, const Bound &bound) const { - const Bound *reducedBound = map.lookup(b).getReduced(); - return reducedBound && Bound::leq(*reducedBound, bound); - } - - /// Returns the stored bound for b - ProveResult getBoundResult(Value *b, const Bound &bound) { - return map[b].getResult(bound); - } - - /// Clears the map - void clear() { - DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin(); - DenseMapIterator<Value*, MemoizedResultChart> end = map.end(); - for (; begin != end; ++begin) { - begin->second.clear(); - } - map.clear(); - } - - /// Stores the bound found - void updateBound(Value *b, const Bound &bound, const ProveResult res); - - private: - // Maps a nod in the graph with its results found. - DenseMap<Value*, MemoizedResultChart> map; - }; - - /// This class represents an edge in the inequality graph used by the - /// ABCD algorithm. An edge connects node v to node u with a value c if - /// we could infer a constraint v <= u + c in the source program. 
- class Edge { - public: - Edge(Value *V, APInt val, bool upper) - : vertex(V), value(val), upper_bound(upper) {} - - Value *getVertex() const { return vertex; } - const APInt &getValue() const { return value; } - bool isUpperBound() const { return upper_bound; } - - private: - Value *vertex; - APInt value; - bool upper_bound; - }; - - /// Weighted and Directed graph to represent constraints. - /// There is one type of constraint, a <= b + X, which will generate an - /// edge from b to a with weight X. - class InequalityGraph { - public: - - /// Adds an edge from V_from to V_to with weight value - void addEdge(Value *V_from, Value *V_to, APInt value, bool upper); - - /// Test if there is a node V - bool hasNode(Value *V) const { return graph.count(V); } - - /// Test if there is any edge from V in the upper direction - bool hasEdge(Value *V, bool upper) const; - - /// Returns all edges pointed by vertex V - SmallVector<Edge, 16> getEdges(Value *V) const { - return graph.lookup(V); - } - - /// Prints the graph in dot format. - /// Blue edges represent upper bound and Red lower bound. 
- void printGraph(raw_ostream &OS, Function &F) const { - printHeader(OS, F); - printBody(OS); - printFooter(OS); - } - - /// Clear the graph - void clear() { - graph.clear(); - } - - private: - DenseMap<Value *, SmallVector<Edge, 16> > graph; - - /// Prints the header of the dot file - void printHeader(raw_ostream &OS, Function &F) const; - - /// Prints the footer of the dot file - void printFooter(raw_ostream &OS) const { - OS << "}\n"; - } - - /// Prints the body of the dot file - void printBody(raw_ostream &OS) const; - - /// Prints vertex source to the dot file - void printVertex(raw_ostream &OS, Value *source) const; - - /// Prints the edge to the dot file - void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const; - - void printName(raw_ostream &OS, Value *info) const; - }; - - /// Iterates through all BasicBlocks, if the Terminator Instruction - /// uses an Comparator Instruction, all operands of this comparator - /// are sent to be transformed to SSI. Only Instruction operands are - /// transformed. - void createSSI(Function &F); - - /// Creates the graphs for this function. - /// It will look for all comparators used in branches, and create them. - /// These comparators will create constraints for any instruction as an - /// operand. - void executeABCD(Function &F); - - /// Seeks redundancies in the comparator instruction CI. - /// If the ABCD algorithm can prove that the comparator CI always - /// takes one way, then the Terminator Instruction TI is substituted from - /// a conditional branch to a unconditional one. - /// This code basically receives a comparator, and verifies which kind of - /// instruction it is. Depending on the kind of instruction, we use different - /// strategies to prove its redundancy. - void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI); - - /// Substitutes Terminator Instruction TI, that is a conditional branch, - /// with one unconditional branch. 
Succ_edge determines if the new - /// unconditional edge will be the first or second edge of the former TI - /// instruction. - void removeRedundancy(TerminatorInst *TI, bool Succ_edge); - - /// When an conditional branch is removed, the BasicBlock that is no longer - /// reachable will have problems in phi functions. This method fixes these - /// phis removing the former BasicBlock from the list of incoming BasicBlocks - /// of all phis. In case the phi remains with no predecessor it will be - /// marked to be removed later. - void fixPhi(BasicBlock *BB, BasicBlock *Succ); - - /// Removes phis that have no predecessor - void removePhis(); - - /// Creates constraints for Instructions. - /// If the constraint for this instruction has already been created - /// nothing is done. - void createConstraintInstruction(Instruction *I); - - /// Creates constraints for Binary Operators. - /// It will create constraints only for addition and subtraction, - /// the other binary operations are not treated by ABCD. - /// For additions in the form a = b + X and a = X + b, where X is a constant, - /// the constraint a <= b + X can be obtained. For this constraint, an edge - /// a->b with weight X is added to the lower bound graph, and an edge - /// b->a with weight -X is added to the upper bound graph. - /// Only subtractions in the format a = b - X is used by ABCD. - /// Edges are created using the same semantic as addition. - void createConstraintBinaryOperator(BinaryOperator *BO); - - /// Creates constraints for Comparator Instructions. - /// Only comparators that have any of the following operators - /// are used to create constraints: >=, >, <=, <. And only if - /// at least one operand is an Instruction. In a Comparator Instruction - /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where - /// t and f represent sigma for operands in true and false branches. The - /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and - /// b_f <= b. 
There are two more constraints that depend on the operator. - /// For the operator <= : a_t <= b_t and b_f <= a_f-1 - /// For the operator < : a_t <= b_t-1 and b_f <= a_f - /// For the operator >= : b_t <= a_t and a_f <= b_f-1 - /// For the operator > : b_t <= a_t-1 and a_f <= b_f - void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI); - - /// Creates constraints for PHI nodes. - /// In a PHI node a = phi(b,c) we can create the constraint - /// a<= max(b,c). With this constraint there will be the edges, - /// b->a and c->a with weight 0 in the lower bound graph, and the edges - /// a->b and a->c with weight 0 in the upper bound graph. - void createConstraintPHINode(PHINode *PN); - - /// Given a binary operator, we are only interest in the case - /// that one operand is an Instruction and the other is a ConstantInt. In - /// this case the method returns true, otherwise false. It also obtains the - /// Instruction and ConstantInt from the BinaryOperator and returns it. - bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2); - - /// This method creates a constraint between a Sigma and an Instruction. - /// These constraints are created as soon as we find a comparator that uses a - /// SSI variable. - void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f); - - /// If PN_op1 and PN_o2 are different from NULL, create a constraint - /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace - /// with the respective V_op#, if V_op# is a ConstantInt. - void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value); - - /// Returns the sigma representing the Instruction I in BasicBlock BB. - /// Returns NULL in case there is no sigma for this Instruction in this - /// Basic Block. 
This methods assume that sigmas are the first instructions - /// in a block, and that there can be only two sigmas in a block. So it will - /// only look on the first two instructions of BasicBlock BB. - PHINode *findSigma(BasicBlock *BB, Instruction *I); - - /// Original ABCD algorithm to prove redundant checks. - /// This implementation works on any kind of inequality branch. - bool demandProve(Value *a, Value *b, int c, bool upper_bound); - - /// Prove that distance between b and a is <= bound - ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level); - - /// Updates the distance value for a and b - void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level, - meet_function meet); - - InequalityGraph inequality_graph; - MemoizedResult mem_result; - DenseMap<Value*, const Bound*> active; - SmallPtrSet<Value*, 16> created; - SmallVector<PHINode *, 16> phis_to_remove; -}; - -} // end anonymous namespace. - -char ABCD::ID = 0; -static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand"); - - -bool ABCD::runOnFunction(Function &F) { - modified = false; - createSSI(F); - executeABCD(F); - DEBUG(inequality_graph.printGraph(dbgs(), F)); - removePhis(); - - inequality_graph.clear(); - mem_result.clear(); - active.clear(); - created.clear(); - phis_to_remove.clear(); - return modified; -} - -/// Iterates through all BasicBlocks, if the Terminator Instruction -/// uses an Comparator Instruction, all operands of this comparator -/// are sent to be transformed to SSI. Only Instruction operands are -/// transformed. 
-void ABCD::createSSI(Function &F) { - SSI *ssi = &getAnalysis<SSI>(); - - SmallVector<Instruction *, 16> Insts; - - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) { - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) { - modified = true; // XXX: but yet createSSI might do nothing - Insts.push_back(I); - } - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) { - modified = true; - Insts.push_back(I); - } - } - } - ssi->createSSI(Insts); -} - -/// Creates the graphs for this function. -/// It will look for all comparators used in branches, and create them. -/// These comparators will create constraints for any instruction as an -/// operand. -void ABCD::executeABCD(Function &F) { - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0)); - if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy()) - continue; - - createConstraintCmpInst(ICI, TI); - seekRedundancy(ICI, TI); - } -} - -/// Seeks redundancies in the comparator instruction CI. -/// If the ABCD algorithm can prove that the comparator CI always -/// takes one way, then the Terminator Instruction TI is substituted from -/// a conditional branch to a unconditional one. -/// This code basically receives a comparator, and verifies which kind of -/// instruction it is. Depending on the kind of instruction, we use different -/// strategies to prove its redundancy. 
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) { - CmpInst::Predicate Pred = ICI->getPredicate(); - - Value *source, *dest; - int distance1, distance2; - bool upper; - - switch(Pred) { - case CmpInst::ICMP_SGT: // signed greater than - upper = false; - distance1 = 1; - distance2 = 0; - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - upper = false; - distance1 = 0; - distance2 = -1; - break; - - case CmpInst::ICMP_SLT: // signed less than - upper = true; - distance1 = -1; - distance2 = 0; - break; - - case CmpInst::ICMP_SLE: // signed less or equal - upper = true; - distance1 = 0; - distance2 = 1; - break; - - default: - return; - } - - ++NumBranchTested; - source = ICI->getOperand(0); - dest = ICI->getOperand(1); - if (demandProve(dest, source, distance1, upper)) { - removeRedundancy(TI, true); - } else if (demandProve(dest, source, distance2, !upper)) { - removeRedundancy(TI, false); - } -} - -/// Substitutes Terminator Instruction TI, that is a conditional branch, -/// with one unconditional branch. Succ_edge determines if the new -/// unconditional edge will be the first or second edge of the former TI -/// instruction. -void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) { - BasicBlock *Succ; - if (Succ_edge) { - Succ = TI->getSuccessor(0); - fixPhi(TI->getParent(), TI->getSuccessor(1)); - } else { - Succ = TI->getSuccessor(1); - fixPhi(TI->getParent(), TI->getSuccessor(0)); - } - - BranchInst::Create(Succ, TI); - TI->eraseFromParent(); // XXX: invoke - ++NumBranchRemoved; - modified = true; -} - -/// When an conditional branch is removed, the BasicBlock that is no longer -/// reachable will have problems in phi functions. This method fixes these -/// phis removing the former BasicBlock from the list of incoming BasicBlocks -/// of all phis. In case the phi remains with no predecessor it will be -/// marked to be removed later. 
-void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) { - BasicBlock::iterator begin = Succ->begin(); - while (PHINode *PN = dyn_cast<PHINode>(begin++)) { - PN->removeIncomingValue(BB, false); - if (PN->getNumIncomingValues() == 0) - phis_to_remove.push_back(PN); - } -} - -/// Removes phis that have no predecessor -void ABCD::removePhis() { - for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) { - PHINode *PN = phis_to_remove[i]; - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - PN->eraseFromParent(); - } -} - -/// Creates constraints for Instructions. -/// If the constraint for this instruction has already been created -/// nothing is done. -void ABCD::createConstraintInstruction(Instruction *I) { - // Test if this instruction has not been created before - if (created.insert(I)) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - createConstraintBinaryOperator(BO); - } else if (PHINode *PN = dyn_cast<PHINode>(I)) { - createConstraintPHINode(PN); - } - } -} - -/// Creates constraints for Binary Operators. -/// It will create constraints only for addition and subtraction, -/// the other binary operations are not treated by ABCD. -/// For additions in the form a = b + X and a = X + b, where X is a constant, -/// the constraint a <= b + X can be obtained. For this constraint, an edge -/// a->b with weight X is added to the lower bound graph, and an edge -/// b->a with weight -X is added to the upper bound graph. -/// Only subtractions in the format a = b - X is used by ABCD. -/// Edges are created using the same semantic as addition. 
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) { - Instruction *I1 = NULL, *I2 = NULL; - ConstantInt *CI1 = NULL, *CI2 = NULL; - - // Test if an operand is an Instruction and the other is a Constant - if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2)) - return; - - Instruction *I = 0; - APInt value; - - switch (BO->getOpcode()) { - case Instruction::Add: - if (I1) { - I = I1; - value = CI2->getValue(); - } else if (I2) { - I = I2; - value = CI1->getValue(); - } - break; - - case Instruction::Sub: - // Instructions like a = X-b, where X is a constant are not represented - // in the graph. - if (!I1) - return; - - I = I1; - value = -CI2->getValue(); - break; - - default: - return; - } - - inequality_graph.addEdge(I, BO, value, true); - inequality_graph.addEdge(BO, I, -value, false); - createConstraintInstruction(I); -} - -/// Given a binary operator, we are only interest in the case -/// that one operand is an Instruction and the other is a ConstantInt. In -/// this case the method returns true, otherwise false. It also obtains the -/// Instruction and ConstantInt from the BinaryOperator and returns it. -bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2) { - Value *op1 = BO->getOperand(0); - Value *op2 = BO->getOperand(1); - - if ((*I1 = dyn_cast<Instruction>(op1))) { - if ((*C2 = dyn_cast<ConstantInt>(op2))) - return true; // First is Instruction and second ConstantInt - - return false; // Both are Instruction - } else { - if ((*C1 = dyn_cast<ConstantInt>(op1)) && - (*I2 = dyn_cast<Instruction>(op2))) - return true; // First is ConstantInt and second Instruction - - return false; // Both are not Instruction - } -} - -/// Creates constraints for Comparator Instructions. -/// Only comparators that have any of the following operators -/// are used to create constraints: >=, >, <=, <. And only if -/// at least one operand is an Instruction. 
In a Comparator Instruction -/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where -/// t and f represent sigma for operands in true and false branches. The -/// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and -/// b_f <= b. There are two more constraints that depend on the operator. -/// For the operator <= : a_t <= b_t and b_f <= a_f-1 -/// For the operator < : a_t <= b_t-1 and b_f <= a_f -/// For the operator >= : b_t <= a_t and a_f <= b_f-1 -/// For the operator > : b_t <= a_t-1 and a_f <= b_f -void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { - Value *V_op1 = ICI->getOperand(0); - Value *V_op2 = ICI->getOperand(1); - - if (!V_op1->getType()->isIntegerTy()) - return; - - Instruction *I_op1 = dyn_cast<Instruction>(V_op1); - Instruction *I_op2 = dyn_cast<Instruction>(V_op2); - - // Test if at least one operand is an Instruction - if (!I_op1 && !I_op2) - return; - - BasicBlock *BB_succ_t = TI->getSuccessor(0); - BasicBlock *BB_succ_f = TI->getSuccessor(1); - - PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL, - *SIG_op2_t = NULL, *SIG_op2_f = NULL; - - createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f); - createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f); - - int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth(); - APInt MinusOne = APInt::getAllOnesValue(width); - APInt Zero = APInt::getNullValue(width); - - CmpInst::Predicate Pred = ICI->getPredicate(); - ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1); - ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2); - switch (Pred) { - case CmpInst::ICMP_SGT: // signed greater than - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero); - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, 
MinusOne); - break; - - case CmpInst::ICMP_SLT: // signed less than - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero); - break; - - case CmpInst::ICMP_SLE: // signed less or equal - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne); - break; - - default: - break; - } - - if (I_op1) - createConstraintInstruction(I_op1); - if (I_op2) - createConstraintInstruction(I_op2); -} - -/// Creates constraints for PHI nodes. -/// In a PHI node a = phi(b,c) we can create the constraint -/// a<= max(b,c). With this constraint there will be the edges, -/// b->a and c->a with weight 0 in the lower bound graph, and the edges -/// a->b and a->c with weight 0 in the upper bound graph. -void ABCD::createConstraintPHINode(PHINode *PN) { - // FIXME: We really want to disallow sigma nodes, but I don't know the best - // way to detect the other than this. - if (PN->getNumOperands() == 2) return; - - int32_t width = cast<IntegerType>(PN->getType())->getBitWidth(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - if (Instruction *I = dyn_cast<Instruction>(V)) { - createConstraintInstruction(I); - } - inequality_graph.addEdge(V, PN, APInt(width, 0), true); - inequality_graph.addEdge(V, PN, APInt(width, 0), false); - } -} - -/// This method creates a constraint between a Sigma and an Instruction. -/// These constraints are created as soon as we find a comparator that uses a -/// SSI variable. 
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f) { - *SIG_op_t = findSigma(BB_succ_t, I_op); - *SIG_op_f = findSigma(BB_succ_f, I_op); - - if (*SIG_op_t) { - int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false); - } - if (*SIG_op_f) { - int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false); - } -} - -/// If PN_op1 and PN_o2 are different from NULL, create a constraint -/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace -/// with the respective V_op#, if V_op# is a ConstantInt. -void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value) { - if (SIG_op1 && SIG_op2) { - inequality_graph.addEdge(SIG_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false); - } else if (SIG_op1 && V_op2) { - inequality_graph.addEdge(V_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, V_op2, -value, false); - } else if (SIG_op2 && V_op1) { - inequality_graph.addEdge(SIG_op2, V_op1, value, true); - inequality_graph.addEdge(V_op1, SIG_op2, -value, false); - } -} - -/// Returns the sigma representing the Instruction I in BasicBlock BB. -/// Returns NULL in case there is no sigma for this Instruction in this -/// Basic Block. This methods assume that sigmas are the first instructions -/// in a block, and that there can be only two sigmas in a block. So it will -/// only look on the first two instructions of BasicBlock BB. -PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) { - // BB has more than one predecessor, BB cannot have sigmas. 
- if (I == NULL || BB->getSinglePredecessor() == NULL) - return NULL; - - BasicBlock::iterator begin = BB->begin(); - BasicBlock::iterator end = BB->end(); - - for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) { - Instruction *I_succ = begin; - if (PHINode *PN = dyn_cast<PHINode>(I_succ)) - if (PN->getIncomingValue(0) == I) - return PN; - } - - return NULL; -} - -/// Original ABCD algorithm to prove redundant checks. -/// This implementation works on any kind of inequality branch. -bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) { - int32_t width = cast<IntegerType>(a->getType())->getBitWidth(); - Bound bound(APInt(width, c), upper_bound); - - mem_result.clear(); - active.clear(); - - ProveResult res = prove(a, b, bound, 0); - return res != False; -} - -/// Prove that distance between b and a is <= bound -ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound, - unsigned level) { - // if (C[b-a<=e] == True for some e <= bound - // Same or stronger difference was already proven - if (mem_result.hasTrue(b, bound)) - return True; - - // if (C[b-a<=e] == False for some e >= bound - // Same or weaker difference was already disproved - if (mem_result.hasFalse(b, bound)) - return False; - - // if (C[b-a<=e] == Reduced for some e <= bound - // b is on a cycle that was reduced for same or stronger difference - if (mem_result.hasReduced(b, bound)) - return Reduced; - - // traversal reached the source vertex - if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true))) - return True; - - // if b has no predecessor then fail - if (!inequality_graph.hasEdge(b, bound.isUpperBound())) - return False; - - // a cycle was encountered - if (active.count(b)) { - if (Bound::leq(*active.lookup(b), bound)) - return Reduced; // a "harmless" cycle - - return False; // an amplifying cycle - } - - active[b] = &bound; - PHINode *PN = dyn_cast<PHINode>(b); - - // Test if a Value is a Phi. 
If it is a PHINode with more than 1 incoming - // value, then it is a phi, if it has 1 incoming value it is a sigma. - if (PN && PN->getNumIncomingValues() > 1) - updateMemDistance(a, b, bound, level, min); - else - updateMemDistance(a, b, bound, level, max); - - active.erase(b); - - ABCD::ProveResult res = mem_result.getBoundResult(b, bound); - return res; -} - -/// Updates the distance value for a and b -void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound, - unsigned level, meet_function meet) { - ABCD::ProveResult res = (meet == max) ? False : True; - - SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b); - SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end(); - - for (; begin != end ; ++begin) { - if (((res >= Reduced) && (meet == max)) || - ((res == False) && (meet == min))) { - break; - } - const Edge &in = *begin; - if (in.isUpperBound() == bound.isUpperBound()) { - Value *succ = in.getVertex(); - res = meet(res, prove(a, succ, Bound(bound, in.getValue()), - level+1)); - } - } - - mem_result.updateBound(b, bound, res); -} - -/// Return the stored result for this bound -ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{ - if (max_false && Bound::leq(bound, *max_false)) - return False; - if (min_true && Bound::leq(*min_true, bound)) - return True; - if (min_reduced && Bound::leq(*min_reduced, bound)) - return Reduced; - return False; -} - -/// Stores a false found -void ABCD::MemoizedResultChart::addFalse(const Bound &bound) { - if (!max_false || Bound::leq(*max_false, bound)) - max_false.reset(new Bound(bound)); - - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced))); - if (Bound::eq(max_false.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), min_true.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a true found -void 
ABCD::MemoizedResultChart::addTrue(const Bound &bound) { - if (!min_true || Bound::leq(bound, *min_true)) - min_true.reset(new Bound(bound)); - - if (Bound::eq(min_true.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced))); - if (Bound::eq(min_true.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a Reduced found -void ABCD::MemoizedResultChart::addReduced(const Bound &bound) { - if (!min_reduced || Bound::leq(bound, *min_reduced)) - min_reduced.reset(new Bound(bound)); - - if (Bound::eq(min_reduced.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); -} - -/// Clears redundant reduced -/// If a min_true is smaller than a min_reduced then the min_reduced -/// is unnecessary and then removed. It also works for min_reduced -/// begin smaller than max_false. 
-void ABCD::MemoizedResultChart::clearRedundantReduced() { - if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced)) - min_reduced.reset(); - if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false)) - min_reduced.reset(); -} - -/// Stores the bound found -void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound, - const ProveResult res) { - if (res == False) { - map[b].addFalse(bound); - } else if (res == True) { - map[b].addTrue(bound); - } else { - map[b].addReduced(bound); - } -} - -/// Adds an edge from V_from to V_to with weight value -void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from, - APInt value, bool upper) { - assert(V_from->getType() == V_to->getType()); - assert(cast<IntegerType>(V_from->getType())->getBitWidth() == - value.getBitWidth()); - - graph[V_from].push_back(Edge(V_to, value, upper)); -} - -/// Test if there is any edge from V in the upper direction -bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const { - SmallVector<Edge, 16> it = graph.lookup(V); - - SmallVector<Edge, 16>::iterator begin = it.begin(); - SmallVector<Edge, 16>::iterator end = it.end(); - for (; begin != end; ++begin) { - if (begin->isUpperBound() == upper) { - return true; - } - } - return false; -} - -/// Prints the header of the dot file -void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const { - OS << "digraph dotgraph {\n"; - OS << "label=\"Inequality Graph for \'"; - OS << F.getNameStr() << "\' function\";\n"; - OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n"; -} - -/// Prints the body of the dot file -void ABCD::InequalityGraph::printBody(raw_ostream &OS) const { - DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin = - graph.begin(), end = graph.end(); - - for (; begin != end ; ++begin) { - SmallVector<Edge, 16>::const_iterator begin_par = - begin->second.begin(), end_par = begin->second.end(); - Value *source = begin->first; - - printVertex(OS, source); - - 
for (; begin_par != end_par ; ++begin_par) { - const Edge &edge = *begin_par; - printEdge(OS, source, edge); - } - } -} - -/// Prints vertex source to the dot file -/// -void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const { - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " [label=\"{"; - printName(OS, source); - OS << "}\"];\n"; -} - -/// Prints the edge to the dot file -void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source, - const Edge &edge) const { - Value *dest = edge.getVertex(); - APInt value = edge.getValue(); - bool upper = edge.isUpperBound(); - - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " -> "; - OS << "\""; - printName(OS, dest); - OS << "\""; - OS << " [label=\"" << value << "\""; - if (upper) { - OS << "color=\"blue\""; - } else { - OS << "color=\"red\""; - } - OS << "];\n"; -} - -void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const { - if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) { - OS << *CI; - } else { - if (!info->hasName()) { - info->setName("V"); - } - OS << info->getNameStr(); - } -} - -/// createABCDPass - The public interface to this file... 
-FunctionPass *llvm::createABCDPass() { - return new ABCD(); -} diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 2d19467ce746..ada086e9db76 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -33,7 +33,7 @@ STATISTIC(NumRemoved, "Number of instructions removed"); namespace { struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(&ID) {} + ADCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function& F); @@ -45,7 +45,7 @@ namespace { } char ADCE::ID = 0; -static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination"); +INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false); bool ADCE::runOnFunction(Function& F) { SmallPtrSet<Instruction*, 128> alive; diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index 54533f50405f..b144678c6a0e 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -41,7 +41,7 @@ STATISTIC(NumMoved, "Number of basic blocks moved"); namespace { struct BlockPlacement : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BlockPlacement() : FunctionPass(&ID) {} + BlockPlacement() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -74,8 +74,8 @@ namespace { } char BlockPlacement::ID = 0; -static RegisterPass<BlockPlacement> -X("block-placement", "Profile Guided Basic Block Placement"); +INITIALIZE_PASS(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false); FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 1a3b10cc9baa..b7598eace536 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,9 +1,9 @@ 
add_llvm_library(LLVMScalarOpts - ABCD.cpp ADCE.cpp BasicBlockPlacement.cpp CodeGenPrepare.cpp ConstantProp.cpp + CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp GEPSplitter.cpp @@ -17,6 +17,7 @@ add_llvm_library(LLVMScalarOpts LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LowerAtomic.cpp MemCpyOptimizer.cpp Reassociate.cpp Reg2Mem.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 272066c8c0c4..e07b761e589c 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -41,6 +42,11 @@ using namespace llvm; using namespace llvm::PatternMatch; +static cl::opt<bool> +CriticalEdgeSplit("cgp-critical-edge-splitting", + cl::desc("Split critical edges during codegen prepare"), + cl::init(true), cl::Hidden); + namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -54,7 +60,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) - : FunctionPass(&ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -82,8 +88,8 @@ namespace { } char CodeGenPrepare::ID = 0; -static RegisterPass<CodeGenPrepare> X("codegenprepare", - "Optimize for code generation"); +INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false); FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); @@ -427,9 +433,9 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, 
const TargetLowering &TLI){ // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) + if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) + if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. @@ -548,9 +554,9 @@ protected: CI->eraseFromParent(); } bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - - CallInst::ArgOffset))) - return SizeCI->isAllOnesValue(); + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) + return SizeCI->isAllOnesValue(); return false; } }; @@ -891,12 +897,14 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { bool MadeChange = false; // Split all critical edges where the dest block has a PHI. 
- TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); + if (CriticalEdgeSplit) { + TerminatorInst *BBTI = BB.getTerminator(); + if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { + for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { + BasicBlock *SuccBB = BBTI->getSuccessor(i); + if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) + SplitEdgeNicely(BBTI, i, BackEdges, this); + } } } diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index ea208135739d..a0ea369d0cad 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -34,7 +34,7 @@ STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { struct ConstantPropagation : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ConstantPropagation() : FunctionPass(&ID) {} + ConstantPropagation() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -45,8 +45,8 @@ namespace { } char ConstantPropagation::ID = 0; -static RegisterPass<ConstantPropagation> -X("constprop", "Simple constant propagation"); +INITIALIZE_PASS(ConstantPropagation, "constprop", + "Simple constant propagation", false, false); FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp new file mode 100644 index 000000000000..0d4e45de3466 --- /dev/null +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -0,0 +1,200 @@ +//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===// +// +// The LLVM Compiler 
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Correlated Value Propagation pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "correlated-value-propagation" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPhis, "Number of phis propagated"); +STATISTIC(NumSelects, "Number of selects propagated"); +STATISTIC(NumMemAccess, "Number of memory access targets propagated"); +STATISTIC(NumCmps, "Number of comparisons propagated"); + +namespace { + class CorrelatedValuePropagation : public FunctionPass { + LazyValueInfo *LVI; + + bool processSelect(SelectInst *SI); + bool processPHI(PHINode *P); + bool processMemAccess(Instruction *I); + bool processCmp(CmpInst *C); + + public: + static char ID; + CorrelatedValuePropagation(): FunctionPass(ID) { } + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LazyValueInfo>(); + } + }; +} + +char CorrelatedValuePropagation::ID = 0; +INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation", + "Value Propagation", false, false); + +// Public interface to the Value Propagation pass +Pass *llvm::createCorrelatedValuePropagationPass() { + return new CorrelatedValuePropagation(); +} + +bool CorrelatedValuePropagation::processSelect(SelectInst *S) { + if (S->getType()->isVectorTy()) return false; + if (isa<Constant>(S->getOperand(0))) return false; + + Constant *C = LVI->getConstant(S->getOperand(0), S->getParent()); + if (!C) return false; + + 
ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return false; + + S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2)); + S->eraseFromParent(); + + ++NumSelects; + + return true; +} + +bool CorrelatedValuePropagation::processPHI(PHINode *P) { + bool Changed = false; + + BasicBlock *BB = P->getParent(); + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + Value *Incoming = P->getIncomingValue(i); + if (isa<Constant>(Incoming)) continue; + + Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), + P->getIncomingBlock(i), + BB); + if (!C) continue; + + P->setIncomingValue(i, C); + Changed = true; + } + + if (Value *ConstVal = P->hasConstantValue()) { + P->replaceAllUsesWith(ConstVal); + P->eraseFromParent(); + Changed = true; + } + + ++NumPhis; + + return Changed; +} + +bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { + Value *Pointer = 0; + if (LoadInst *L = dyn_cast<LoadInst>(I)) + Pointer = L->getPointerOperand(); + else + Pointer = cast<StoreInst>(I)->getPointerOperand(); + + if (isa<Constant>(Pointer)) return false; + + Constant *C = LVI->getConstant(Pointer, I->getParent()); + if (!C) return false; + + ++NumMemAccess; + I->replaceUsesOfWith(Pointer, C); + return true; +} + +/// processCmp - If the value of this comparison could be determined locally, +/// constant propagation would already have figured it out. Instead, walk +/// the predecessors and statically evaluate the comparison based on information +/// available on that edge. If a given static evaluation is true on ALL +/// incoming edges, then it's true universally and we can simplify the compare. 
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) { + Value *Op0 = C->getOperand(0); + if (isa<Instruction>(Op0) && + cast<Instruction>(Op0)->getParent() == C->getParent()) + return false; + + Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); + if (!Op1) return false; + + pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent()); + if (PI == PE) return false; + + LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Result == LazyValueInfo::Unknown) return false; + + ++PI; + while (PI != PE) { + LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Res != Result) return false; + ++PI; + } + + ++NumCmps; + + if (Result == LazyValueInfo::True) + C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext())); + else + C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext())); + + C->eraseFromParent(); + + return true; +} + +bool CorrelatedValuePropagation::runOnFunction(Function &F) { + LVI = &getAnalysis<LazyValueInfo>(); + + bool FnChanged = false; + + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + bool BBChanged = false; + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { + Instruction *II = BI++; + switch (II->getOpcode()) { + case Instruction::Select: + BBChanged |= processSelect(cast<SelectInst>(II)); + break; + case Instruction::PHI: + BBChanged |= processPHI(cast<PHINode>(II)); + break; + case Instruction::ICmp: + case Instruction::FCmp: + BBChanged |= processCmp(cast<CmpInst>(II)); + break; + case Instruction::Load: + case Instruction::Store: + BBChanged |= processMemAccess(II); + break; + } + } + + // Propagating correlated values might leave cruft around. + // Try to clean it up before we continue. 
+ if (BBChanged) + SimplifyInstructionsInBlock(FI); + + FnChanged |= BBChanged; + } + + return FnChanged; +} diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 39940c35da5d..87ea8038356a 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -35,7 +35,7 @@ namespace { // struct DeadInstElimination : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid - DeadInstElimination() : BasicBlockPass(&ID) {} + DeadInstElimination() : BasicBlockPass(ID) {} virtual bool runOnBasicBlock(BasicBlock &BB) { bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { @@ -56,8 +56,8 @@ namespace { } char DeadInstElimination::ID = 0; -static RegisterPass<DeadInstElimination> -X("die", "Dead Instruction Elimination"); +INITIALIZE_PASS(DeadInstElimination, "die", + "Dead Instruction Elimination", false, false); Pass *llvm::createDeadInstEliminationPass() { return new DeadInstElimination(); @@ -70,7 +70,7 @@ namespace { // struct DCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - DCE() : FunctionPass(&ID) {} + DCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -81,7 +81,7 @@ namespace { } char DCE::ID = 0; -static RegisterPass<DCE> Y("dce", "Dead Code Elimination"); +INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false); bool DCE::runOnFunction(Function &F) { // Start out with all of the instructions in the worklist... 
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index e047e4ffa151..c8fd9d9fa556 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -40,7 +40,7 @@ namespace { TargetData *TD; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(&ID) {} + DSE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { bool Changed = false; @@ -82,7 +82,7 @@ namespace { } char DSE::ID = 0; -static RegisterPass<DSE> X("dse", "Dead Store Elimination"); +INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false); FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } @@ -401,10 +401,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } continue; - } else if (CallSite::get(BBI).getInstruction() != 0) { + } else if (CallSite CS = cast<Value>(BBI)) { // If this call does not access memory, it can't // be undeadifying any of our pointers. 
- CallSite CS = CallSite::get(BBI); if (AA.doesNotAccessMemory(CS)) continue; diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp index 610a41dae44b..53dd06d24bb5 100644 --- a/lib/Transforms/Scalar/GEPSplitter.cpp +++ b/lib/Transforms/Scalar/GEPSplitter.cpp @@ -27,13 +27,13 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const; public: static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(&ID) {} + explicit GEPSplitter() : FunctionPass(ID) {} }; } char GEPSplitter::ID = 0; -static RegisterPass<GEPSplitter> X("split-geps", - "split complex GEPs into simple GEPs"); +INITIALIZE_PASS(GEPSplitter, "split-geps", + "split complex GEPs into simple GEPs", false, false); FunctionPass *llvm::createGEPSplitterPass() { return new GEPSplitter(); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 88b67768fa5d..c62ce1f27f64 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -165,7 +165,6 @@ namespace { Expression create_expression(CastInst* C); Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); Expression create_expression(ExtractValueInst* C); Expression create_expression(InsertValueInst* C); @@ -665,7 +664,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(&ID), NoLoads(noloads), MD(0) { } + : FunctionPass(ID), NoLoads(noloads), MD(0) { } private: bool NoLoads; @@ -716,8 +715,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { return new GVN(NoLoads); } -static RegisterPass<GVN> X("gvn", - "Global Value Numbering"); +INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false); void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; @@ -735,7 +733,7 @@ static bool isSafeReplacement(PHINode* p, Instruction *inst) { for 
(Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) - if (PHINode* use_phi = dyn_cast<PHINode>(UI)) + if (PHINode* use_phi = dyn_cast<PHINode>(*UI)) if (use_phi->getParent() == inst->getParent()) return false; @@ -1312,7 +1310,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // Otherwise, we have to construct SSA form. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); - SSAUpdate.Initialize(LI); + SSAUpdate.Initialize(LI->getType(), LI->getName()); const Type *LoadTy = LI->getType(); @@ -2112,6 +2110,11 @@ bool GVN::performPRE(Function &F) { CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa<DbgInfoIntrinsic>(CurInst)) continue; + + // We don't currently value number ANY inline asm calls. + if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) + if (CallI->isInlineAsm()) + continue; uint32_t ValNo = VN.lookup(CurInst); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b5c9dd881df8..af2eafc47cbf 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -77,7 +77,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(&ID) {} + IndVarSimplify() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -102,7 +102,7 @@ namespace { void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); @@ -117,8 +117,8 @@ namespace { } char IndVarSimplify::ID = 0; -static RegisterPass<IndVarSimplify> -X("indvars", "Canonicalize Induction Variables"); +INITIALIZE_PASS(IndVarSimplify, "indvars", + "Canonicalize Induction Variables", false, false); Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); @@ -131,7 +131,7 @@ Pass *llvm::createIndVarSimplifyPass() { /// is 
actually a much broader range than just linear tests. ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter) { @@ -181,7 +181,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = L->getCanonicalInductionVariableIncrement(); + CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock); } else { // We have to use the preincremented value... RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, @@ -534,7 +534,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. - Value *IndVar = 0; + PHINode *IndVar = 0; if (NeedCannIV) { // Check to see if the loop already has any canonical-looking induction // variables. If any are present and wider than the planned canonical @@ -862,9 +862,9 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Check Incr uses. One user is PN and the other user is an exit condition // used by the conditional terminator. Value::use_iterator IncrUse = Incr->use_begin(); - Instruction *U1 = cast<Instruction>(IncrUse++); + Instruction *U1 = cast<Instruction>(*IncrUse++); if (IncrUse == Incr->use_end()) return; - Instruction *U2 = cast<Instruction>(IncrUse++); + Instruction *U2 = cast<Instruction>(*IncrUse++); if (IncrUse != Incr->use_end()) return; // Find exit condition, which is an fcmp. 
If it doesn't exist, or if it isn't diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index edce14cd92ea..104d5aecbdd3 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -45,7 +46,10 @@ Threshold("jump-threading-threshold", // Turn on use of LazyValueInfo. static cl::opt<bool> -EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden); +EnableLVI("enable-jump-threading-lvi", + cl::desc("Use LVI for jump threading"), + cl::init(true), + cl::ReallyHidden); @@ -74,15 +78,32 @@ namespace { #else SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; #endif + DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; + + // RAII helper for updating the recursion stack. 
+ struct RecursionSetRemover { + DenseSet<std::pair<Value*, BasicBlock*> > &TheSet; + std::pair<Value*, BasicBlock*> ThePair; + + RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S, + std::pair<Value*, BasicBlock*> P) + : TheSet(S), ThePair(P) { } + + ~RecursionSetRemover() { + TheSet.erase(ThePair); + } + }; public: static char ID; // Pass identification - JumpThreading() : FunctionPass(&ID) {} + JumpThreading() : FunctionPass(ID) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - if (EnableLVI) + if (EnableLVI) { AU.addRequired<LazyValueInfo>(); + AU.addPreserved<LazyValueInfo>(); + } } void FindLoopHeaders(Function &F); @@ -111,8 +132,8 @@ namespace { } char JumpThreading::ID = 0; -static RegisterPass<JumpThreading> -X("jump-threading", "Jump Threading"); +INITIALIZE_PASS(JumpThreading, "jump-threading", + "Jump Threading", false, false); // Public interface to the Jump Threading pass FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } @@ -144,6 +165,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); + if (LVI) LVI->eraseBlock(BB); DeleteDeadBlock(BB); Changed = true; } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -164,6 +186,11 @@ bool JumpThreading::runOnFunction(Function &F) { bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); BasicBlock *Succ = BI->getSuccessor(0); + // FIXME: It is always conservatively correct to drop the info + // for a block even if it doesn't get erased. This isn't totally + // awesome, but it allows us to use AssertingVH to prevent nasty + // dangling pointer issues within LazyValueInfo. 
+ if (LVI) LVI->eraseBlock(BB); if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { Changed = true; // If we deleted BB and BB was the header of a loop, then the @@ -251,6 +278,17 @@ void JumpThreading::FindLoopHeaders(Function &F) { LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); } +// Helper method for ComputeValueKnownInPredecessors. If Value is a +// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing. +static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*, + BasicBlock*> > &Result, + Constant *Value, BasicBlock* BB){ + if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value)) + Result.push_back(std::make_pair(FoldedCInt, BB)); + else if (isa<UndefValue>(Value)) + Result.push_back(std::make_pair((ConstantInt*)0, BB)); +} + /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see /// if we can infer that the value is a known ConstantInt in any of our /// predecessors. If so, return the known list of value and pred BB in the @@ -260,12 +298,24 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// bool JumpThreading:: ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ + // This method walks up use-def chains recursively. Because of this, we could + // get into an infinite loop going around loops in the use-def chain. To + // prevent this, keep track of what (value, block) pairs we've already visited + // and terminate the search if we loop back to them + if (!RecursionSet.insert(std::make_pair(V, BB)).second) + return false; + + // An RAII helper to remove this pair from the recursion set once the recursion + // stack pops back out again. + RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); + // If V is a constantint, then it is known in all predecessors. 
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) { ConstantInt *CI = dyn_cast<ConstantInt>(V); for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) Result.push_back(std::make_pair(CI, *PI)); + return true; } @@ -313,8 +363,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) { ConstantInt *CI = dyn_cast<ConstantInt>(InVal); Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i))); + } else if (LVI) { + Constant *CI = LVI->getConstantOnEdge(InVal, + PN->getIncomingBlock(i), BB); + // LVI returns null if no value could be determined. + if (!CI) continue; + PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i)); } } + return !Result.empty(); } @@ -338,29 +395,26 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); + SmallPtrSet<BasicBlock*, 4> LHSKnownBBs; + // Scan for the sentinel. If we find an undef, force it to the // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); Result.back().first = InterestingVal; + LHSKnownBBs.insert(LHSVals[i].second); } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { // If we already inferred a value for this block on the LHS, don't // re-add it. 
- bool HasValue = false; - for (unsigned r = 0, e = Result.size(); r != e; ++r) - if (Result[r].second == RHSVals[i].second) { - HasValue = true; - break; - } - - if (!HasValue) { + if (!LHSKnownBBs.count(RHSVals[i].second)) { Result.push_back(RHSVals[i]); Result.back().first = InterestingVal; } } + return !Result.empty(); } @@ -377,8 +431,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ if (Result[i].first) Result[i].first = cast<ConstantInt>(ConstantExpr::getNot(Result[i].first)); + return true; } + + // Try to simplify some other binary operator values. + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals); + + // Try to use constant folding to simplify the binary operator. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? LHSVals[i].first : + cast<Constant>(UndefValue::get(BO->getType())); + Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); + + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + } + + return !Result.empty(); } // Handle compare with phi operand, where the PHI is defined in this block. 
@@ -405,10 +478,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); } - if (isa<UndefValue>(Res)) - Result.push_back(std::make_pair((ConstantInt*)0, PredBB)); - else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res)) - Result.push_back(std::make_pair(CI, PredBB)); + if (Constant *ConstRes = dyn_cast<Constant>(Res)) + PushConstantIntOrUndef(Result, ConstRes, PredBB); } return !Result.empty(); @@ -418,28 +489,59 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isIntegerTy() && // Not vector compare. - (!isa<Instruction>(Cmp->getOperand(0)) || - cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { - Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + Cmp->getType()->isIntegerTy()) { + if (!isa<Instruction>(Cmp->getOperand(0)) || + cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { + Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + LazyValueInfo::Tristate Res = + LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + RHSCst, P, BB); + if (Res == LazyValueInfo::Unknown) + continue; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - // If the value is known by LazyValueInfo to be a constant in a - // predecessor, use that information to try to thread this block. 
- LazyValueInfo::Tristate - Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, P, BB); - if (Res == LazyValueInfo::Unknown) - continue; + Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + } - Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + return !Result.empty(); } - - return !Result.empty(); + + // Try to find a constant value for the LHS of a comparison, + // and evaluate it statically if we can. + if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); + + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? LHSVals[i].first : + cast<Constant>(UndefValue::get(CmpConst->getType())); + Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + V, CmpConst); + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + + return !Result.empty(); + } + } + } + + if (LVI) { + // If all else fails, see if LVI can figure out a constant value for us. + Constant *CI = LVI->getConstant(V, BB); + ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI); + if (CInt) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(CInt, *PI)); } + + return !Result.empty(); } + return false; } @@ -490,6 +592,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. 
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + if (LVI) LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); if (isEntry && BB != &BB->getParent()->getEntryBlock()) @@ -603,6 +706,44 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } } + + // For a comparison where the LHS is outside this block, it's possible + // that we've branched on it before. Used LVI to see if we can simplify + // the branch based on that. + BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); + Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE && + (!isa<Instruction>(CondCmp->getOperand(0)) || + cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) { + // For predecessor edge, determine if the comparison is true or false + // on that edge. If they're all true or all false, we can simplify the + // branch. + // FIXME: We could handle mixed true/false by duplicating code. + LazyValueInfo::Tristate Baseline = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Baseline != LazyValueInfo::Unknown) { + // Check that all remaining incoming values match the first one. + while (++PI != PE) { + LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge( + CondCmp->getPredicate(), + CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Ret != Baseline) break; + } + + // If we terminated early, then one of the values didn't match. + if (PI == PE) { + unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0; + unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1; + RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD); + BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); + CondBr->eraseFromParent(); + return true; + } + } + } } // Check for some cases that are worth simplifying. 
Right now we want to look @@ -1020,6 +1161,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues; if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) return false; + assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); @@ -1314,6 +1456,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, << ", across block:\n " << *BB << "\n"); + if (LVI) + LVI->threadEdge(PredBB, BB, SuccBB); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. @@ -1383,7 +1528,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); @@ -1538,7 +1683,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 73473952912e..2ef85446bd9b 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -26,8 +26,7 @@ // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. 
We then use -// the mem2reg functionality to construct the appropriate SSA form for the -// variable. +// the SSAUpdater to construct the appropriate SSA form for the value. // //===----------------------------------------------------------------------===// @@ -37,14 +36,15 @@ #include "llvm/DerivedTypes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -66,7 +66,7 @@ DisablePromotion("disable-licm-promotion", cl::Hidden, namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid - LICM() : LoopPass(&ID) {} + LICM() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -75,39 +75,31 @@ namespace { /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg) + AU.addRequired<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); - AU.addPreserved<DominanceFrontier>(); AU.addPreservedID(LoopSimplifyID); } bool doFinalization() { - // Free the values stored in the map - for (std::map<Loop *, AliasSetTracker *>::iterator - I = LoopToAliasMap.begin(), E = 
LoopToAliasMap.end(); I != E; ++I) - delete I->second; - - LoopToAliasMap.clear(); + assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); return false; } private: - // Various analyses that we use... AliasAnalysis *AA; // Current AliasAnalysis information LoopInfo *LI; // Current LoopInfo - DominatorTree *DT; // Dominator Tree for the current Loop... - DominanceFrontier *DF; // Current Dominance Frontier + DominatorTree *DT; // Dominator Tree for the current Loop. - // State that is updated as we process loops + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... - std::map<Loop *, AliasSetTracker *> LoopToAliasMap; + DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); @@ -204,25 +196,12 @@ namespace { bool isLoopInvariantInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - /// PromoteValuesInLoop - Look at the stores in the loop and promote as many - /// to scalars as we can. - /// - void PromoteValuesInLoop(); - - /// FindPromotableValuesInLoop - Check the current loop for stores to - /// definite pointers, which are not loaded and stored through may aliases. - /// If these are found, create an alloca for the value, add it to the - /// PromotedValues list, and keep track of the mapping from value to - /// alloca... 
- /// - void FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &Val2AlMap); + void PromoteAliasSet(AliasSet &AS); }; } char LICM::ID = 0; -static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion"); +INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false); Pass *llvm::createLICMPass() { return new LICM(); } @@ -236,19 +215,23 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get our Loop and Alias Analysis information... LI = &getAnalysis<LoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); - DF = &getAnalysis<DominanceFrontier>(); DT = &getAnalysis<DominatorTree>(); CurAST = new AliasSetTracker(*AA); - // Collect Alias info from subloops + // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); LoopItr != LoopItrE; ++LoopItr) { Loop *InnerL = *LoopItr; - AliasSetTracker *InnerAST = LoopToAliasMap[InnerL]; - assert (InnerAST && "Where is my AST?"); + AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; + assert(InnerAST && "Where is my AST?"); // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); + + // Once we've incorporated the inner loop's AST into ours, we don't need the + // subloop's anymore. + delete InnerAST; + LoopToAliasSetMap.erase(InnerL); } CurLoop = L; @@ -263,7 +246,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops... + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -283,15 +266,24 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { HoistRegion(DT->getNode(L->getHeader())); // Now that all loop invariants have been removed from the loop, promote any - // memory references to scalars that we can... 
- if (!DisablePromotion && Preheader && L->hasDedicatedExits()) - PromoteValuesInLoop(); - + // memory references to scalars that we can. + if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + // Loop over all of the alias sets in the tracker object. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); + I != E; ++I) + PromoteAliasSet(*I); + } + // Clear out loops state information for the next iteration CurLoop = 0; Preheader = 0; - LoopToAliasMap[L] = CurAST; + // If this loop is nested inside of another one, save the alias information + // for when we process the outer loop. + if (L->getParentLoop()) + LoopToAliasSetMap[L] = CurAST; + else + delete CurAST; return Changed; } @@ -308,7 +300,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) return; - // We are processing blocks in reverse dfo, so process children first... + // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) SinkRegion(Children[i]); @@ -319,6 +311,17 @@ void LICM::SinkRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; + + // If the instruction is dead, we would try to sink it because it isn't used + // in the loop, instead, just delete it. + if (isInstructionTriviallyDead(&I)) { + DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); + Changed = true; + continue; + } // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if the all users of the instruction are @@ -350,6 +353,18 @@ void LICM::HoistRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { Instruction &I = *II++; + // Try constant folding this instruction. 
If all the operands are + // constants, it is technically hoistable, but it would be better to just + // fold it. + if (Constant *C = ConstantFoldInstruction(&I)) { + DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); + CurAST->copyValue(&I, C); + CurAST->deleteValue(&I); + I.replaceAllUsesWith(C); + I.eraseFromParent(); + continue; + } + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. @@ -357,7 +372,7 @@ void LICM::HoistRegion(DomTreeNode *N) { if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && isSafeToExecuteUnconditionally(I)) hoist(I); - } + } const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) @@ -457,10 +472,10 @@ bool LICM::isLoopInvariantInst(Instruction &I) { /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { - DEBUG(dbgs() << "LICM sinking instruction: " << I); + DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); + CurLoop->getUniqueExitBlocks(ExitBlocks); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -477,122 +492,101 @@ void LICM::sink(Instruction &I) { // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. 
- I.removeFromParent(); - BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); - ExitBlocks[0]->getInstList().insert(InsertPt, &I); + I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); + + // This instruction is no longer in the AST for the current loop, because + // we just sunk it out of the loop. If we just sunk it into an outer + // loop, we will rediscover the operation when we process it. + CurAST->deleteValue(&I); } - } else if (ExitBlocks.empty()) { + return; + } + + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); - } else { - // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to - // do all of the hard work of inserting PHI nodes as necessary. We convert - // the value into a stack object to get it to do this. - - // Firstly, we create a stack object to hold the value... - AllocaInst *AI = 0; - - if (!I.getType()->isVoidTy()) { - AI = new AllocaInst(I.getType(), 0, I.getName(), - I.getParent()->getParent()->getEntryBlock().begin()); - CurAST->add(AI); - } - - // Secondly, insert load instructions for each use of the instruction - // outside of the loop. - while (!I.use_empty()) { - Instruction *U = cast<Instruction>(I.use_back()); - - // If the user is a PHI Node, we actually have to insert load instructions - // in all predecessor blocks, not in the PHI block itself! - if (PHINode *UPN = dyn_cast<PHINode>(U)) { - // Only insert into each predecessor once, so that we don't have - // different incoming values from the same block! 
- std::map<BasicBlock*, Value*> InsertedBlocks; - for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i) - if (UPN->getIncomingValue(i) == &I) { - BasicBlock *Pred = UPN->getIncomingBlock(i); - Value *&PredVal = InsertedBlocks[Pred]; - if (!PredVal) { - // Insert a new load instruction right before the terminator in - // the predecessor block. - PredVal = new LoadInst(AI, "", Pred->getTerminator()); - CurAST->add(cast<LoadInst>(PredVal)); - } - - UPN->setIncomingValue(i, PredVal); - } - - } else { - LoadInst *L = new LoadInst(AI, "", U); - U->replaceUsesOfWith(&I, L); - CurAST->add(L); - } - } - - // Thirdly, insert a copy of the instruction in each exit block of the loop - // that is dominated by the instruction, storing the result into the memory - // location. Be careful not to insert the instruction into any particular - // basic block more than once. - std::set<BasicBlock*> InsertedBlocks; - BasicBlock *InstOrigBB = I.getParent(); - - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = ExitBlocks[i]; - - if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) { - // If we haven't already processed this exit block, do so now. - if (InsertedBlocks.insert(ExitBlock).second) { - // Insert the code after the last PHI node... - BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - - // If this is the first exit block processed, just move the original - // instruction, otherwise clone the original instruction and insert - // the copy. 
- Instruction *New; - if (InsertedBlocks.size() == 1) { - I.removeFromParent(); - ExitBlock->getInstList().insert(InsertPt, &I); - New = &I; - } else { - New = I.clone(); - CurAST->copyValue(&I, New); - if (!I.getName().empty()) - New->setName(I.getName()+".le"); - ExitBlock->getInstList().insert(InsertPt, New); - } - - // Now that we have inserted the instruction, store it into the alloca - if (AI) new StoreInst(New, AI, InsertPt); - } - } - } - - // If the instruction doesn't dominate any exit blocks, it must be dead. - if (InsertedBlocks.empty()) { - CurAST->deleteValue(&I); - I.eraseFromParent(); - } - - // Finally, promote the fine value to SSA form. - if (AI) { - std::vector<AllocaInst*> Allocas; - Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, CurAST); + return; + } + + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the + // hard work of inserting PHI nodes as necessary. + SmallVector<PHINode*, 8> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + if (!I.use_empty()) + SSA.Initialize(I.getType(), I.getName()); + + // Insert a copy of the instruction in each exit block of the loop that is + // dominated by the instruction. Each exit block is known to only be in the + // ExitBlocks list once. + BasicBlock *InstOrigBB = I.getParent(); + unsigned NumInserted = 0; + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + + if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) + continue; + + // Insert the code after the last PHI node. + BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); + + // If this is the first exit block processed, just move the original + // instruction, otherwise clone the original instruction and insert + // the copy. 
+ Instruction *New; + if (NumInserted++ == 0) { + I.moveBefore(InsertPt); + New = &I; + } else { + New = I.clone(); + if (!I.getName().empty()) + New->setName(I.getName()+".le"); + ExitBlock->getInstList().insert(InsertPt, New); } + + // Now that we have inserted the instruction, inform SSAUpdater. + if (!I.use_empty()) + SSA.AddAvailableValue(ExitBlock, New); } + + // If the instruction doesn't dominate any exit blocks, it must be dead. + if (NumInserted == 0) { + CurAST->deleteValue(&I); + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.eraseFromParent(); + return; + } + + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { + // Grab the use before incrementing the iterator. + Use &U = UI.getUse(); + // Increment the iterator before removing the use from the list. + ++UI; + SSA.RewriteUseAfterInsertions(U); + } + + // Update CurAST for NewPHIs if I had pointer type. + if (I.getType()->isPointerTy()) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(&I, NewPHIs[i]); + + // Finally, remove the instruction from CurAST. It is no longer in the loop. + CurAST->deleteValue(&I); } /// hoist - When an instruction is found to only use loop invariant operands @@ -602,12 +596,8 @@ void LICM::hoist(Instruction &I) { DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); - // Remove the instruction from its current basic block... but don't delete the - // instruction. - I.removeFromParent(); - - // Insert the new node in Preheader, before the terminator. - Preheader->getInstList().insert(Preheader->getTerminator(), &I); + // Move the new node to the Preheader, before its terminator. 
+ I.moveBefore(Preheader->getTerminator()); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -647,223 +637,269 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { return true; } - -/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking +/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers -/// which are loop invariant. We promote these memory locations to use allocas -/// instead. These allocas can easily be raised to register values by the -/// PromoteMem2Reg functionality. +/// which are loop invariant. /// -void LICM::PromoteValuesInLoop() { - // PromotedValues - List of values that are promoted out of the loop. Each - // value has an alloca instruction for it, and a canonical version of the - // pointer. - std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; - std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca - - FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); - if (ValueToAllocaMap.empty()) return; // If there are values to promote. - - Changed = true; - NumPromoted += PromotedValues.size(); - - std::vector<Value*> PointerValueNumbers; - - // Emit a copy from the value into the alloca'd value in the loop preheader - TerminatorInst *LoopPredInst = Preheader->getTerminator(); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - Value *Ptr = PromotedValues[i].second; - - // If we are promoting a pointer value, update alias information for the - // inserted load. - Value *LoadValue = 0; - if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { - // Locate a load or store through the pointer, and assign the same value - // to LI as we are loading or storing. Since we know that the value is - // stored in this loop, this will always succeed. 
- for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - LoadValue = LI; - break; - } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(1) == Ptr) { - LoadValue = SI->getOperand(0); - break; - } - } - } - assert(LoadValue && "No store through the pointer found!"); - PointerValueNumbers.push_back(LoadValue); // Remember this for later. - } - - // Load from the memory we are promoting. - LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst); - - if (LoadValue) CurAST->copyValue(LoadValue, LI); - - // Store into the temporary alloca. - new StoreInst(LI, PromotedValues[i].first, LoopPredInst); - } +void LICM::PromoteAliasSet(AliasSet &AS) { + // We can promote this alias set if it has a store, if it is a "Must" alias + // set, if the pointer is loop invariant, and if we are not eliminating any + // volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) + return; + + assert(!AS.empty() && + "Must alias set should have at least one pointer element in it!"); + Value *SomePtr = AS.begin()->getValue(); - // Scan the basic blocks in the loop, replacing uses of our pointers with - // uses of the allocas in question. + // It isn't safe to promote a load/store from the loop if the load/store is + // conditional. For example, turning: // - for (Loop::block_iterator I = CurLoop->block_begin(), - E = CurLoop->block_end(); I != E; ++I) { - BasicBlock *BB = *I; - // Rewrite all loads and stores in the block of the pointer... - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(L->getOperand(0)); - if (I != ValueToAllocaMap.end()) - L->setOperand(0, I->second); // Rewrite load instruction... 
- } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(S->getOperand(1)); - if (I != ValueToAllocaMap.end()) - S->setOperand(1, I->second); // Rewrite store instruction... - } - } - } - - // Now that the body of the loop uses the allocas instead of the original - // memory locations, insert code to copy the alloca value back into the - // original memory location on all exits from the loop. Note that we only - // want to insert one copy of the code in each exit block, though the loop may - // exit to the same block more than once. + // for () { if (c) *P += 1; } // - SmallPtrSet<BasicBlock*, 16> ProcessedBlocks; - - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - if (!ProcessedBlocks.insert(ExitBlocks[i])) - continue; - - // Copy all of the allocas into their memory locations. - BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI(); - Instruction *InsertPos = BI; - unsigned PVN = 0; - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - // Load from the alloca. - LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); - - // If this is a pointer type, update alias info appropriately. - if (LI->getType()->isPointerTy()) - CurAST->copyValue(PointerValueNumbers[PVN++], LI); - - // Store into the memory we promoted. - new StoreInst(LI, PromotedValues[i].second, InsertPos); - } - } - - // Now that we have done the deed, use the mem2reg functionality to promote - // all of the new allocas we just created into real SSA registers. 
+ // into: // - std::vector<AllocaInst*> PromotedAllocas; - PromotedAllocas.reserve(PromotedValues.size()); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) - PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); -} - -/// FindPromotableValuesInLoop - Check the current loop for stores to definite -/// pointers, which are not loaded and stored through may aliases and are safe -/// for promotion. If these are found, create an alloca for the value, add it -/// to the PromotedValues list, and keep track of the mapping from value to -/// alloca. -void LICM::FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &ValueToAllocaMap) { - Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin(); - - // Loop over all of the alias sets in the tracker object. - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) { - AliasSet &AS = *I; - // We can promote this alias set if it has a store, if it is a "Must" alias - // set, if the pointer is loop invariant, and if we are not eliminating any - // volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) - continue; + // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; + // + // is not safe, because *P may only be valid to access if 'c' is true. + // + // It is safe to promote P if all uses are direct load/stores and if at + // least one is guaranteed to be executed. + bool GuaranteedToExecute = false; + + SmallVector<Instruction*, 64> LoopUses; + SmallPtrSet<Value*, 4> PointerMustAliases; + + // Check that all of the pointers in the alias set have the same type. We + // cannot (yet) promote a memory location that is loaded and stored in + // different sizes. 
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + Value *ASIV = ASI->getValue(); + PointerMustAliases.insert(ASIV); - assert(!AS.empty() && - "Must alias set should have at least one pointer element in it!"); - Value *V = AS.begin()->getValue(); - // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. - { - bool PointerOk = true; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - if (V->getType() != I->getValue()->getType()) { - PointerOk = false; - break; - } - if (!PointerOk) - continue; - } - - // It isn't safe to promote a load/store from the loop if the load/store is - // conditional. For example, turning: - // - // for () { if (c) *P += 1; } - // - // into: - // - // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; - // - // is not safe, because *P may only be valid to access if 'c' is true. - // - // It is safe to promote P if all uses are direct load/stores and if at - // least one is guaranteed to be executed. - bool GuaranteedToExecute = false; - bool InvalidInst = false; - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + if (SomePtr->getType() != ASIV->getType()) + return; + + for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { - // Ignore instructions not in this loop. + // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; - - if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) { - InvalidInst = true; - break; - } + + // If there is an non-load/store instruction in the loop, we can't promote + // it. 
+ if (isa<LoadInst>(Use)) + assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); + else if (isa<StoreInst>(Use)) { + assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); + if (Use->getOperand(0) == ASIV) return; + } else + return; // Not a load or store. if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); + + LoopUses.push_back(Use); } + } + + // If there isn't a guaranteed-to-execute instruction, we can't promote. + if (!GuaranteedToExecute) + return; + + // Otherwise, this is safe to promote, lets do it! + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + Changed = true; + ++NumPromoted; - // If there is an non-load/store instruction in the loop, we can't promote - // it. If there isn't a guaranteed-to-execute instruction, we can't - // promote. - if (InvalidInst || !GuaranteedToExecute) + // We use the SSAUpdater interface to insert phi nodes as required. + SmallVector<PHINode*, 16> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + // It wants to know some value of the same type as what we'll be inserting. + Value *SomeValue; + if (isa<LoadInst>(LoopUses[0])) + SomeValue = LoopUses[0]; + else + SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); + SSA.Initialize(SomeValue->getType(), SomeValue->getName()); + + // First step: bucket up uses of the pointers by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. + DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; + for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { + Instruction *User = LoopUses[i]; + UsesByBlock[User->getParent()].push_back(User); + } + + // Okay, now we can iterate over all the blocks in the loop with uses, + // processing them. Keep track of which loads are loading a live-in value. 
+ SmallVector<LoadInst*, 32> LiveInLoads; + DenseMap<Value*, Value*> ReplacedLoads; + + for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { + Instruction *User = LoopUses[LoopUse]; + std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (isa<StoreInst>(User)) { + SSA.AddAvailableValue(User->getParent(), + cast<StoreInst>(User)->getOperand(0)); + } else { + // Otherwise it is a load, queue it to rewrite as a live-in load. + LiveInLoads.push_back(cast<LoadInst>(User)); + } + BlockUses.clear(); continue; + } - const Type *Ty = cast<PointerType>(V->getType())->getElementType(); - AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart); - PromotedValues.push_back(std::make_pair(AI, V)); + // Otherwise, check to see if this block is all loads. If so, we can queue + // them all as live in loads. + bool HasStore = false; + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { + if (isa<StoreInst>(BlockUses[i])) { + HasStore = true; + break; + } + } + + if (!HasStore) { + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) + LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + BlockUses.clear(); + continue; + } - // Update the AST and alias analysis. - CurAST->copyValue(V, AI); + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live in value. The last store defines + // the live out value. We handle this by doing a linear scan of the block. 
+ BasicBlock *BB = User->getParent(); + Value *StoredValue = 0; + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast<LoadInst>(II)) { + // If this is a load from an unrelated pointer, ignore it. + if (!PointerMustAliases.count(L->getOperand(0))) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *S = dyn_cast<StoreInst>(II)) { + // If this is a store to an unrelated pointer, ignore it. + if (!PointerMustAliases.count(S->getOperand(1))) continue; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); + // Remember that this is the active value in the block. + StoredValue = S->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Now that all the intra-loop values are classified, set up the preheader. + // It gets a load of the pointer we're promoting, and it is the live-out value + // from the preheader. + LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", + Preheader->getTerminator()); + SSA.AddAvailableValue(Preheader, PreheaderLoad); + + // Now that the preheader is good to go, set up the exit blocks. Each exit + // block gets a store of the live-out values that feed them. Since we've + // already told the SSA updater about the defs in the loop and the preheader + // definition, it is all set and we can start using it. 
+ SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Instruction *InsertPos = ExitBlock->getFirstNonPHI(); + new StoreInst(LiveInValue, SomePtr, InsertPos); + } - DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n"); + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { + LoadInst *ALoad = LiveInLoads[i]; + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + ALoad->replaceAllUsesWith(NewVal); + CurAST->copyValue(ALoad, NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // If the preheader load is itself a pointer, we need to tell alias analysis + // about the new pointer we created in the preheader block and about any PHI + // nodes that just got inserted. + if (PreheaderLoad->getType()->isPointerTy()) { + // Copy any value stored to or loaded from a must-alias of the pointer. + CurAST->copyValue(SomeValue, PreheaderLoad); + + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(SomeValue, NewPHIs[i]); } + + // Now that everything is rewritten, delete the old instructions from the body + // of the loop. They should all be dead now. + for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { + Instruction *User = LoopUses[i]; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. 
The intermediately loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. + DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + User->replaceAllUsesWith(NewVal); + CurAST->copyValue(User, NewVal); + } + + CurAST->deleteValue(User); + User->eraseFromParent(); + } + + // fwew, we're done! } + /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return; @@ -873,7 +909,7 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias /// set. void LICM::deleteAnalysisValue(Value *V, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return; diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index e4894e99b68f..543dfc1cba09 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -28,7 +28,7 @@ namespace { class LoopDeletion : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopDeletion() : LoopPass(&ID) {} + LoopDeletion() : LoopPass(ID) {} // Possibly eliminate loop L if it is dead. 
bool runOnLoop(Loop* L, LPPassManager& LPM); @@ -38,9 +38,9 @@ namespace { bool &Changed, BasicBlock *Preheader); virtual void getAnalysisUsage(AnalysisUsage& AU) const { - AU.addRequired<ScalarEvolution>(); AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); @@ -55,7 +55,8 @@ namespace { } char LoopDeletion::ID = 0; -static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops"); +INITIALIZE_PASS(LoopDeletion, "loop-deletion", + "Delete dead loops", false, false); Pass* llvm::createLoopDeletionPass() { return new LoopDeletion(); diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 31058e5759a4..a4336743a8f0 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -74,7 +74,7 @@ namespace { class LoopIndexSplit : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopIndexSplit() : LoopPass(&ID) {} + LoopIndexSplit() : LoopPass(ID) {} // Index split Loop L. Return true if loop is split. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -197,8 +197,8 @@ namespace { } char LoopIndexSplit::ID = 0; -static RegisterPass<LoopIndexSplit> -X("loop-index-split", "Index Split Loops"); +INITIALIZE_PASS(LoopIndexSplit, "loop-index-split", + "Index Split Loops", false, false); Pass *llvm::createLoopIndexSplitPass() { return new LoopIndexSplit(); @@ -677,7 +677,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB); PI != PE; ++PI) { BasicBlock *P = *PI; - if (P == DeadBB || DT->dominates(DeadBB, P)) + if (DT->dominates(DeadBB, P)) PredBlocks.push_back(P); } @@ -799,7 +799,7 @@ void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, // the dominance frontiers. 
for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end(); I != E; ++I) { - if (*I == CondBB || !DT->dominates(CondBB, *I)) continue; + if (!DT->properlyDominates(CondBB, *I)) continue; DominanceFrontier::iterator BBDF = DF->find(*I); DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); @@ -1183,7 +1183,7 @@ bool LoopIndexSplit::cleanBlock(BasicBlock *BB) { bool usedOutsideBB = false; for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) { - Instruction *U = cast<Instruction>(UI); + Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB) usedOutsideBB = true; } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 16c4a15d3550..65acc1d9257a 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -35,7 +35,7 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(&ID) {} + LoopRotate() : LoopPass(ID) {} // Rotate Loop L as many times as possible. Return true if // loop is rotated at least once. @@ -43,15 +43,15 @@ namespace { // LCSSA form makes instruction renaming easier. 
virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<DominanceFrontier>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } // Helper functions @@ -79,7 +79,7 @@ namespace { } char LoopRotate::ID = 0; -static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops"); +INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false); Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } @@ -221,7 +221,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // The value now exits in two versions: the initial value in the preheader // and the loop "next" value in the original header. - SSA.Initialize(OrigHeaderVal); + SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal); @@ -261,6 +261,26 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // NewHeader is now the header of the loop. L->moveToHeader(NewHeader); + // Move the original header to the bottom of the loop, where it now more + // naturally belongs. This isn't necessary for correctness, and CodeGen can + // usually reorder blocks on its own to fix things like this up, but it's + // still nice to keep the IR readable. + // + // The original header should have only one predecessor at this point, since + // we checked that the loop had a proper preheader and unique backedge before + // we started. 
+ assert(OrigHeader->getSinglePredecessor() && + "Original loop header has too many predecessors after loop rotation!"); + OrigHeader->moveAfter(OrigHeader->getSinglePredecessor()); + + // Also, since this original header only has one predecessor, zap its + // PHI nodes, which are now trivial. + FoldSingleEntryPHINodes(OrigHeader); + + // TODO: We could just go ahead and merge OrigHeader into its predecessor + // at this point, if we don't mind updating dominator info. + + // Establish a new preheader, update dominators, etc. preserveCanonicalLoopForm(LPM); ++NumRotated; diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 1f9b4156b9cd..e8dc5d3a640e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -161,9 +161,10 @@ RegUseTracker::DropUse(size_t LUIdx) { bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUsesMap.count(Reg)) return false; - const SmallBitVector &UsedByIndices = - RegUsesMap.find(Reg)->second.UsedByIndices; + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + if (I == RegUsesMap.end()) + return false; + const SmallBitVector &UsedByIndices = I->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -441,12 +442,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. 
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { - const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, - IgnoreSignificantBits); - if (!Start) return 0; const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); if (!Step) return 0; + const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; return SE.getAddRecExpr(Start, Step, AR->getLoop()); } return 0; @@ -505,12 +506,14 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddExpr(NewOps); + if (Result != 0) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result != 0) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -528,12 +531,14 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); - S = SE.getAddExpr(NewOps); + if (Result) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -965,6 +970,12 @@ public: /// may be used. 
bool AllFixupsOutsideLoop; + /// WidestFixupType - This records the widest use type for any fixup using + /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different + /// max fixup widths to be equivalent, because the narrower one may be relying + /// on the implicit truncation to truncate away bogus bits. + const Type *WidestFixupType; + /// Formulae - A list of ways to build a value that can satisfy this user. /// After the list is populated, one of these is selected heuristically and /// used to formulate a replacement for OperandValToReplace in UserInst. @@ -976,15 +987,14 @@ public: LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true) {} + AllFixupsOutsideLoop(true), + WidestFixupType(0) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); void DeleteFormula(Formula &F); void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); - void check() const; - void print(raw_ostream &OS) const; void dump() const; }; @@ -1076,13 +1086,16 @@ void LSRUse::print(raw_ostream &OS) const { for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), E = Offsets.end(); I != E; ++I) { OS << *I; - if (next(I) != E) + if (llvm::next(I) != E) OS << ','; } OS << '}'; if (AllFixupsOutsideLoop) OS << ", all-fixups-outside-loop"; + + if (WidestFixupType) + OS << ", widest fixup type: " << *WidestFixupType; } void LSRUse::dump() const { @@ -1354,6 +1367,10 @@ public: void FilterOutUndesirableDedicatedRegisters(); size_t EstimateSearchSpaceComplexity() const; + void NarrowSearchSpaceByDetectingSupersets(); + void NarrowSearchSpaceByCollapsingUnrolledCode(); + void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1587,7 +1604,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, 
IVStrideUse* &CondUse) { const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); // Add one to the backedge-taken count to get the trip count. - const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount); if (IterationCount != SE.getSCEV(Sel)) return Cond; // Check for a max calculation that matches the pattern. There's no check @@ -1919,32 +1936,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) { LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { - // Search all uses for the formula. This could be more clever. Ignore - // ICmpZero uses because they may contain formulae generated by - // GenerateICmpZeroScales, in which case adding fixup offsets may - // be invalid. + // Search all uses for the formula. This could be more clever. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; + // Check whether this use is close enough to OrigLU, to see whether it's + // worthwhile looking through its formulae. + // Ignore ICmpZero uses because they may contain formulae generated by + // GenerateICmpZeroScales, in which case adding fixup offsets may + // be invalid. if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero && LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && + LU.WidestFixupType == OrigLU.WidestFixupType && LU.HasFormulaWithSameRegs(OrigF)) { + // Scan through this use's formulae. for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; + // Check to see if this formula has the same registers and symbols + // as OrigF. 
if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale && - LU.Kind) { + F.AM.Scale == OrigF.AM.Scale) { if (F.AM.BaseOffs == 0) return &LU; + // This is the formula where all the registers and symbols matched; + // there aren't going to be any others. Since we declined it, we + // can skip the rest of the formulae and procede to the next LSRUse. break; } } } } + // Nothing looked good. return 0; } @@ -1976,7 +2002,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { for (SmallSetVector<const SCEV *, 4>::const_iterator I = Strides.begin(), E = Strides.end(); I != E; ++I) for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = - next(I); NewStrideIter != E; ++NewStrideIter) { + llvm::next(I); NewStrideIter != E; ++NewStrideIter) { const SCEV *OldStride = *I; const SCEV *NewStride = *NewStrideIter; @@ -2066,6 +2092,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -2195,6 +2225,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -2207,14 +2241,13 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// separate registers. 
If C is non-null, multiply each subexpression by C. static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, - SmallVectorImpl<const SCEV *> &UninterestingOps, const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); + CollectSubexprs(*I, C, Ops, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. @@ -2222,8 +2255,8 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), - C, Ops, UninterestingOps, L, SE); - CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); + C, Ops, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { @@ -2233,17 +2266,13 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, dyn_cast<SCEVConstant>(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, UninterestingOps, L, SE); + Ops, L, SE); return; } } - // Otherwise use the value itself. Loop-variant "unknown" values are - // uninteresting; we won't be able to do anything meaningful with them. - if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L)) - UninterestingOps.push_back(S); - else - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself, optionally with a scale applied. + Ops.push_back(C ? 
SE.getMulExpr(C, S) : S); } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -2257,19 +2286,19 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *BaseReg = Base.BaseRegs[i]; - SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps; - CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE); - - // Add any uninteresting values as one register, as we won't be able to - // form any interesting reassociation opportunities with them. They'll - // just have to be added inside the loop no matter what we do. - if (!UninterestingAddOps.empty()) - AddOps.push_back(SE.getAddExpr(UninterestingAddOps)); + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, L, SE); if (AddOps.size() == 1) continue; for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), JE = AddOps.end(); J != JE; ++J) { + + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L)) + continue; + // Don't pull a constant into a register if the constant could be folded // into an immediate field. if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, @@ -2279,9 +2308,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Collect all operands except *J. SmallVector<const SCEV *, 8> InnerAddOps - ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); + (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); InnerAddOps.append - (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); + (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. 
@@ -2377,7 +2406,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. if (NewG->isZero()) { std::swap(F.BaseRegs[i], F.BaseRegs.back()); @@ -2778,6 +2807,10 @@ LSRInstance::GenerateAllReuseFormulae() { } GenerateCrossUseConstantOffsets(); + + DEBUG(dbgs() << "\n" + "After generating reuse formulae:\n"; + print_uses(dbgs())); } /// If their are multiple formulae with the same set of registers used @@ -2876,11 +2909,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const { return Power; } -/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of -/// formulae to choose from, use some rough heuristics to prune down the number -/// of formulae. This keeps the main solver from taking an extraordinary amount -/// of time in some worst-case scenarios. -void LSRInstance::NarrowSearchSpaceUsingHeuristics() { +/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset +/// of the registers of another formula, it won't help reduce register +/// pressure (though it may not necessarily hurt register pressure); remove +/// it to simplify the system. +void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2938,7 +2971,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} +/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers +/// for expressions like A, A+1, A+2, etc., allocate a single register for +/// them. 
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2988,7 +3026,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { if (Fixup.LUIdx == LUIdx) { Fixup.LUIdx = LUThatHas - &Uses.front(); Fixup.Offset += F.AM.BaseOffs; - DEBUG(errs() << "New fixup has offset " + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } if (Fixup.LUIdx == NumUses-1) @@ -3009,7 +3047,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} + +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// we've done more filtering, as it may be able to find more formulae to +/// eliminate. +void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by re-filtering out " + "undesirable dedicated registers.\n"); + + FilterOutUndesirableDedicatedRegisters(); + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } +} +/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely +/// to be profitable, and then in any use which has any reference to that +/// register, delete all formulae which do not reference that register. +void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle. SmallPtrSet<const SCEV *, 4> Taken; @@ -3071,6 +3132,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { } } +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. 
This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. +void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + NarrowSearchSpaceByDetectingSupersets(); + NarrowSearchSpaceByCollapsingUnrolledCode(); + NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + NarrowSearchSpaceByPickingWinnerRegs(); +} + /// SolveRecurse - This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, Cost &SolutionCost, @@ -3614,10 +3686,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // to formulate the values needed for the uses. GenerateAllReuseFormulae(); - DEBUG(dbgs() << "\n" - "After generating reuse formulae:\n"; - print_uses(dbgs())); - FilterOutUndesirableDedicatedRegisters(); NarrowSearchSpaceUsingHeuristics(); @@ -3724,15 +3792,15 @@ private: } char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false); Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(&ID), TLI(tli) {} + : LoopPass(ID), TLI(tli) {} void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. 
However, we do update diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 4ad41ae4b59f..d0edfa220051 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -26,7 +27,7 @@ using namespace llvm; static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, +UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden, cl::desc("The cut-off point for automatic loop unrolling")); static cl::opt<unsigned> @@ -42,7 +43,7 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(&ID) {} + LoopUnroll() : LoopPass(ID) {} /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much @@ -55,23 +56,24 @@ namespace { /// loop preheaders be inserted into the CFG... /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired<LoopInfo>(); AU.addPreservedID(LCSSAID); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<ScalarEvolution>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. // For now, recreate dom info, if loop is unrolled. 
AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } }; } char LoopUnroll::ID = 0; -static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); +INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false); Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } @@ -145,12 +147,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; // FIXME: Reconstruct dom info, because it is not preserved properly. - DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - if (DT) { + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) DT->runOnFunction(*F); - DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); - if (DF) - DF->runOnFunction(*F); - } return true; } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 0c900ffc4027..9afe428ba569 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -77,7 +77,6 @@ namespace { bool redoLoop; Loop *currentLoop; - DominanceFrontier *DF; DominatorTree *DT; BasicBlock *loopHeader; BasicBlock *loopPreheader; @@ -92,15 +91,15 @@ namespace { public: static char ID; // Pass ID, replacement for typeid explicit LoopUnswitch(bool Os = false) : - LoopPass(&ID), OptimizeForSize(Os), redoLoop(false), - currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL), + LoopPass(ID), OptimizeForSize(Os), redoLoop(false), + currentLoop(NULL), DT(NULL), loopHeader(NULL), loopPreheader(NULL) {} bool runOnLoop(Loop *L, LPPassManager &LPM); bool processCurrentLoop(); /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... + /// loop preheaders be inserted into the CFG. 
/// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); @@ -110,7 +109,6 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } private: @@ -160,7 +158,7 @@ namespace { }; } char LoopUnswitch::ID = 0; -static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops"); +INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false); Pass *llvm::createLoopUnswitchPass(bool Os) { return new LoopUnswitch(Os); @@ -201,7 +199,6 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { LI = &getAnalysis<LoopInfo>(); LPM = &LPM_Ref; - DF = getAnalysisIfAvailable<DominanceFrontier>(); DT = getAnalysisIfAvailable<DominatorTree>(); currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); @@ -216,8 +213,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { // FIXME: Reconstruct dom info, because it is not preserved properly. if (DT) DT->runOnFunction(*F); - if (DF) - DF->runOnFunction(*F); } return Changed; } @@ -282,19 +277,18 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } -/// isTrivialLoopExitBlock - Check to see if all paths from BB either: -/// 1. Exit the loop with no side effects. -/// 2. Branch to the latch block with no side-effects. +/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the +/// loop with no side effects (including infinite loops). /// -/// If these conditions are true, we return true and set ExitBB to the block we +/// If true, we return true and set ExitBB to the block we /// exit through. /// static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, BasicBlock *&ExitBB, std::set<BasicBlock*> &Visited) { if (!Visited.insert(BB).second) { - // Already visited and Ok, end of recursion. - return true; + // Already visited. 
Without more analysis, this could indicate an infinite loop. + return false; } else if (!L->contains(BB)) { // Otherwise, this is a loop exit, this is fine so long as this is the // first exit. @@ -324,7 +318,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, /// process. If so, return the block that is exited to, otherwise return null. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set<BasicBlock*> Visited; - Visited.insert(L->getHeader()); // Branches to header are ok. + Visited.insert(L->getHeader()); // Branches to header make infinite loops. BasicBlock *ExitBB = 0; if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) return ExitBB; @@ -356,8 +350,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, if (!BI->isConditional() || BI->getCondition() != Cond) return false; - // Check to see if a successor of the branch is guaranteed to go to the - // latch block or exit through a one exit block without having any + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any // side-effects. If so, determine the value of Cond that causes it to do // this. if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp new file mode 100644 index 000000000000..973ffe7e6a40 --- /dev/null +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -0,0 +1,161 @@ +//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers atomic intrinsics to non-atomic form for use in a known +// non-preemptible environment.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loweratomic" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +namespace { + +bool LowerAtomicIntrinsic(CallInst *CI) { + IRBuilder<> Builder(CI->getParent(), CI); + + Function *Callee = CI->getCalledFunction(); + if (!Callee) + return false; + + unsigned IID = Callee->getIntrinsicID(); + switch (IID) { + case Intrinsic::memory_barrier: + break; + + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: { + Value *Ptr = CI->getArgOperand(0); + Value *Delta = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Res = NULL; + switch (IID) { + default: assert(0 && "Unrecognized atomic modify operation"); + case Intrinsic::atomic_load_add: + Res = Builder.CreateAdd(Orig, Delta); + break; + case Intrinsic::atomic_load_sub: + Res = Builder.CreateSub(Orig, Delta); + break; + case Intrinsic::atomic_load_and: + Res = Builder.CreateAnd(Orig, Delta); + break; + case Intrinsic::atomic_load_nand: + Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); + break; + case Intrinsic::atomic_load_or: + Res = Builder.CreateOr(Orig, Delta); + break; + case Intrinsic::atomic_load_xor: + Res = Builder.CreateXor(Orig, Delta); + break; + case Intrinsic::atomic_load_max: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Delta, + Orig); + break; + case Intrinsic::atomic_load_min: + Res = 
Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Orig, + Delta); + break; + case Intrinsic::atomic_load_umax: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Delta, + Orig); + break; + case Intrinsic::atomic_load_umin: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Orig, + Delta); + break; + } + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Val = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Builder.CreateStore(Val, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_cmp_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Cmp = CI->getArgOperand(1); + Value *Val = CI->getArgOperand(2); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + default: + return false; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); + + return true; +} + +struct LowerAtomic : public BasicBlockPass { + static char ID; + LowerAtomic() : BasicBlockPass(ID) {} + bool runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { + Instruction *Inst = DI++; + if (CallInst *CI = dyn_cast<CallInst>(Inst)) + Changed |= LowerAtomicIntrinsic(CI); + } + return Changed; + } + +}; + +} + +char LowerAtomic::ID = 0; +INITIALIZE_PASS(LowerAtomic, "loweratomic", + "Lower atomic intrinsics to non-atomic form", + false, false); + +Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0e566c5bd9be..24fae423d2f7 100644 --- 
a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -304,7 +304,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - MemCpyOpt() : FunctionPass(&ID) {} + MemCpyOpt() : FunctionPass(ID) {} private: // This transformation requires dominator postdominator info @@ -331,8 +331,7 @@ namespace { // createMemCpyOptPass - The public interface to this file... FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } -static RegisterPass<MemCpyOpt> X("memcpyopt", - "MemCpy Optimization"); +INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false); @@ -374,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // If the call is readnone, ignore it, otherwise bail out. We don't even // allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). - if (AA.getModRefBehavior(CallSite::get(BI)) == + if (AA.getModRefBehavior(CallSite(BI)) == AliasAnalysis::DoesNotAccessMemory) continue; @@ -509,7 +508,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // because we'll need to do type comparisons based on the underlying type. Value *cpyDest = cpy->getDest(); Value *cpySrc = cpy->getSource(); - CallSite CS = CallSite::get(C); + CallSite CS(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. @@ -637,10 +636,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { return true; } -/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which -/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be -/// a memcpy from X to Z (or potentially a memmove, depending on circumstances). -/// This allows later passes to remove the first memcpy altogether. +/// processMemCpy - perform simplification of memcpy's. 
If we have memcpy A +/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite +/// B to be a memcpy from X to Z (or potentially a memmove, depending on +/// circumstances). This allows later passes to remove the first memcpy +/// altogether. bool MemCpyOpt::processMemCpy(MemCpyInst *M) { MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); @@ -744,7 +744,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { const Type *ArgTys[3] = { M->getRawDest()->getType(), M->getRawSource()->getType(), M->getLength()->getType() }; - M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); + M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, + ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 98452f5d82c4..b8afcc12d927 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -77,7 +77,7 @@ namespace { bool MadeChange; public: static char ID; // Pass identification, replacement for typeid - Reassociate() : FunctionPass(&ID) {} + Reassociate() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -103,7 +103,8 @@ namespace { } char Reassociate::ID = 0; -static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions"); +INITIALIZE_PASS(Reassociate, "reassociate", + "Reassociate expressions", false, false); // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 13222ac22004..506b72ac34e0 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -36,7 +36,7 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); namespace { struct RegToMem : public FunctionPass { static char ID; 
// Pass identification, replacement for typeid - RegToMem() : FunctionPass(&ID) {} + RegToMem() : FunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); @@ -59,8 +59,8 @@ namespace { } char RegToMem::ID = 0; -static RegisterPass<RegToMem> -X("reg2mem", "Demote all values to stack slots"); +INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false); bool RegToMem::runOnFunction(Function &F) { @@ -124,7 +124,7 @@ bool RegToMem::runOnFunction(Function &F) { // createDemoteRegisterToMemory - Provide an entry point to create this pass. // -const PassInfo *const llvm::DemoteRegisterToMemoryID = &X; +char &llvm::DemoteRegisterToMemoryID = RegToMem::ID; FunctionPass *llvm::createDemoteRegisterToMemoryPass() { return new RegToMem(); } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 907ece8fcce9..6115c05c20ac 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -275,12 +275,12 @@ public: return I->second; } - LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { + /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = StructValueState.find(std::make_pair(V, i)); assert(I != StructValueState.end() && "V is not in valuemap!"); return I->second; - } + }*/ /// getTrackedRetVals - Get the inferred return value map. 
/// @@ -508,17 +508,16 @@ private: void visitLoadInst (LoadInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); void visitCallInst (CallInst &I) { - visitCallSite(CallSite::get(&I)); + visitCallSite(&I); } void visitInvokeInst (InvokeInst &II) { - visitCallSite(CallSite::get(&II)); + visitCallSite(&II); visitTerminatorInst(II); } void visitCallSite (CallSite CS); void visitUnwindInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVANextInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } void visitInstruction(Instruction &I) { @@ -1586,7 +1585,7 @@ namespace { /// struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid - SCCP() : FunctionPass(&ID) {} + SCCP() : FunctionPass(ID) {} // runOnFunction - Run the Sparse Conditional Constant Propagation // algorithm, and return true if the function was modified. @@ -1600,8 +1599,8 @@ namespace { } // end anonymous namespace char SCCP::ID = 0; -static RegisterPass<SCCP> -X("sccp", "Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(SCCP, "sccp", + "Sparse Conditional Constant Propagation", false, false); // createSCCPPass - This is the public interface to this file. FunctionPass *llvm::createSCCPPass() { @@ -1702,14 +1701,15 @@ namespace { /// struct IPSCCP : public ModulePass { static char ID; - IPSCCP() : ModulePass(&ID) {} + IPSCCP() : ModulePass(ID) {} bool runOnModule(Module &M); }; } // end anonymous namespace char IPSCCP::ID = 0; -static RegisterPass<IPSCCP> -Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(IPSCCP, "ipsccp", + "Interprocedural Sparse Conditional Constant Propagation", + false, false); // createIPSCCPPass - This is the public interface to this file. 
ModulePass *llvm::createIPSCCPPass() { @@ -1748,6 +1748,13 @@ static bool AddressIsTaken(const GlobalValue *GV) { bool IPSCCP::runOnModule(Module &M) { SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + // AddressTakenFunctions - This set keeps track of the address-taken functions + // that are in the input. As IPSCCP runs through and simplifies code, + // functions that were address taken can end up losing their + // address-taken-ness. Because of this, we keep track of their addresses from + // the first pass so we can use them for the later simplification pass. + SmallPtrSet<Function*, 32> AddressTakenFunctions; + // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. // @@ -1763,9 +1770,13 @@ bool IPSCCP::runOnModule(Module &M) { // If this function only has direct calls that we can see, we can track its // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. - if (F->hasLocalLinkage() && !AddressIsTaken(F)) { - Solver.AddArgumentTrackedFunction(F); - continue; + if (F->hasLocalLinkage()) { + if (AddressIsTaken(F)) + AddressTakenFunctions.insert(F); + else { + Solver.AddArgumentTrackedFunction(F); + continue; + } } // Assume the function is called. @@ -1950,7 +1961,7 @@ bool IPSCCP::runOnModule(Module &M) { continue; // We can only do this if we know that nothing else can call the function. 
- if (!F->hasLocalLinkage() || AddressIsTaken(F)) + if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F)) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index dd445f63320a..fee317dbd9ab 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -28,6 +28,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" @@ -51,7 +52,7 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { static char ID; // Pass identification, replacement for typeid - explicit SROA(signed T = -1) : FunctionPass(&ID) { + explicit SROA(signed T = -1) : FunctionPass(ID) { if (T == -1) SRThreshold = 128; else @@ -114,8 +115,7 @@ namespace { void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); void DeleteDeadInstructions(); - AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, @@ -135,7 +135,8 @@ namespace { } char SROA::ID = 0; -static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); +INITIALIZE_PASS(SROA, "scalarrepl", + "Scalar Replacement of Aggregates", false, false); // Public interface to the ScalarReplAggregates pass FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { @@ -193,6 +194,27 @@ private: }; } // end anonymous namespace. + +/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't +/// allowed to form. 
We do this to avoid MMX types, which is a complete hack, +/// but is required until the backend is fixed. +static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) { + StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple()); + if (!Triple.startswith("i386") && + !Triple.startswith("x86_64")) + return false; + + // Reject all the MMX vector types. + switch (VTy->getNumElements()) { + default: return false; + case 1: return VTy->getElementType()->isIntegerTy(64); + case 2: return VTy->getElementType()->isIntegerTy(32); + case 4: return VTy->getElementType()->isIntegerTy(16); + case 8: return VTy->getElementType()->isIntegerTy(8); + } +} + + /// TryConvert - Analyze the specified alloca, and if it is safe to do so, /// rewrite it to be a new alloca which is mem2reg'able. This returns the new /// alloca if possible or null if not. @@ -209,7 +231,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + if (VectorTy && VectorTy->isVectorTy() && HadAVector && + !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) { DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. 
@@ -969,7 +992,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); if (Length) isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == CallInst::ArgOffset, Info); + UI.getOperandNo() == 0, Info); else MarkUnsafe(Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1662,6 +1685,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) + return HasPadding(ATy->getElementType(), TD); + + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return HasPadding(VTy->getElementType(), TD); + if (const StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned PrevFieldBitOffset = 0; @@ -1691,12 +1720,8 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { if (PrevFieldEnd < SL->getSizeInBits()) return true; } - - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - return HasPadding(ATy->getElementType(), TD); - } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - return HasPadding(VTy->getElementType(), TD); } + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } @@ -1787,7 +1812,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != CallInst::ArgOffset) return false; + if (UI.getOperandNo() != 0) return false; // If the source of the memcpy/move is not a constant global, reject it. 
if (!PointsToConstantGlobal(MI->getSource())) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 49d93a2fcc27..360749caf111 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -42,14 +42,15 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGSimplifyPass() : FunctionPass(&ID) {} + CFGSimplifyPass() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); }; } char CFGSimplifyPass::ID = 0; -static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG"); +INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg", + "Simplify the CFG", false, false); // Public interface to the CFGSimplification pass FunctionPass *llvm::createCFGSimplificationPass() { @@ -284,10 +285,9 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { while (LocalChange) { LocalChange = false; - // Loop over all of the basic blocks (except the first one) and remove them - // if they are unneeded... + // Loop over all of the basic blocks and remove them if they are unneeded... 
// - for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) { + for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { if (SimplifyCFG(BBIt++, TD)) { LocalChange = true; ++NumSimpl; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index c3408e77807f..3ec70ec2e024 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -32,7 +32,7 @@ namespace { const TargetData *TD; public: static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {} + SimplifyHalfPowrLibCalls() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -46,8 +46,8 @@ namespace { char SimplifyHalfPowrLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyHalfPowrLibCalls> -X("simplify-libcalls-halfpowr", "Simplify half_powr library calls"); +INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", + "Simplify half_powr library calls", false, false); // Public interface to the Simplify HalfPowr LibCalls pass. FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index b1c619125c35..d7ce53f36715 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -532,7 +532,7 @@ struct StrStrOpt : public LibCallOptimization { StrLen, B, TD); for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { - ICmpInst *Old = cast<ICmpInst>(UI++); + ICmpInst *Old = cast<ICmpInst>(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); @@ -566,8 +566,8 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). 
if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), - CI->getType()); + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), + ToFindStr[0], B, TD), CI->getType()); return 0; } }; @@ -681,8 +681,8 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), - false); + Value *Val = B.CreateIntCast(CI->getArgOperand(1), + Type::getInt8Ty(*Context), false); EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); return CI->getArgOperand(0); } @@ -1042,9 +1042,9 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()+1), 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the + ConstantInt::get(TD->getIntPtrType(*Context), // nul byte. + FormatStr.size() + 1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1080,7 +1080,8 @@ struct SPrintFOpt : public LibCallOptimization { Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), + IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1236,7 +1237,7 @@ namespace { bool Modified; // This is only used by doInitialization. 
public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {} + SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {} void InitOptimizations(); bool runOnFunction(Function &F); @@ -1253,8 +1254,8 @@ namespace { char SimplifyLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyLibCalls> -X("simplify-libcalls", "Simplify well-known library calls"); +INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false); // Public interface to the Simplify LibCalls pass. FunctionPass *llvm::createSimplifyLibCallsPass() { @@ -2155,7 +2156,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * pow(pow(x,y),z)-> pow(x,y*z) // // puts: -// * puts("") -> putchar("\n") +// * puts("") -> putchar('\n') // // round, roundf, roundl: // * round(cnst) -> cnst' diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index b88ba4850509..95d3dedfb62d 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -35,7 +35,7 @@ namespace { public: static char ID; // Pass identification - Sinking() : FunctionPass(&ID) {} + Sinking() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -56,8 +56,7 @@ namespace { } // end anonymous namespace char Sinking::ID = 0; -static RegisterPass<Sinking> -X("sink", "Code sinking"); +INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false); FunctionPass *llvm::createSinkingPass() { return new Sinking(); } diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 9208238f4ba5..2e437ac778c8 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -49,7 +49,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - TailDup() : FunctionPass(&ID) {} + TailDup() : FunctionPass(ID) {} private: 
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); @@ -59,7 +59,7 @@ namespace { } char TailDup::ID = 0; -static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication"); +INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false); // Public interface to the Tail Duplication pass FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 01c8e5d6fcf4..371725467a24 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -72,7 +72,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid - TailCallElim() : FunctionPass(&ID) {} + TailCallElim() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -87,7 +87,8 @@ namespace { } char TailCallElim::ID = 0; -static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination"); +INITIALIZE_PASS(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false); // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { @@ -277,22 +278,22 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; - for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) - if (RI != IgnoreRI) { - Value *RetOp = RI->getOperand(0); - - // We can only perform this transformation if the value returned is - // evaluatable at the start of the initial invocation of the function, - // instead of at the end of the evaluation. 
- // - if (!isDynamicConstant(RetOp, CI, RI)) - return 0; - - if (ReturnedValue && RetOp != ReturnedValue) - return 0; // Cannot transform if differing values are returned. - ReturnedValue = RetOp; - } + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { + ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); + if (RI == 0 || RI == IgnoreRI) continue; + + // We can only perform this transformation if the value returned is + // evaluatable at the start of the initial invocation of the function, + // instead of at the end of the evaluation. + // + Value *RetOp = RI->getOperand(0); + if (!isDynamicConstant(RetOp, CI, RI)) + return 0; + + if (ReturnedValue && RetOp != ReturnedValue) + return 0; // Cannot transform if differing values are returned. + ReturnedValue = RetOp; + } return ReturnedValue; } @@ -306,7 +307,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, assert(I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"); - // Exactly one operand should be the result of the call instruction... + // Exactly one operand should be the result of the call instruction. if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || (I->getOperand(0) != CI && I->getOperand(1) != CI)) return 0; @@ -386,21 +387,22 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) - if (!CanMoveAboveCall(BBI, CI)) { - // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed - // using accumulator recursion elimination. Check to see if this is the - // case, and if so, remember the initial accumulator value for later. 
- if ((AccumulatorRecursionEliminationInitVal = - CanTransformAccumulatorRecursion(BBI, CI))) { - // Yes, this is accumulator recursion. Remember which instruction - // accumulates. - AccumulatorRecursionInstr = BBI; - } else { - return false; // Otherwise, we cannot eliminate the tail recursion! - } + for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) { + if (CanMoveAboveCall(BBI, CI)) continue; + + // If we can't move the instruction above the call, it might be because it + // is an associative and commutative operation that could be tranformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. + if ((AccumulatorRecursionEliminationInitVal = + CanTransformAccumulatorRecursion(BBI, CI))) { + // Yes, this is accumulator recursion. Remember which instruction + // accumulates. + AccumulatorRecursionInstr = BBI; + } else { + return false; // Otherwise, we cannot eliminate the tail recursion! } + } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a |