33 files changed, 1729 insertions, 1196 deletions
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
deleted file mode 100644
index 6815e411b421..000000000000
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ /dev/null
@@ -1,577 +0,0 @@
-//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements target addressing mode matcher class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CallSite.h"
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-void ExtAddrMode::print(raw_ostream &OS) const {
-  bool NeedPlus = false;
-  OS << "[";
-  if (BaseGV) {
-    OS << (NeedPlus ? " + " : "")
-       << "GV:";
-    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
-    NeedPlus = true;
-  }
-
-  if (BaseOffs)
-    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
-
-  if (BaseReg) {
-    OS << (NeedPlus ? " + " : "")
-       << "Base:";
-    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
-    NeedPlus = true;
-  }
-  if (Scale) {
-    OS << (NeedPlus ? " + " : "")
-       << Scale << "*";
-    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
-    NeedPlus = true;
-  }
-
-  OS << ']';
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ExtAddrMode::dump() const {
-  print(dbgs());
-  dbgs() << '\n';
-}
-#endif
-
-
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
-/// Return true and update AddrMode if this addr mode is legal for the target,
-/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
-                                             unsigned Depth) {
-  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
-  // mode.  Just process that directly.
-  if (Scale == 1)
-    return MatchAddr(ScaleReg, Depth);
-  
-  // If the scale is 0, it takes nothing to add this.
-  if (Scale == 0)
-    return true;
-  
-  // If we already have a scale of this value, we can add to it, otherwise, we
-  // need an available scale field.
-  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
-    return false;
-
-  ExtAddrMode TestAddrMode = AddrMode;
-
-  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
-  // [A+B + A*7] -> [B+A*8].
-  TestAddrMode.Scale += Scale;
-  TestAddrMode.ScaledReg = ScaleReg;
-
-  // If the new address isn't legal, bail out.
-  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
-    return false;
-
-  // It was legal, so commit it.
-  AddrMode = TestAddrMode;
-  
-  // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
-  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
-  // X*Scale + C*Scale to addr mode.
-  ConstantInt *CI = 0; Value *AddLHS = 0;
-  if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
-      match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
-    TestAddrMode.ScaledReg = AddLHS;
-    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-      
-    // If this addressing mode is legal, commit it and remember that we folded
-    // this instruction.
-    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
-      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
-      AddrMode = TestAddrMode;
-      return true;
-    }
-  }
-
-  // Otherwise, not (x+c)*scale, just return what we have.
-  return true;
-}
-
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it.  This doesn't need to be perfect, but needs to accept at least
-/// the set of instructions that MatchOperationAddr can.
-static bool MightBeFoldableInst(Instruction *I) {
-  switch (I->getOpcode()) {
-  case Instruction::BitCast:
-    // Don't touch identity bitcasts.
-    if (I->getType() == I->getOperand(0)->getType())
-      return false;
-    return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
-  case Instruction::PtrToInt:
-    // PtrToInt is always a noop, as we know that the int type is pointer sized.
-    return true;
-  case Instruction::IntToPtr:
-    // We know the input is intptr_t, so this is foldable.
-    return true;
-  case Instruction::Add:
-    return true;
-  case Instruction::Mul:
-  case Instruction::Shl:
-    // Can only handle X*C and X << C.
-    return isa<ConstantInt>(I->getOperand(1));
-  case Instruction::GetElementPtr:
-    return true;
-  default:
-    return false;
-  }
-}
-
-
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode.  If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
-                                               unsigned Depth) {
-  // Avoid exponential behavior on extremely deep expression trees.
-  if (Depth >= 5) return false;
-  
-  switch (Opcode) {
-  case Instruction::PtrToInt:
-    // PtrToInt is always a noop, as we know that the int type is pointer sized.
-    return MatchAddr(AddrInst->getOperand(0), Depth);
-  case Instruction::IntToPtr:
-    // This inttoptr is a no-op if the integer type is pointer sized.
-    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
-        TLI.getPointerTy())
-      return MatchAddr(AddrInst->getOperand(0), Depth);
-    return false;
-  case Instruction::BitCast:
-    // BitCast is always a noop, and we can handle it as long as it is
-    // int->int or pointer->pointer (we don't want int<->fp or something).
-    if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
-         AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
-        // Don't touch identity bitcasts.  These were probably put here by LSR,
-        // and we don't want to mess around with them.  Assume it knows what it
-        // is doing.
-        AddrInst->getOperand(0)->getType() != AddrInst->getType())
-      return MatchAddr(AddrInst->getOperand(0), Depth);
-    return false;
-  case Instruction::Add: {
-    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
-    ExtAddrMode BackupAddrMode = AddrMode;
-    unsigned OldSize = AddrModeInsts.size();
-    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
-        MatchAddr(AddrInst->getOperand(0), Depth+1))
-      return true;
-    
-    // Restore the old addr mode info.
-    AddrMode = BackupAddrMode;
-    AddrModeInsts.resize(OldSize);
-    
-    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
-    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
-        MatchAddr(AddrInst->getOperand(1), Depth+1))
-      return true;
-    
-    // Otherwise we definitely can't merge the ADD in.
-    AddrMode = BackupAddrMode;
-    AddrModeInsts.resize(OldSize);
-    break;
-  }
-  //case Instruction::Or:
-  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
-  //break;
-  case Instruction::Mul:
-  case Instruction::Shl: {
-    // Can only handle X*C and X << C.
-    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
-    if (!RHS) return false;
-    int64_t Scale = RHS->getSExtValue();
-    if (Opcode == Instruction::Shl)
-      Scale = 1LL << Scale;
-    
-    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
-  }
-  case Instruction::GetElementPtr: {
-    // Scan the GEP.  We check it if it contains constant offsets and at most
-    // one variable offset.
-    int VariableOperand = -1;
-    unsigned VariableScale = 0;
-    
-    int64_t ConstantOffset = 0;
-    const DataLayout *TD = TLI.getDataLayout();
-    gep_type_iterator GTI = gep_type_begin(AddrInst);
-    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
-      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
-        const StructLayout *SL = TD->getStructLayout(STy);
-        unsigned Idx =
-          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
-        ConstantOffset += SL->getElementOffset(Idx);
-      } else {
-        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
-        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
-          ConstantOffset += CI->getSExtValue()*TypeSize;
-        } else if (TypeSize) {  // Scales of zero don't do anything.
-          // We only allow one variable index at the moment.
-          if (VariableOperand != -1)
-            return false;
-          
-          // Remember the variable index.
-          VariableOperand = i;
-          VariableScale = TypeSize;
-        }
-      }
-    }
-    
-    // A common case is for the GEP to only do a constant offset.  In this case,
-    // just add it to the disp field and check validity.
-    if (VariableOperand == -1) {
-      AddrMode.BaseOffs += ConstantOffset;
-      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
-        // Check to see if we can fold the base pointer in too.
-        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
-          return true;
-      }
-      AddrMode.BaseOffs -= ConstantOffset;
-      return false;
-    }
-
-    // Save the valid addressing mode in case we can't match.
-    ExtAddrMode BackupAddrMode = AddrMode;
-    unsigned OldSize = AddrModeInsts.size();
-
-    // See if the scale and offset amount is valid for this target.
-    AddrMode.BaseOffs += ConstantOffset;
-
-    // Match the base operand of the GEP.
-    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
-      // If it couldn't be matched, just stuff the value in a register.
-      if (AddrMode.HasBaseReg) {
-        AddrMode = BackupAddrMode;
-        AddrModeInsts.resize(OldSize);
-        return false;
-      }
-      AddrMode.HasBaseReg = true;
-      AddrMode.BaseReg = AddrInst->getOperand(0);
-    }
-
-    // Match the remaining variable portion of the GEP.
-    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
-                          Depth)) {
-      // If it couldn't be matched, try stuffing the base into a register
-      // instead of matching it, and retrying the match of the scale.
-      AddrMode = BackupAddrMode;
-      AddrModeInsts.resize(OldSize);
-      if (AddrMode.HasBaseReg)
-        return false;
-      AddrMode.HasBaseReg = true;
-      AddrMode.BaseReg = AddrInst->getOperand(0);
-      AddrMode.BaseOffs += ConstantOffset;
-      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
-                            VariableScale, Depth)) {
-        // If even that didn't work, bail.
-        AddrMode = BackupAddrMode;
-        AddrModeInsts.resize(OldSize);
-        return false;
-      }
-    }
-
-    return true;
-  }
-  }
-  return false;
-}
-
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode.  If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
-///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
-    // Fold in immediates if legal for the target.
-    AddrMode.BaseOffs += CI->getSExtValue();
-    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
-      return true;
-    AddrMode.BaseOffs -= CI->getSExtValue();
-  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
-    // If this is a global variable, try to fold it into the addressing mode.
-    if (AddrMode.BaseGV == 0) {
-      AddrMode.BaseGV = GV;
-      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
-        return true;
-      AddrMode.BaseGV = 0;
-    }
-  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
-    ExtAddrMode BackupAddrMode = AddrMode;
-    unsigned OldSize = AddrModeInsts.size();
-
-    // Check to see if it is possible to fold this operation.
-    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
-      // Okay, it's possible to fold this.  Check to see if it is actually
-      // *profitable* to do so.  We use a simple cost model to avoid increasing
-      // register pressure too much.
-      if (I->hasOneUse() ||
-          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
-        AddrModeInsts.push_back(I);
-        return true;
-      }
-      
-      // It isn't profitable to do this, roll back.
-      //cerr << "NOT FOLDING: " << *I;
-      AddrMode = BackupAddrMode;
-      AddrModeInsts.resize(OldSize);
-    }
-  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
-    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
-      return true;
-  } else if (isa<ConstantPointerNull>(Addr)) {
-    // Null pointer gets folded without affecting the addressing mode.
-    return true;
-  }
-
-  // Worse case, the target should support [reg] addressing modes. :)
-  if (!AddrMode.HasBaseReg) {
-    AddrMode.HasBaseReg = true;
-    AddrMode.BaseReg = Addr;
-    // Still check for legality in case the target supports [imm] but not [i+r].
-    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
-      return true;
-    AddrMode.HasBaseReg = false;
-    AddrMode.BaseReg = 0;
-  }
-
-  // If the base register is already taken, see if we can do [r+r].
-  if (AddrMode.Scale == 0) {
-    AddrMode.Scale = 1;
-    AddrMode.ScaledReg = Addr;
-    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
-      return true;
-    AddrMode.Scale = 0;
-    AddrMode.ScaledReg = 0;
-  }
-  // Couldn't match.
-  return false;
-}
-
-
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands.  If so, return true, otherwise
-/// return false.
-static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
-                                    const TargetLowering &TLI) {
-  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
-  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
-    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-    
-    // Compute the constraint code and ConstraintType to use.
-    TLI.ComputeConstraintToUse(OpInfo, SDValue());
-
-    // If this asm operand is our Value*, and if it isn't an indirect memory
-    // operand, we can't fold it!
-    if (OpInfo.CallOperandVal == OpVal &&
-        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
-         !OpInfo.isIndirect))
-      return false;
-  }
-
-  return true;
-}
-
-
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use.  If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
-                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
-                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
-                              const TargetLowering &TLI) {
-  // If we already considered this instruction, we're done.
-  if (!ConsideredInsts.insert(I))
-    return false;
-  
-  // If this is an obviously unfoldable instruction, bail out.
-  if (!MightBeFoldableInst(I))
-    return true;
-
-  // Loop over all the uses, recursively processing them.
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
-       UI != E; ++UI) {
-    User *U = *UI;
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
-      continue;
-    }
-    
-    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      unsigned opNo = UI.getOperandNo();
-      if (opNo == 0) return true; // Storing addr, not into addr.
-      MemoryUses.push_back(std::make_pair(SI, opNo));
-      continue;
-    }
-    
-    if (CallInst *CI = dyn_cast<CallInst>(U)) {
-      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
-      if (!IA) return true;
-      
-      // If this is a memory operand, we're cool, otherwise bail out.
-      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
-        return true;
-      continue;
-    }
-    
-    if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
-                          TLI))
-      return true;
-  }
-
-  return false;
-}
-
-
-/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
-/// the use site that we're folding it into.  If so, there is no cost to
-/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
-/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
-                                                   Value *KnownLive2) {
-  // If Val is either of the known-live values, we know it is live!
-  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
-    return true;
-  
-  // All values other than instructions and arguments (e.g. constants) are live.
-  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-  
-  // If Val is a constant sized alloca in the entry block, it is live, this is
-  // true because it is just a reference to the stack/frame pointer, which is
-  // live for the whole function.
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
-    if (AI->isStaticAlloca())
-      return true;
-  
-  // Check to see if this value is already used in the memory instruction's
-  // block.  If so, it's already live into the block at the very least, so we
-  // can reasonably fold it.
-  return Val->isUsedInBasicBlock(MemoryInst->getParent());
-}
-
-
-
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it.  However, the specified instruction has multiple
-/// uses.  Given this, it may actually increase register pressure to fold it
-/// into the load.  For example, consider this code:
-///
-///     X = ...
-///     Y = X+1
-///     use(Y)   -> nonload/store
-///     Z = Y+1
-///     load Z
-///
-/// In this case, Y has multiple uses, and can be folded into the load of Z
-/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
-/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
-/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
-/// number of computations either.
-///
-/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
-/// X was live across 'load Z' for other reasons, we actually *would* want to
-/// fold the addressing mode in the Z case.  This would make Y die earlier.
-bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
-                                     ExtAddrMode &AMAfter) {
-  if (IgnoreProfitability) return true;
-  
-  // AMBefore is the addressing mode before this instruction was folded into it,
-  // and AMAfter is the addressing mode after the instruction was folded.  Get
-  // the set of registers referenced by AMAfter and subtract out those
-  // referenced by AMBefore: this is the set of values which folding in this
-  // address extends the lifetime of.
-  //
-  // Note that there are only two potential values being referenced here,
-  // BaseReg and ScaleReg (global addresses are always available, as are any
-  // folded immediates).
-  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-  
-  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
-  // lifetime wasn't extended by adding this instruction.
-  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
-    BaseReg = 0;
-  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
-    ScaledReg = 0;
-
-  // If folding this instruction (and it's subexprs) didn't extend any live
-  // ranges, we're ok with it.
-  if (BaseReg == 0 && ScaledReg == 0)
-    return true;
-
-  // If all uses of this instruction are ultimately load/store/inlineasm's,
-  // check to see if their addressing modes will include this instruction.  If
-  // so, we can fold it into all uses, so it doesn't matter if it has multiple
-  // uses.
-  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
-  SmallPtrSet<Instruction*, 16> ConsideredInsts;
-  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
-    return false;  // Has a non-memory, non-foldable use!
-  
-  // Now that we know that all uses of this instruction are part of a chain of
-  // computation involving only operations that could theoretically be folded
-  // into a memory use, loop over each of these uses and see if they could
-  // *actually* fold the instruction.
-  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
-  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
-    Instruction *User = MemoryUses[i].first;
-    unsigned OpNo = MemoryUses[i].second;
-    
-    // Get the access type of this use.  If the use isn't a pointer, we don't
-    // know what it accesses.
-    Value *Address = User->getOperand(OpNo);
-    if (!Address->getType()->isPointerTy())
-      return false;
-    Type *AddressAccessTy =
-      cast<PointerType>(Address->getType())->getElementType();
-    
-    // Do a match against the root of this address, ignoring profitability. This
-    // will tell us if the addressing mode for the memory operation will
-    // *actually* cover the shared instruction.
-    ExtAddrMode Result;
-    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
-                                  MemoryInst, Result);
-    Matcher.IgnoreProfitability = true;
-    bool Success = Matcher.MatchAddr(Address, 0);
-    (void)Success; assert(Success && "Couldn't select *anything*?");
-
-    // If the match didn't cover I, then it won't be shared by it.
-    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
-                  I) == MatchedAddrModeInsts.end())
-      return false;
-    
-    MatchedAddrModeInsts.clear();
-  }
-  
-  return true;
-}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 9fea11391a1d..ba99d2e662e4 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -13,20 +13,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Constant.h"
-#include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -37,12 +37,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
          // Can delete self loop.
          BB->getSinglePredecessor() == BB) && "Block is not dead!");
   TerminatorInst *BBTerm = BB->getTerminator();
-  
+
   // Loop through all of our successors and make sure they know that one
   // of their predecessors is going away.
   for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
     BBTerm->getSuccessor(i)->removePredecessor(BB);
-  
+
   // Zap all the instructions in the block.
   while (!BB->empty()) {
     Instruction &I = BB->back();
@@ -55,7 +55,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
       I.replaceAllUsesWith(UndefValue::get(I.getType()));
     BB->getInstList().pop_back();
   }
-  
+
   // Zap the block!
   BB->eraseFromParent();
 }
@@ -66,25 +66,25 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
 /// when the block has exactly one predecessor.
 void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
   if (!isa<PHINode>(BB->begin())) return;
-  
+
   AliasAnalysis *AA = 0;
   MemoryDependenceAnalysis *MemDep = 0;
   if (P) {
     AA = P->getAnalysisIfAvailable<AliasAnalysis>();
     MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
   }
-  
+
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     if (PN->getIncomingValue(0) != PN)
       PN->replaceAllUsesWith(PN->getIncomingValue(0));
     else
       PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
-    
+
     if (MemDep)
       MemDep->removeInstruction(PN);  // Memdep updates AA itself.
     else if (AA && isa<PointerType>(PN->getType()))
       AA->deleteValue(PN);
-    
+
     PN->eraseFromParent();
   }
 }
@@ -115,7 +115,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
 bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   // Don't merge away blocks who have their address taken.
   if (BB->hasAddressTaken()) return false;
-  
+
   // Can't merge if there are multiple predecessors, or no predecessors.
   BasicBlock *PredBB = BB->getUniquePredecessor();
   if (!PredBB) return false;
@@ -124,7 +124,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   if (PredBB == BB) return false;
   // Don't break invokes.
   if (isa<InvokeInst>(PredBB->getTerminator())) return false;
-  
+
   succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
   BasicBlock *OnlySucc = BB;
   for (; SI != SE; ++SI)
@@ -132,7 +132,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
       OnlySucc = 0;     // There are multiple distinct successors!
       break;
     }
-  
+
   // Can't merge if there are multiple successors.
   if (!OnlySucc) return false;
 
@@ -149,21 +149,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   // Begin by getting rid of unneeded PHIs.
   if (isa<PHINode>(BB->front()))
     FoldSingleEntryPHINodes(BB, P);
-  
+
   // Delete the unconditional branch from the predecessor...
   PredBB->getInstList().pop_back();
-  
+
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(PredBB);
-  
+
   // Move all definitions in the successor to the predecessor...
   PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-  
+
   // Inherit predecessors name if it exists.
   if (!PredBB->hasName())
     PredBB->takeName(BB);
-  
+
   // Finally, erase the old block and update dominator info.
   if (P) {
     if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
@@ -176,16 +176,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
 
         DT->eraseNode(BB);
       }
-      
+
       if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
         LI->removeBlock(BB);
-      
+
       if (MemoryDependenceAnalysis *MD =
             P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
         MD->invalidateCachedPredecessors();
     }
   }
-  
+
   BB->eraseFromParent();
   return true;
 }
@@ -251,11 +251,11 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
   }
 }
 
-/// SplitEdge -  Split the edge connecting specified block. Pass P must 
-/// not be NULL. 
+/// SplitEdge -  Split the edge connecting specified block. Pass P must
+/// not be NULL.
 BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
   unsigned SuccNum = GetSuccessorNumber(BB, Succ);
-  
+
   // If this is a critical edge, let SplitCriticalEdge do it.
   TerminatorInst *LatchTerm = BB->getTerminator();
   if (SplitCriticalEdge(LatchTerm, SuccNum, P))
@@ -271,11 +271,11 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
     SP = NULL;
     return SplitBlock(Succ, Succ->begin(), P);
   }
-  
+
   // Otherwise, if BB has a single successor, split it at the bottom of the
   // block.
   assert(BB->getTerminator()->getNumSuccessors() == 1 &&
-         "Should have a single succ!"); 
+         "Should have a single succ!");
   return SplitBlock(BB, BB->getTerminator(), P);
 }
 
@@ -301,12 +301,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
     if (DomTreeNode *OldNode = DT->getNode(Old)) {
       std::vector<DomTreeNode *> Children;
       for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
-           I != E; ++I) 
+           I != E; ++I)
         Children.push_back(*I);
 
       DomTreeNode *NewNode = DT->addNewBlock(New,Old);
       for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
-             E = Children.end(); I != E; ++I) 
+             E = Children.end(); I != E; ++I)
         DT->changeImmediateDominator(*I, NewNode);
     }
   }
@@ -424,7 +424,7 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
       PHINode *NewPHI =
         PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
       if (AA) AA->copyValue(PN, NewPHI);
-      
+
       // Move all of the PHI values for 'Preds' to the new PHI.
       for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
         Value *V = PN->removeIncomingValue(Preds[i], false);
@@ -451,16 +451,16 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
 /// preserve LoopSimplify (because it's complicated to handle the case where one
 /// of the edges being split is an exit of a loop with other exits).
 ///
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, 
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
                                          ArrayRef<BasicBlock*> Preds,
                                          const char *Suffix, Pass *P) {
   // Create new basic block, insert right before the original block.
   BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
                                          BB->getParent(), BB);
-  
+
   // The new block unconditionally branches to the old block.
   BranchInst *BI = BranchInst::Create(BB, NewBB);
-  
+
   // Move the edges from Preds to point to NewBB instead of BB.
   for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
     // This is slightly more strict than necessary; the minimum requirement
@@ -497,13 +497,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
 /// block gets the remaining predecessors of OrigBB. The landingpad instruction
 /// OrigBB is clone into both of the new basic blocks. The new blocks are given
 /// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
-/// 
+///
 /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
 /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
 /// it does not preserve LoopSimplify (because it's complicated to handle the
 /// case where one of the edges being split is an exit of a loop with other
 /// exits).
-/// 
+///
 void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
                                        ArrayRef<BasicBlock*> Preds,
                                        const char *Suffix1, const char *Suffix2,
@@ -608,11 +608,11 @@ void llvm::FindFunctionBackedges(const Function &F,
   const BasicBlock *BB = &F.getEntryBlock();
   if (succ_begin(BB) == succ_end(BB))
     return;
-  
+
   SmallPtrSet<const BasicBlock*, 8> Visited;
   SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
   SmallPtrSet<const BasicBlock*, 8> InStack;
-  
+
   Visited.insert(BB);
   VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
   InStack.insert(BB);
@@ -620,7 +620,7 @@ void llvm::FindFunctionBackedges(const Function &F,
     std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
     const BasicBlock *ParentBB = Top.first;
     succ_const_iterator &I = Top.second;
-    
+
     bool FoundNew = false;
     while (I != succ_end(ParentBB)) {
       BB = *I++;
@@ -632,7 +632,7 @@ void llvm::FindFunctionBackedges(const Function &F,
       if (InStack.count(BB))
         Result.push_back(std::make_pair(ParentBB, BB));
     }
-    
+
     if (FoundNew) {
       // Go down one level if there is a unvisited successor.
       InStack.insert(BB);
@@ -641,7 +641,7 @@ void llvm::FindFunctionBackedges(const Function &F,
       // Go up one level.
       InStack.erase(VisitStack.pop_back_val().first);
     }
-  } while (!VisitStack.empty()); 
+  } while (!VisitStack.empty());
 }
 
 /// FoldReturnIntoUncondBranch - This method duplicates the specified return
@@ -655,7 +655,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
   // Clone the return and add it to the end of the predecessor.
   Instruction *NewRet = RI->clone();
   Pred->getInstList().push_back(NewRet);
-      
+
   // If the return instruction returns a value, and if the value was a
   // PHI node in "BB", propagate the right value into the return.
   for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
@@ -679,7 +679,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
       }
     }
   }
-      
+
   // Update any PHI nodes in the returning block to realize that we no
   // longer branch to them.
   BB->removePredecessor(Pred);
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 6b04e3d17b9b..8513772da2e8 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -17,17 +17,17 @@
 
 #define DEBUG_TYPE "break-crit-edges"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 using namespace llvm;
 
 STATISTIC(NumBroken, "Number of blocks inserted");
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 74b2ee10e01d..6d13217df55d 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -12,17 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 
 using namespace llvm;
@@ -40,16 +38,16 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AttributeSet AS[2];
+  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            ArrayRef<Attribute::AttrKind>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Constant *StrLen = M->getOrInsertFunction("strlen",
-                                            AttrListPtr::get(M->getContext(),
-                                                             AWI),
+                                            AttributeSet::get(M->getContext(),
+                                                              AS),
                                             TD->getIntPtrType(Context),
                                             B.getInt8PtrTy(),
                                             NULL);
@@ -69,16 +67,16 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AttributeSet AS[2];
+  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            ArrayRef<Attribute::AttrKind>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Constant *StrNLen = M->getOrInsertFunction("strnlen",
-                                             AttrListPtr::get(M->getContext(),
-                                                              AWI),
+                                             AttributeSet::get(M->getContext(),
+                                                              AS),
                                              TD->getIntPtrType(Context),
                                              B.getInt8PtrTy(),
                                              TD->getIntPtrType(Context),
@@ -99,16 +97,16 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AttributeWithIndex AWI =
-    AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                            ArrayRef<Attributes::AttrVal>(AVs, 2));
+  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+  AttributeSet AS =
+    AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                      ArrayRef<Attribute::AttrKind>(AVs, 2));
 
   Type *I8Ptr = B.getInt8PtrTy();
   Type *I32Ty = B.getInt32Ty();
   Constant *StrChr = M->getOrInsertFunction("strchr",
-                                            AttrListPtr::get(M->getContext(),
-                                                             AWI),
+                                            AttributeSet::get(M->getContext(),
+                                                             AS),
                                             I8Ptr, I8Ptr, I32Ty, NULL);
   CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
                                ConstantInt::get(I32Ty, C), "strchr");
@@ -125,17 +123,17 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AttributeSet AS[3];
+  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+  AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            ArrayRef<Attribute::AttrKind>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *StrNCmp = M->getOrInsertFunction("strncmp",
-                                          AttrListPtr::get(M->getContext(),
-                                                           AWI),
+                                          AttributeSet::get(M->getContext(),
+                                                           AS),
                                           B.getInt32Ty(),
                                           B.getInt8PtrTy(),
                                           B.getInt8PtrTy(),
@@ -158,13 +156,13 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   Attributes::NoUnwind);
+  AttributeSet AS[2];
+  AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            Attribute::NoUnwind);
   Type *I8Ptr = B.getInt8PtrTy();
   Value *StrCpy = M->getOrInsertFunction(Name,
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AS),
                                          I8Ptr, I8Ptr, I8Ptr, NULL);
   CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
                                Name);
@@ -182,14 +180,14 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   Attributes::NoUnwind);
+  AttributeSet AS[2];
+  AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            Attribute::NoUnwind);
   Type *I8Ptr = B.getInt8PtrTy();
   Value *StrNCpy = M->getOrInsertFunction(Name,
-                                          AttrListPtr::get(M->getContext(),
-                                                           AWI),
+                                          AttributeSet::get(M->getContext(),
+                                                            AS),
                                           I8Ptr, I8Ptr, I8Ptr,
                                           Len->getType(), NULL);
   CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
@@ -209,12 +207,12 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI;
-  AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                Attributes::NoUnwind);
+  AttributeSet AS;
+  AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                         Attribute::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AS),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
@@ -237,13 +235,13 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI;
-  Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AttributeSet AS;
+  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+  AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                         ArrayRef<Attribute::AttrKind>(AVs, 2));
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemChr = M->getOrInsertFunction("memchr",
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AS),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
                                          B.getInt32Ty(),
@@ -265,16 +263,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AttributeSet AS[3];
+  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+  AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            ArrayRef<Attribute::AttrKind>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemCmp = M->getOrInsertFunction("memcmp",
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AS),
                                          B.getInt32Ty(),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
@@ -293,7 +291,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
 /// returns one value with the same type.  If 'Op' is a long double, 'l' is
 /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
 Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
-                                  const AttrListPtr &Attrs) {
+                                  const AttributeSet &Attrs) {
   SmallString<20> NameBuffer;
   if (!Op->getType()->isDoubleTy()) {
     // If we need to add a suffix, copy into NameBuffer.
@@ -346,13 +344,13 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   Attributes::NoUnwind);
+  AttributeSet AS[2];
+  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            Attribute::NoUnwind);
 
   Value *PutS = M->getOrInsertFunction("puts",
-                                       AttrListPtr::get(M->getContext(), AWI),
+                                       AttributeSet::get(M->getContext(), AS),
                                        B.getInt32Ty(),
                                        B.getInt8PtrTy(),
                                        NULL);
@@ -370,14 +368,14 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   Attributes::NoUnwind);
+  AttributeSet AS[2];
+  AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            Attribute::NoUnwind);
   Constant *F;
   if (File->getType()->isPointerTy())
     F = M->getOrInsertFunction("fputc",
-                               AttrListPtr::get(M->getContext(), AWI),
+                               AttributeSet::get(M->getContext(), AS),
                                B.getInt32Ty(),
                                B.getInt32Ty(), File->getType(),
                                NULL);
@@ -403,16 +401,16 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   Attributes::NoUnwind);
+  AttributeSet AS[3];
+  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+  AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            Attribute::NoUnwind);
   StringRef FPutsName = TLI->getName(LibFunc::fputs);
   Constant *F;
   if (File->getType()->isPointerTy())
     F = M->getOrInsertFunction(FPutsName,
-                               AttrListPtr::get(M->getContext(), AWI),
+                               AttributeSet::get(M->getContext(), AS),
                                B.getInt32Ty(),
                                B.getInt8PtrTy(),
                                File->getType(), NULL);
@@ -436,17 +434,17 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
     return 0;
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
-                                   Attributes::NoUnwind);
+  AttributeSet AS[3];
+  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+  AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture);
+  AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+                            Attribute::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteName = TLI->getName(LibFunc::fwrite);
   Constant *F;
   if (File->getType()->isPointerTy())
     F = M->getOrInsertFunction(FWriteName,
-                               AttrListPtr::get(M->getContext(), AWI),
+                               AttributeSet::get(M->getContext(), AS),
                                TD->getIntPtrType(Context),
                                B.getInt8PtrTy(),
                                TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index bee2f7bcb6ea..1f517d038d19 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -16,11 +16,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "bypass-slow-division"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
 
 using namespace llvm;
 
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
 
   // Compare operand values and branch
-  Value *ZeroV = MainBuilder.getInt32(0);
+  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
 
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
 
     // Get bitwidth of div/rem instruction
     IntegerType *T = cast<IntegerType>(J->getType());
-    int bitwidth = T->getBitWidth();
+    unsigned int bitwidth = T->getBitWidth();
 
     // Continue if bitwidth is not bypassed
     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 620209bccbc8..b71628bcb28e 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_llvm_library(LLVMTransformUtils
-  AddrModeMatcher.cpp
   BasicBlockUtils.cpp
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 7ba9f6d9d25d..63d7a1d52aa5 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -14,22 +14,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/ADT/SmallVector.h"
 #include <map>
 using namespace llvm;
 
@@ -94,19 +94,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     //Some arguments were deleted with the VMap. Copy arguments one by one
     for (Function::const_arg_iterator I = OldFunc->arg_begin(), 
            E = OldFunc->arg_end(); I != E; ++I)
-      if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
-        Anew->addAttr( OldFunc->getAttributes()
-                       .getParamAttributes(I->getArgNo() + 1));
+      if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
+        AttributeSet attrs = OldFunc->getAttributes()
+          .getParamAttributes(I->getArgNo() + 1);
+        if (attrs.getNumSlots() > 0)
+          Anew->addAttr(attrs);
+      }
     NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttr(NewFunc->getContext(),
-                                    AttrListPtr::ReturnIndex,
-                                    OldFunc->getAttributes()
-                                     .getRetAttributes()));
+                           .addAttributes(NewFunc->getContext(),
+                                          AttributeSet::ReturnIndex,
+                                          OldFunc->getAttributes()));
     NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttr(NewFunc->getContext(),
-                                    AttrListPtr::FunctionIndex,
-                                    OldFunc->getAttributes()
-                                     .getFnAttributes()));
+                           .addAttributes(NewFunc->getContext(),
+                                          AttributeSet::FunctionIndex,
+                                          OldFunc->getAttributes()));
 
   }
 
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1dac6b5b8bce..64df089e1b81 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constant.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 using namespace llvm;
 
@@ -38,10 +38,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
   New->setTargetTriple(M->getTargetTriple());
   New->setModuleInlineAsm(M->getModuleInlineAsm());
    
-  // Copy all of the dependent libraries over.
-  for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
-    New->addLibrary(*I);
-
   // Loop over all of the global variables, making corresponding globals in the
   // new module.  Here we add them to the VMap and to the new Module.  We
   // don't worry about attributes or initializers, they will come later.
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 9b099150a7af..8fa412a18b99 100644
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CmpInstAnalysis.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 281714f4c100..f7c659f2193b 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,25 +14,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <algorithm>
 #include <set>
 using namespace llvm;
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 99b58301634a..db525cdc24d8 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -7,11 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
 using namespace llvm;
 
 /// DemoteRegToStack - This function takes a virtual register computed by an
@@ -78,12 +79,21 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
     InsertPt = &I;
     ++InsertPt;
   } else {
-    // We cannot demote invoke instructions to the stack if their normal edge
-    // is critical.
     InvokeInst &II = cast<InvokeInst>(I);
-    assert(II.getNormalDest()->getSinglePredecessor() &&
-           "Cannot demote invoke with a critical successor!");
-    InsertPt = II.getNormalDest()->begin();
+    if (II.getNormalDest()->getSinglePredecessor())
+      InsertPt = II.getNormalDest()->getFirstInsertionPt();
+    else {
+      // We cannot demote invoke instructions to the stack if their normal edge
+      // is critical.  Therefore, split the critical edge and insert the store
+      // in the newly created basic block.
+      unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
+      TerminatorInst *TI = &cast<TerminatorInst>(I);
+      assert (isCriticalEdge(TI, SuccNum) &&
+              "Expected a critical edge!");
+      BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
+      assert (BB && "Unable to split critical edge.");
+      InsertPt = BB->getFirstInsertionPt();
+    }
   }
 
   for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
@@ -124,7 +134,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
   }
 
   // Insert a load in place of the PHI and replace all uses.
-  Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+  BasicBlock::iterator InsertPt = P;
+
+  for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+    /* empty */;   // Don't insert before PHI nodes or landingpad instrs.
+
+  Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt);
   P->replaceAllUsesWith(V);
 
   // Delete PHI.
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 009847f87bce..e9828d60cd55 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -13,21 +13,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
@@ -82,7 +82,8 @@ namespace {
     /// a simple branch. When there is more than one predecessor, we need to
     /// split the landing pad block after the landingpad instruction and jump
     /// to there.
-    void forwardResume(ResumeInst *RI);
+    void forwardResume(ResumeInst *RI,
+                       SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
 
     /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
     /// destination block for the given basic block, using the values for the
@@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
 /// block. When the landing pad block has only one predecessor, this is a simple
 /// branch. When there is more than one predecessor, we need to split the
 /// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+                               SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
   BasicBlock *Dest = getInnerResumeDest();
+  LandingPadInst *OuterLPad = getLandingPadInst();
   BasicBlock *Src = RI->getParent();
 
   BranchInst::Create(Dest, Src);
@@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
 
   InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
   RI->eraseFromParent();
+
+  // Append the clauses from the outer landing pad instruction into the inlined
+  // landing pad instructions.
+  for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+         E = InlinedLPads.end(); I != E; ++I) {
+    LandingPadInst *InlinedLPad = *I;
+    for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+         OuterIdx != OuterNum; ++OuterIdx)
+      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+  }
 }
 
 /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 
   // The inlined code is currently at the end of the function, scan from the
   // start of the inlined code to its end, checking for stuff we need to
-  // rewrite.  If the code doesn't have calls or unwinds, we know there is
-  // nothing to rewrite.
-  if (!InlinedCodeInfo.ContainsCalls) {
-    // Now that everything is happy, we have one final detail.  The PHI nodes in
-    // the exception destination block still have entries due to the original
-    // invoke instruction.  Eliminate these entries (which might even delete the
-    // PHI node) now.
-    InvokeDest->removePredecessor(II->getParent());
-    return;
-  }
-
+  // rewrite.
   InvokeInliningInfo Invoke(II);
-  
+
+  // Get all of the inlined landing pad instructions.
+  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+  for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+      InlinedLPads.insert(II->getLandingPadInst());
+
   for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
     if (InlinedCodeInfo.ContainsCalls)
       if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
         continue;
       }
 
+    // Forward any resumes that are remaining here.
     if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
-      Invoke.forwardResume(RI);
+      Invoke.forwardResume(RI, InlinedLPads);
   }
 
   // Now that everything is happy, we have one final detail.  The PHI nodes in
   // the exception destination block still have entries due to the original
-  // invoke instruction.  Eliminate these entries (which might even delete the
+  // invoke instruction. Eliminate these entries (which might even delete the
   // PHI node) now.
   InvokeDest->removePredecessor(II->getParent());
 }
@@ -668,10 +678,29 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       if (hasLifetimeMarkers(AI))
         continue;
 
-      builder.CreateLifetimeStart(AI);
+      // Try to determine the size of the allocation.
+      ConstantInt *AllocaSize = 0;
+      if (ConstantInt *AIArraySize =
+          dyn_cast<ConstantInt>(AI->getArraySize())) {
+        if (IFI.TD) {
+          Type *AllocaType = AI->getAllocatedType();
+          uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+          uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+          assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
+          // Check that array size doesn't saturate uint64_t and doesn't
+          // overflow when it's multiplied by type size.
+          if (AllocaArraySize != ~0ULL &&
+              UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+            AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+                                          AllocaArraySize * AllocaTypeSize);
+          }
+        }
+      }
+
+      builder.CreateLifetimeStart(AI, AllocaSize);
       for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
         IRBuilder<> builder(Returns[ri]);
-        builder.CreateLifetimeEnd(AI);
+        builder.CreateLifetimeEnd(AI, AllocaSize);
       }
     }
   }
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 45c15de9437f..a020bc7398f5 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -15,9 +15,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Type.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 55227e2714e6..3cb8ded8506a 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -15,11 +15,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "integer-division"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IRBuilder.h"
 #include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 
 using namespace llvm;
 
@@ -418,3 +418,107 @@ bool llvm::expandDivision(BinaryOperator *Div) {
 
   return true;
 }
+
+/// Generate code to compute the remainder of two integers of bitwidth up to 
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for targets
+/// that have no or very little suppport for smaller than 32 bit integer 
+/// arithmetic.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+  assert((Rem->getOpcode() == Instruction::SRem ||
+          Rem->getOpcode() == Instruction::URem) &&
+          "Trying to expand remainder from a non-remainder function");
+
+  Type *RemTy = Rem->getType();
+  if (RemTy->isVectorTy())
+    llvm_unreachable("Div over vectors not supported");
+
+  unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+  if (RemTyBitWidth > 32) 
+    llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+  if (RemTyBitWidth == 32) 
+    return expandRemainder(Rem);
+
+  // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // with 32 bit division.
+  IRBuilder<> Builder(Rem);
+
+  Value *ExtDividend;
+  Value *ExtDivisor;
+  Value *ExtRem;
+  Value *Trunc;
+  Type *Int32Ty = Builder.getInt32Ty();
+
+  if (Rem->getOpcode() == Instruction::SRem) {
+    ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+    ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+  } else {
+    ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+    ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+  }
+  Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+  Rem->replaceAllUsesWith(Trunc);
+  Rem->dropAllReferences();
+  Rem->eraseFromParent();
+
+  return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have no
+/// or very little support for smaller than 32 bit integer arithmetic.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+          "Trying to expand division from a non-division function");
+
+  Type *DivTy = Div->getType();
+  if (DivTy->isVectorTy())
+    llvm_unreachable("Div over vectors not supported");
+
+  unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+  if (DivTyBitWidth > 32)
+    llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+  if (DivTyBitWidth == 32)
+    return expandDivision(Div);
+
+  // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // with 32 bit division.
+  IRBuilder<> Builder(Div);
+
+  Value *ExtDividend;
+  Value *ExtDivisor;
+  Value *ExtDiv;
+  Value *Trunc;
+  Type *Int32Ty = Builder.getInt32Ty();
+
+  if (Div->getOpcode() == Instruction::SDiv) {
+    ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+    ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+  } else {
+    ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);  
+  }
+  Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+  Div->replaceAllUsesWith(Trunc);
+  Div->dropAllReferences();
+  Div->eraseFromParent();
+
+  return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 5e05c83c3566..2d1b166c2101 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -29,17 +29,17 @@
 
 #define DEBUG_TYPE "lcssa"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 using namespace llvm;
 
 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a954d82c05bf..be80d34d960f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -13,33 +13,34 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Metadata.h"
-#include "llvm/Operator.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -604,7 +605,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
   // possible to handle such cases, but difficult: it requires checking whether
   // BB dominates Succ, which is non-trivial to calculate in the case where
   // Succ has multiple predecessors.  Also, it requires checking whether
-  // constructing the necessary self-referential PHI node doesn't intoduce any
+  // constructing the necessary self-referential PHI node doesn't introduce any
   // conflicts; this isn't too difficult, but the previous code for doing this
   // was incorrect.
   //
@@ -928,3 +929,73 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
 
   return 0;
 }
+
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+                                      DIBuilder &Builder) {
+  DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+  if (!DDI)
+    return false;
+  DIVariable DIVar(DDI->getVariable());
+  if (!DIVar.Verify())
+    return false;
+
+  // Create a copy of the original DIDescriptor for user variable, appending
+  // "deref" operation to a list of address elements, as new llvm.dbg.declare
+  // will take a value storing address of the memory for variable, not
+  // alloca itself.
+  Type *Int64Ty = Type::getInt64Ty(AI->getContext());
+  SmallVector<Value*, 4> NewDIVarAddress;
+  if (DIVar.hasComplexAddress()) {
+    for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) {
+      NewDIVarAddress.push_back(
+          ConstantInt::get(Int64Ty, DIVar.getAddrElement(i)));
+    }
+  }
+  NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref));
+  DIVariable NewDIVar = Builder.createComplexVariable(
+      DIVar.getTag(), DIVar.getContext(), DIVar.getName(),
+      DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(),
+      NewDIVarAddress, DIVar.getArgNumber());
+
+  // Insert llvm.dbg.declare in the same basic block as the original alloca,
+  // and remove old llvm.dbg.declare.
+  BasicBlock *BB = AI->getParent();
+  Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB);
+  DDI->eraseFromParent();
+  return true;
+}
+
+bool llvm::removeUnreachableBlocks(Function &F) {
+  SmallPtrSet<BasicBlock*, 16> Reachable;
+  SmallVector<BasicBlock*, 128> Worklist;
+  Worklist.push_back(&F.getEntryBlock());
+  Reachable.insert(&F.getEntryBlock());
+  do {
+    BasicBlock *BB = Worklist.pop_back_val();
+    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+      if (Reachable.insert(*SI))
+        Worklist.push_back(*SI);
+  } while (!Worklist.empty());
+
+  if (Reachable.size() == F.size())
+    return false;
+
+  assert(Reachable.size() < F.size());
+  for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) {
+    if (Reachable.count(I))
+      continue;
+
+    for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
+      if (Reachable.count(*SI))
+        (*SI)->removePredecessor(I);
+    I->dropAllReferences();
+  }
+
+  for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+    if (!Reachable.count(I))
+      I = F.getBasicBlockList().erase(I);
+    else
+      ++I;
+
+  return true;
+}
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 9d9e20166564..37819cc9c917 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -39,26 +39,26 @@
 
 #define DEBUG_TYPE "loop-simplify"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 20237500c37f..cb581b3d13b9 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -18,12 +18,12 @@
 
 #define DEBUG_TYPE "loop-unroll"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 67e17f4ca8e8..d801d5f2c2a4 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -23,12 +23,12 @@
 
 #define DEBUG_TYPE "loop-unroll"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 02bdcda39194..4aee8ff51a4e 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -12,17 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "lower-expect-intrinsic"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Metadata.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include <vector>
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 930555424ded..9ec84d730e46 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -36,19 +36,19 @@
 
 #define DEBUG_TYPE "lowerinvoke"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <csetjmp>
 #include <set>
 using namespace llvm;
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 1547439b5c6b..955b853533b0 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,16 +14,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index f4ca81af6d87..61b3965d8f11 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -14,12 +14,12 @@
 
 #define DEBUG_TYPE "mem2reg"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumPromoted, "Number of alloca's promoted");
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 233bc12d3cfd..3716f586ff06 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -13,16 +13,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeFinder.h"
 #include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/TypeFinder.h"
-
 using namespace llvm;
 
 namespace {
@@ -37,7 +36,7 @@ namespace {
       next = seed;
     }
 
-    int rand(void) {
+    int rand() {
       next = next * 1103515245 + 12345;
       return (unsigned int)(next / 65536) % 32768;
     }
@@ -73,13 +72,23 @@ namespace {
 
       // Rename all aliases
       for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
-           AI != AE; ++AI)
-        AI->setName("alias");
+           AI != AE; ++AI) {
+        StringRef Name = AI->getName();
+        if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+          continue;
 
+        AI->setName("alias");
+      }
+      
       // Rename all global variables
       for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
-           GI != GE; ++GI)
+           GI != GE; ++GI) {
+        StringRef Name = GI->getName();
+        if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+          continue;
+
         GI->setName("global");
+      }
 
       // Rename all struct types
       TypeFinder StructTypes;
@@ -96,6 +105,10 @@ namespace {
       // Rename all functions
       for (Module::iterator FI = M.begin(), FE = M.end();
            FI != FE; ++FI) {
+        StringRef Name = FI->getName();
+        if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+          continue;
+
         FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
         runOnFunction(*FI);
       }
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index dbcf3b2fe268..d090b487213b 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 558de9d12e6c..de335ec1a05c 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -27,26 +27,26 @@
 
 #define DEBUG_TYPE "mem2reg"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Metadata.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <queue>
 using namespace llvm;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 72d4199a2a69..9d90fbe5654a 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,12 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "ssaupdater"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CFG.h"
@@ -25,7 +26,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c767da624e19..681bf9c2b7a4 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -13,19 +13,6 @@
 
 #define DEBUG_TYPE "simplifycfg"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -33,18 +20,31 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/NoFolder.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <algorithm>
-#include <set>
 #include <map>
+#include <set>
 using namespace llvm;
 
 static cl::opt<unsigned>
@@ -82,8 +82,8 @@ namespace {
   };
 
 class SimplifyCFGOpt {
+  const TargetTransformInfo &TTI;
   const DataLayout *const TD;
-  const TargetTransformInfo *const TTI;
 
   Value *isValueEqualityComparison(TerminatorInst *TI);
   BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -103,8 +103,8 @@ class SimplifyCFGOpt {
   bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
 
 public:
-  SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti)
-      : TD(td), TTI(tti) {}
+  SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
+      : TTI(TTI), TD(TD) {}
   bool run(BasicBlock *BB);
 };
 }
@@ -858,7 +858,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
 
       if (PredHasWeights) {
         GetBranchWeights(PTI, Weights);
-        // branch-weight metadata is inconsistant here.
+        // branch-weight metadata is inconsistent here.
         if (Weights.size() != 1 + PredCases.size())
           PredHasWeights = SuccHasWeights = false;
       } else if (SuccHasWeights)
@@ -870,7 +870,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
       SmallVector<uint64_t, 8> SuccWeights;
       if (SuccHasWeights) {
         GetBranchWeights(TI, SuccWeights);
-        // branch-weight metadata is inconsistant here.
+        // branch-weight metadata is inconsistent here.
         if (SuccWeights.size() != 1 + BBCases.size())
           PredHasWeights = SuccHasWeights = false;
       } else if (PredHasWeights)
@@ -967,8 +967,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
         for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
                                     PTIHandled.begin(),
                E = PTIHandled.end(); I != E; ++I) {
-          if (PredHasWeights || SuccHasWeights) 
-            Weights.push_back(WeightsForHandled[*I]); 
+          if (PredHasWeights || SuccHasWeights)
+            Weights.push_back(WeightsForHandled[*I]);
           PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
           NewSuccessors.push_back(BBDefault);
         }
@@ -1193,7 +1193,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
        I != E; ++I) {
     if (PHINode *PN = dyn_cast<PHINode>(I)) {
       Value *BB1V = PN->getIncomingValueForBlock(BB1);
-      Value *BB2V = PN->getIncomingValueForBlock(BB2); 
+      Value *BB2V = PN->getIncomingValueForBlock(BB2);
       MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
     } else {
       FirstNonPhiInBBEnd = &*I;
@@ -1202,7 +1202,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
   }
   if (!FirstNonPhiInBBEnd)
     return false;
-  
+
 
   // This does very trivial matching, with limited scanning, to find identical
   // instructions in the two blocks.  We scan backward for obviously identical
@@ -1332,149 +1332,180 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
   return Changed;
 }
 
-/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
-/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
-/// (for now, restricted to a single instruction that's side effect free) from
-/// the BB1 into the branch block to speculatively execute it.
+/// \brief Speculate a conditional basic block flattening the CFG.
+///
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should do directly. An example of
+/// this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations such as are common in
+/// instcombine and the DAG combiner more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
 ///
-/// Turn
-/// BB:
-///     %t1 = icmp
-///     br i1 %t1, label %BB1, label %BB2
-/// BB1:
-///     %t3 = add %t2, c
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+///   BB:
+///     %cmp = icmp ult %x, %y
+///     br i1 %cmp, label %EndBB, label %ThenBB
+///   ThenBB:
+///     %sub = sub %x, %y
 ///     br label BB2
-/// BB2:
-/// =>
-/// BB:
-///     %t1 = icmp
-///     %t4 = add %t2, c
-///     %t3 = select i1 %t1, %t2, %t3
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
-  // Only speculatively execution a single instruction (not counting the
-  // terminator) for now.
-  Instruction *HInst = NULL;
-  Instruction *Term = BB1->getTerminator();
-  for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+///   EndBB:
+///     %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
+///     ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+///   BB:
+///     %cmp = icmp ult %x, %y
+///     %sub = sub %x, %y
+///     %cond = select i1 %cmp, 0, %sub
+///     ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
+  // Be conservative for now. FP select instruction can often be expensive.
+  Value *BrCond = BI->getCondition();
+  if (isa<FCmpInst>(BrCond))
+    return false;
+
+  BasicBlock *BB = BI->getParent();
+  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+
+  // If ThenBB is actually on the false edge of the conditional branch, remember
+  // to swap the select operands later.
+  bool Invert = false;
+  if (ThenBB != BI->getSuccessor(0)) {
+    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
+    Invert = true;
+  }
+  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
+
+  // Keep a count of how many times instructions are used within CondBB when
+  // they are candidates for sinking into CondBB. Specifically:
+  // - They are defined in BB, and
+  // - They have no side effects, and
+  // - All of their uses are in CondBB.
+  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
+  unsigned SpeculationCost = 0;
+  for (BasicBlock::iterator BBI = ThenBB->begin(),
+                            BBE = llvm::prior(ThenBB->end());
        BBI != BBE; ++BBI) {
     Instruction *I = BBI;
     // Skip debug info.
-    if (isa<DbgInfoIntrinsic>(I)) continue;
-    if (I == Term) break;
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
 
-    if (HInst)
+    // Only speculatively execution a single instruction (not counting the
+    // terminator) for now.
+    ++SpeculationCost;
+    if (SpeculationCost > 1)
       return false;
-    HInst = I;
-  }
-
-  BasicBlock *BIParent = BI->getParent();
 
-  // Check the instruction to be hoisted, if there is one.
-  if (HInst) {
     // Don't hoist the instruction if it's unsafe or expensive.
-    if (!isSafeToSpeculativelyExecute(HInst))
+    if (!isSafeToSpeculativelyExecute(I))
       return false;
-    if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold)
+    if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
       return false;
 
     // Do not hoist the instruction if any of its operands are defined but not
     // used in this BB. The transformation will prevent the operand from
     // being sunk into the use block.
-    for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+    for (User::op_iterator i = I->op_begin(), e = I->op_end();
          i != e; ++i) {
       Instruction *OpI = dyn_cast<Instruction>(*i);
-      if (OpI && OpI->getParent() == BIParent &&
-          !OpI->mayHaveSideEffects() &&
-          !OpI->isUsedInBasicBlock(BIParent))
-        return false;
+      if (!OpI || OpI->getParent() != BB ||
+          OpI->mayHaveSideEffects())
+        continue; // Not a candidate for sinking.
+
+      ++SinkCandidateUseCounts[OpI];
     }
   }
 
-  // Be conservative for now. FP select instruction can often be expensive.
-  Value *BrCond = BI->getCondition();
-  if (isa<FCmpInst>(BrCond))
-    return false;
-
-  // If BB1 is actually on the false edge of the conditional branch, remember
-  // to swap the select operands later.
-  bool Invert = false;
-  if (BB1 != BI->getSuccessor(0)) {
-    assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
-    Invert = true;
-  }
+  // Consider any sink candidates which are only used in CondBB as costs for
+  // speculation. Note, while we iterate over a DenseMap here, we are summing
+  // and so iteration order isn't significant.
+  for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
+           SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
+       I != E; ++I)
+    if (I->first->getNumUses() == I->second) {
+      ++SpeculationCost;
+      if (SpeculationCost > 1)
+        return false;
+    }
 
-  // Collect interesting PHIs, and scan for hazards.
-  SmallSetVector<std::pair<Value *, Value *>, 4> PHIs;
-  BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
-  for (BasicBlock::iterator I = BB2->begin();
+  // Check that the PHI nodes can be converted to selects.
+  bool HaveRewritablePHIs = false;
+  for (BasicBlock::iterator I = EndBB->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-    Value *BB1V = PN->getIncomingValueForBlock(BB1);
-    Value *BIParentV = PN->getIncomingValueForBlock(BIParent);
+    Value *OrigV = PN->getIncomingValueForBlock(BB);
+    Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
 
     // Skip PHIs which are trivial.
-    if (BB1V == BIParentV)
+    if (ThenV == OrigV)
       continue;
 
-    // Check for saftey.
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) {
-      // An unfolded ConstantExpr could end up getting expanded into
-      // Instructions. Don't speculate this and another instruction at
-      // the same time.
-      if (HInst)
-        return false;
-      if (!isSafeToSpeculativelyExecute(CE))
-        return false;
-      if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
-        return false;
-    }
+    HaveRewritablePHIs = true;
+    ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
+    if (!CE)
+      continue; // Known safe and cheap.
 
-    // Ok, we may insert a select for this PHI.
-    PHIs.insert(std::make_pair(BB1V, BIParentV));
+    if (!isSafeToSpeculativelyExecute(CE))
+      return false;
+    if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+      return false;
+
+    // Account for the cost of an unfolded ConstantExpr which could end up
+    // getting expanded into Instructions.
+    // FIXME: This doesn't account for how many operations are combined in the
+    // constant expression.
+    ++SpeculationCost;
+    if (SpeculationCost > 1)
+      return false;
   }
 
   // If there are no PHIs to process, bail early. This helps ensure idempotence
   // as well.
-  if (PHIs.empty())
+  if (!HaveRewritablePHIs)
     return false;
 
   // If we get here, we can hoist the instruction and if-convert.
-  DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
+  DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
 
-  // Hoist the instruction.
-  if (HInst)
-    BIParent->getInstList().splice(BI, BB1->getInstList(), HInst);
+  // Hoist the instructions.
+  BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
+                           llvm::prior(ThenBB->end()));
 
   // Insert selects and rewrite the PHI operands.
   IRBuilder<true, NoFolder> Builder(BI);
-  for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
-    Value *TrueV = PHIs[i].first;
-    Value *FalseV = PHIs[i].second;
+  for (BasicBlock::iterator I = EndBB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    unsigned OrigI = PN->getBasicBlockIndex(BB);
+    unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
+    Value *OrigV = PN->getIncomingValue(OrigI);
+    Value *ThenV = PN->getIncomingValue(ThenI);
+
+    // Skip PHIs which are trivial.
+    if (OrigV == ThenV)
+      continue;
 
     // Create a select whose true value is the speculatively executed value and
-    // false value is the previously determined FalseV.
-    SelectInst *SI;
+    // false value is the preexisting value. Swap them if the branch
+    // destinations were inverted.
+    Value *TrueV = ThenV, *FalseV = OrigV;
     if (Invert)
-      SI = cast<SelectInst>
-        (Builder.CreateSelect(BrCond, FalseV, TrueV,
-                              FalseV->getName() + "." + TrueV->getName()));
-    else
-      SI = cast<SelectInst>
-        (Builder.CreateSelect(BrCond, TrueV, FalseV,
-                              TrueV->getName() + "." + FalseV->getName()));
-
-    // Make the PHI node use the select for all incoming values for "then" and
-    // "if" blocks.
-    for (BasicBlock::iterator I = BB2->begin();
-         PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-      unsigned BB1I = PN->getBasicBlockIndex(BB1);
-      unsigned BIParentI = PN->getBasicBlockIndex(BIParent);
-      Value *BB1V = PN->getIncomingValue(BB1I);
-      Value *BIParentV = PN->getIncomingValue(BIParentI);
-      if (TrueV == BB1V && FalseV == BIParentV) {
-        PN->setIncomingValue(BB1I, SI);
-        PN->setIncomingValue(BIParentI, SI);
-      }
-    }
+      std::swap(TrueV, FalseV);
+    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV,
+                                    TrueV->getName() + "." + FalseV->getName());
+    PN->setIncomingValue(OrigI, V);
+    PN->setIncomingValue(ThenI, V);
   }
 
   ++NumSpeculations;
@@ -2522,9 +2553,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
 ///
 /// We prefer to split the edge to 'end' so that there is a true/false entry to
 /// the PHI, merging the third icmp into the switch.
-static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
-                                                  const DataLayout *TD,
-                                                  IRBuilder<> &Builder) {
+static bool TryToSimplifyUncondBranchWithICmpInIt(
+    ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
+    const DataLayout *TD) {
   BasicBlock *BB = ICI->getParent();
 
   // If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2557,7 +2588,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
       ICI->eraseFromParent();
     }
     // BB is now empty, so it is likely to simplify away.
-    return SimplifyCFG(BB) | true;
+    return SimplifyCFG(BB, TTI, TD) | true;
   }
 
   // Ok, the block is reachable from the default dest.  If the constant we're
@@ -2573,7 +2604,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
     ICI->replaceAllUsesWith(V);
     ICI->eraseFromParent();
     // BB is now empty, so it is likely to simplify away.
-    return SimplifyCFG(BB) | true;
+    return SimplifyCFG(BB, TTI, TD) | true;
   }
 
   // The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -2758,9 +2789,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
       return false;
 
   // Turn all invokes that unwind here into calls and delete the basic block.
+  bool InvokeRequiresTableEntry = false;
+  bool Changed = false;
   for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
     InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+
+    if (II->hasFnAttr(Attribute::UWTable)) {
+      // Don't remove an `invoke' instruction if the ABI requires an entry into
+      // the table.
+      InvokeRequiresTableEntry = true;
+      continue;
+    }
+
     SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+
     // Insert a call instruction before the invoke.
     CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
     Call->takeName(II);
@@ -2780,11 +2822,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
 
     // Finally, delete the invoke instruction!
     II->eraseFromParent();
+    Changed = true;
   }
 
-  // The landingpad is now unreachable.  Zap it.
-  BB->eraseFromParent();
-  return true;
+  if (!InvokeRequiresTableEntry)
+    // The landingpad is now unreachable.  Zap it.
+    BB->eraseFromParent();
+
+  return Changed;
 }
 
 bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3382,7 +3427,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
                                      ConstantInt *Offset,
                const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
                                      Constant *DefaultValue,
-                                     const DataLayout *TD) {
+                                     const DataLayout *TD)
+    : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
   assert(Values.size() && "Can't build lookup table without values!");
   assert(TableSize >= Values.size() && "Can't fit values in table!");
 
@@ -3510,23 +3556,44 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
 /// types of the results.
 static bool ShouldBuildLookupTable(SwitchInst *SI,
                                    uint64_t TableSize,
+                                   const TargetTransformInfo &TTI,
                                    const DataLayout *TD,
                             const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
-  // The table density should be at least 40%. This is the same criterion as for
-  // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
-  // FIXME: Find the best cut-off.
   if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
     return false; // TableSize overflowed, or mul below might overflow.
-  if (SI->getNumCases() * 10 >= TableSize * 4)
-    return true;
 
-  // If each table would fit in a register, we should build it anyway.
+  bool AllTablesFitInRegister = true;
+  bool HasIllegalType = false;
   for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
        E = ResultTypes.end(); I != E; ++I) {
-    if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
-      return false;
+    Type *Ty = I->second;
+
+    // Saturate this flag to true.
+    HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
+
+    // Saturate this flag to false.
+    AllTablesFitInRegister = AllTablesFitInRegister &&
+      SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty);
+
+    // If both flags saturate, we're done. NOTE: This *only* works with
+    // saturating flags, and all flags have to saturate first due to the
+    // non-deterministic behavior of iterating over a dense map.
+    if (HasIllegalType && !AllTablesFitInRegister)
+      break;
   }
-  return true;
+
+  // If each table would fit in a register, we should build it anyway.
+  if (AllTablesFitInRegister)
+    return true;
+
+  // Don't build a table that doesn't fit in-register if it has illegal types.
+  if (HasIllegalType)
+    return false;
+
+  // The table density should be at least 40%. This is the same criterion as for
+  // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+  // FIXME: Find the best cut-off.
+  return SI->getNumCases() * 10 >= TableSize * 4;
 }
 
 /// SwitchToLookupTable - If the switch is only used to initialize one or more
@@ -3534,13 +3601,12 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
 /// replace the switch with lookup tables.
 static bool SwitchToLookupTable(SwitchInst *SI,
                                 IRBuilder<> &Builder,
-                                const DataLayout* TD,
-                                const TargetTransformInfo *TTI) {
+                                const TargetTransformInfo &TTI,
+                                const DataLayout* TD) {
   assert(SI->getNumCases() > 1 && "Degenerate switch?");
 
   // Only build lookup table when we have a target that supports it.
-  if (!TTI || !TTI->getScalarTargetTransformInfo() ||
-      !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
+  if (!TTI.shouldBuildLookupTables())
     return false;
 
   // FIXME: If the switch is too sparse for a lookup table, perhaps we could
@@ -3607,7 +3673,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
 
   APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
   uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
-  if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
+  if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes))
     return false;
 
   // Create the BB that does the lookups.
@@ -3672,12 +3738,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
     // see if that predecessor totally determines the outcome of this switch.
     if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
       if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
 
     Value *Cond = SI->getCondition();
     if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
       if (SimplifySwitchOnSelect(SI, Select))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
 
     // If the block only contains the switch, see if we can fold the block
     // away into any preds.
@@ -3687,22 +3753,22 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
       ++BBI;
     if (SI == &*BBI)
       if (FoldValueComparisonIntoPredecessors(SI, Builder))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
   }
 
   // Try to transform the switch into an icmp and a branch.
   if (TurnSwitchRangeIntoICmp(SI, Builder))
-    return SimplifyCFG(BB) | true;
+    return SimplifyCFG(BB, TTI, TD) | true;
 
   // Remove unreachable cases.
   if (EliminateDeadSwitchCases(SI))
-    return SimplifyCFG(BB) | true;
+    return SimplifyCFG(BB, TTI, TD) | true;
 
   if (ForwardSwitchConditionToPHI(SI))
-    return SimplifyCFG(BB) | true;
+    return SimplifyCFG(BB, TTI, TD) | true;
 
-  if (SwitchToLookupTable(SI, Builder, TD, TTI))
-    return SimplifyCFG(BB) | true;
+  if (SwitchToLookupTable(SI, Builder, TTI, TD))
+    return SimplifyCFG(BB, TTI, TD) | true;
 
   return false;
 }
@@ -3739,7 +3805,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
 
   if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
     if (SimplifyIndirectBrOnSelect(IBI, SI))
-      return SimplifyCFG(BB) | true;
+      return SimplifyCFG(BB, TTI, TD) | true;
   }
   return Changed;
 }
@@ -3763,7 +3829,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
       for (++I; isa<DbgInfoIntrinsic>(I); ++I)
         ;
       if (I->isTerminator() &&
-          TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
+          TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD))
         return true;
     }
 
@@ -3772,7 +3838,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
   // predecessor and use logical operations to update the incoming value
   // for PHI nodes in common successor.
   if (FoldBranchToCommonDest(BI))
-    return SimplifyCFG(BB) | true;
+    return SimplifyCFG(BB, TTI, TD) | true;
   return false;
 }
 
@@ -3787,7 +3853,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
     // switch.
     if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
       if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
 
     // This block must be empty, except for the setcond inst, if it exists.
     // Ignore dbg intrinsics.
@@ -3797,14 +3863,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
       ++I;
     if (&*I == BI) {
       if (FoldValueComparisonIntoPredecessors(BI, Builder))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
     } else if (&*I == cast<Instruction>(BI->getCondition())){
       ++I;
       // Ignore dbg intrinsics.
       while (isa<DbgInfoIntrinsic>(I))
         ++I;
       if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
     }
   }
 
@@ -3816,7 +3882,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // branches to us and one of our successors, fold the comparison into the
   // predecessor and use logical operations to pick the right destination.
   if (FoldBranchToCommonDest(BI))
-    return SimplifyCFG(BB) | true;
+    return SimplifyCFG(BB, TTI, TD) | true;
 
   // We have a conditional branch to two blocks that are only reachable
   // from BI.  We know that the condbr dominates the two blocks, so see if
@@ -3825,7 +3891,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
     if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
       if (HoistThenElseCodeToIf(BI))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
     } else {
       // If Successor #1 has multiple preds, we may be able to conditionally
       // execute Successor #0 if it branches to successor #1.
@@ -3833,7 +3899,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
       if (Succ0TI->getNumSuccessors() == 1 &&
           Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
         if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
-          return SimplifyCFG(BB) | true;
+          return SimplifyCFG(BB, TTI, TD) | true;
     }
   } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
     // If Successor #0 has multiple preds, we may be able to conditionally
@@ -3842,7 +3908,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
     if (Succ1TI->getNumSuccessors() == 1 &&
         Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
       if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
   }
 
   // If this is a branch on a phi node in the current block, thread control
@@ -3850,14 +3916,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
     if (PN->getParent() == BI->getParent())
       if (FoldCondBranchOnPHI(BI, TD))
-        return SimplifyCFG(BB) | true;
+        return SimplifyCFG(BB, TTI, TD) | true;
 
   // Scan predecessor blocks for conditional branches.
   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
     if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
       if (PBI != BI && PBI->isConditional())
         if (SimplifyCondBranchToCondBranch(PBI, BI))
-          return SimplifyCFG(BB) | true;
+          return SimplifyCFG(BB, TTI, TD) | true;
 
   return false;
 }
@@ -3892,11 +3958,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
 
     // Load from null is undefined.
     if (LoadInst *LI = dyn_cast<LoadInst>(Use))
-      return LI->getPointerAddressSpace() == 0;
+      if (!LI->isVolatile())
+        return LI->getPointerAddressSpace() == 0;
 
     // Store to null is undefined.
     if (StoreInst *SI = dyn_cast<StoreInst>(Use))
-      return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+      if (!SI->isVolatile())
+        return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
   }
   return false;
 }
@@ -3998,7 +4066,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
 /// of the CFG.  It returns true if a modification was made.
 ///
-bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD,
-                       const TargetTransformInfo *TTI) {
-  return SimplifyCFGOpt(TD, TTI).run(BB);
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+                       const DataLayout *TD) {
+  return SimplifyCFGOpt(TTI, TD).run(BB);
 }
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 110f3808573e..41c207c3d5cb 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -15,18 +15,18 @@
 
 #define DEBUG_TYPE "indvars"
 
-#include "llvm/Instructions.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 65353dc46037..f9687e4d5890 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -15,17 +15,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "instsimplify"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
 #include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c3ea63852fed..c231704414fc 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -15,12 +15,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 
@@ -48,6 +53,10 @@ public:
   virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
     =0;
 
+  /// ignoreCallingConv - Returns false if this transformation could possibly
+  /// change the calling convention.
+  virtual bool ignoreCallingConv() { return false; }
+
   Value *optimizeCall(CallInst *CI, const DataLayout *TD,
                       const TargetLibraryInfo *TLI,
                       const LibCallSimplifier *LCS, IRBuilder<> &B) {
@@ -59,7 +68,7 @@ public:
       Context = &CI->getCalledFunction()->getContext();
 
     // We never change the calling convention.
-    if (CI->getCallingConv() != llvm::CallingConv::C)
+    if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
       return NULL;
 
     return callOptimizer(CI->getCalledFunction(), CI, B);
@@ -100,6 +109,15 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
   return true;
 }
 
+static bool callHasFloatingPointArgument(const CallInst *CI) {
+  for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+       it != e; ++it) {
+    if ((*it)->getType()->isFloatingPointTy())
+      return true;
+  }
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Fortified Library Call Optimizations
 //===----------------------------------------------------------------------===//
@@ -713,6 +731,7 @@ struct StrNCpyOpt : public LibCallOptimization {
 };
 
 struct StrLenOpt : public LibCallOptimization {
+  virtual bool ignoreCallingConv() { return true; }
   virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 1 ||
@@ -781,8 +800,7 @@ struct StrToOpt : public LibCallOptimization {
     if (isa<ConstantPointerNull>(EndPtr)) {
       // With a null EndPtr, this function won't capture the main argument.
       // It would be readonly too, except that it still may write to errno.
-      CI->addAttribute(1, Attributes::get(Callee->getContext(),
-                                          Attributes::NoCapture));
+      CI->addAttribute(1, Attribute::NoCapture);
     }
 
     return 0;
@@ -951,7 +969,14 @@ struct MemCmpOpt : public LibCallOptimization {
       // Make sure we're not reading out-of-bounds memory.
       if (Len > LHSStr.size() || Len > RHSStr.size())
         return 0;
-      uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+      // Fold the memcmp and normalize the result.  This way we get consistent
+      // results across multiple platforms.
+      uint64_t Ret = 0;
+      int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
+      if (Cmp < 0)
+        Ret = -1;
+      else if (Cmp > 0)
+        Ret = 1;
       return ConstantInt::get(CI->getType(), Ret);
     }
 
@@ -1016,6 +1041,630 @@ struct MemSetOpt : public LibCallOptimization {
   }
 };
 
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+  bool CheckRetType;
+  UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+        !FT->getParamType(0)->isDoubleTy())
+      return 0;
+
+    if (CheckRetType) {
+      // Check if all the uses for function like 'sin' are converted to float.
+      for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+          ++UseI) {
+        FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+        if (Cast == 0 || !Cast->getType()->isFloatTy())
+          return 0;
+      }
+    }
+
+    // If this is something like 'floor((double)floatval)', convert to floorf.
+    FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+    if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+      return 0;
+
+    // floor((double)floatval) -> (double)floorf(floatval)
+    Value *V = Cast->getOperand(0);
+    V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+    return B.CreateFPExt(V, B.getDoubleTy());
+  }
+};
+
+struct UnsafeFPLibCallOptimization : public LibCallOptimization {
+  bool UnsafeFPShrink;
+  UnsafeFPLibCallOptimization(bool UnsafeFPShrink) {
+    this->UnsafeFPShrink = UnsafeFPShrink;
+  }
+};
+
+struct CosOpt : public UnsafeFPLibCallOptimization {
+  CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "cos" &&
+        TLI->has(LibFunc::cosf)) {
+      UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+      Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+    }
+
+    FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has 1 argument of FP type, which matches the
+    // result type.
+    if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isFloatingPointTy())
+      return Ret;
+
+    // cos(-x) -> cos(x)
+    Value *Op1 = CI->getArgOperand(0);
+    if (BinaryOperator::isFNeg(Op1)) {
+      BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+      return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+    }
+    return Ret;
+  }
+};
+
+struct PowOpt : public UnsafeFPLibCallOptimization {
+  PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "pow" &&
+        TLI->has(LibFunc::powf)) {
+      UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+      Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+    }
+
+    FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has 2 arguments of the same FP type, which match the
+    // result type.
+    if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        !FT->getParamType(0)->isFloatingPointTy())
+      return Ret;
+
+    Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+    if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+      if (Op1C->isExactlyValue(1.0))  // pow(1.0, x) -> 1.0
+        return Op1C;
+      if (Op1C->isExactlyValue(2.0))  // pow(2.0, x) -> exp2(x)
+        return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+    }
+
+    ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+    if (Op2C == 0) return Ret;
+
+    if (Op2C->getValueAPF().isZero())  // pow(x, 0.0) -> 1.0
+      return ConstantFP::get(CI->getType(), 1.0);
+
+    if (Op2C->isExactlyValue(0.5)) {
+      // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+      // This is faster than calling pow, and still handles negative zero
+      // and negative infinity correctly.
+      // TODO: In fast-math mode, this could be just sqrt(x).
+      // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+      Value *Inf = ConstantFP::getInfinity(CI->getType());
+      Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+      Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+                                         Callee->getAttributes());
+      Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+                                         Callee->getAttributes());
+      Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+      Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
+      return Sel;
+    }
+
+    if (Op2C->isExactlyValue(1.0))  // pow(x, 1.0) -> x
+      return Op1;
+    if (Op2C->isExactlyValue(2.0))  // pow(x, 2.0) -> x*x
+      return B.CreateFMul(Op1, Op1, "pow2");
+    if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+      return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
+                          Op1, "powrecip");
+    return 0;
+  }
+};
+
+struct Exp2Opt : public UnsafeFPLibCallOptimization {
+  Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+        TLI->has(LibFunc::exp2)) {
+      UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+      Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+    }
+
+    FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has 1 argument of FP type, which matches the
+    // result type.
+    if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isFloatingPointTy())
+      return Ret;
+
+    Value *Op = CI->getArgOperand(0);
+    // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
+    // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32
+    Value *LdExpArg = 0;
+    if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+      if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+        LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+    } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+      if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+        LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+    }
+
+    if (LdExpArg) {
+      const char *Name;
+      if (Op->getType()->isFloatTy())
+        Name = "ldexpf";
+      else if (Op->getType()->isDoubleTy())
+        Name = "ldexp";
+      else
+        Name = "ldexpl";
+
+      Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+      if (!Op->getType()->isFloatTy())
+        One = ConstantExpr::getFPExtend(One, Op->getType());
+
+      Module *M = Caller->getParent();
+      Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+                                             Op->getType(),
+                                             B.getInt32Ty(), NULL);
+      CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+      if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+        CI->setCallingConv(F->getCallingConv());
+
+      return CI;
+    }
+    return Ret;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FFSOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has 2 arguments of the same FP type, which match the
+    // result type.
+    if (FT->getNumParams() != 1 ||
+        !FT->getReturnType()->isIntegerTy(32) ||
+        !FT->getParamType(0)->isIntegerTy())
+      return 0;
+
+    Value *Op = CI->getArgOperand(0);
+
+    // Constant fold.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+      if (CI->isZero()) // ffs(0) -> 0.
+        return B.getInt32(0);
+      // ffs(c) -> cttz(c)+1
+      return B.getInt32(CI->getValue().countTrailingZeros() + 1);
+    }
+
+    // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+    Type *ArgType = Op->getType();
+    Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+                                         Intrinsic::cttz, ArgType);
+    Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+    V = B.CreateIntCast(V, B.getInt32Ty(), false);
+
+    Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+    return B.CreateSelect(Cond, V, B.getInt32(0));
+  }
+};
+
+struct AbsOpt : public LibCallOptimization {
+  virtual bool ignoreCallingConv() { return true; }
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    // We require integer(integer) where the types agree.
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+        FT->getParamType(0) != FT->getReturnType())
+      return 0;
+
+    // abs(x) -> x >s -1 ? x : -x
+    Value *Op = CI->getArgOperand(0);
+    Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
+                                 "ispos");
+    Value *Neg = B.CreateNeg(Op, "neg");
+    return B.CreateSelect(Pos, Op, Neg);
+  }
+};
+
+struct IsDigitOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    // We require integer(i32)
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+        !FT->getParamType(0)->isIntegerTy(32))
+      return 0;
+
+    // isdigit(c) -> (c-'0') <u 10
+    Value *Op = CI->getArgOperand(0);
+    Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+    Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+    return B.CreateZExt(Op, CI->getType());
+  }
+};
+
+struct IsAsciiOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    // We require integer(i32)
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+        !FT->getParamType(0)->isIntegerTy(32))
+      return 0;
+
+    // isascii(c) -> c <u 128
+    Value *Op = CI->getArgOperand(0);
+    Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+    return B.CreateZExt(Op, CI->getType());
+  }
+};
+
+struct ToAsciiOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    // We require i32(i32)
+    if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isIntegerTy(32))
+      return 0;
+
+    // toascii(c) -> c & 0x7f
+    return B.CreateAnd(CI->getArgOperand(0),
+                       ConstantInt::get(CI->getType(),0x7F));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct PrintFOpt : public LibCallOptimization {
+  Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    // Check for a fixed format string.
+    StringRef FormatStr;
+    if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+      return 0;
+
+    // Empty format string -> noop.
+    if (FormatStr.empty())  // Tolerate printf's declared void.
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 0);
+
+    // Do not do any of the following transformations if the printf return value
+    // is used, in general the printf return value is not compatible with either
+    // putchar() or puts().
+    if (!CI->use_empty())
+      return 0;
+
+    // printf("x") -> putchar('x'), even for '%'.
+    if (FormatStr.size() == 1) {
+      Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+      if (CI->use_empty() || !Res) return Res;
+      return B.CreateIntCast(Res, CI->getType(), true);
+    }
+
+    // printf("foo\n") --> puts("foo")
+    if (FormatStr[FormatStr.size()-1] == '\n' &&
+        FormatStr.find('%') == std::string::npos) {  // no format characters.
+      // Create a string literal with no \n on it.  We expect the constant merge
+      // pass to be run after this pass, to merge duplicate strings.
+      FormatStr = FormatStr.drop_back();
+      Value *GV = B.CreateGlobalString(FormatStr, "str");
+      Value *NewCI = EmitPutS(GV, B, TD, TLI);
+      return (CI->use_empty() || !NewCI) ?
+              NewCI :
+              ConstantInt::get(CI->getType(), FormatStr.size()+1);
+    }
+
+    // Optimize specific format strings.
+    // printf("%c", chr) --> putchar(chr)
+    if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+        CI->getArgOperand(1)->getType()->isIntegerTy()) {
+      Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
+
+      if (CI->use_empty() || !Res) return Res;
+      return B.CreateIntCast(Res, CI->getType(), true);
+    }
+
+    // printf("%s\n", str) --> puts(str)
+    if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+        CI->getArgOperand(1)->getType()->isPointerTy()) {
+      return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
+    }
+    return 0;
+  }
+
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // printf(format, ...) -> iprintf(format, ...) if no floating point
+    // arguments.
+    if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *IPrintFFn =
+        M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(IPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
+};
+
+struct SPrintFOpt : public LibCallOptimization {
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    // Check for a fixed format string.
+    StringRef FormatStr;
+    if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+      return 0;
+
+    // If we just have a format string (nothing else crazy) transform it.
+    if (CI->getNumArgOperands() == 2) {
+      // Make sure there's no % in the constant array.  We could try to handle
+      // %% -> % in the future if we cared.
+      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+        if (FormatStr[i] == '%')
+          return 0; // we found a format specifier, bail out.
+
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                     ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+                                      FormatStr.size() + 1), 1);   // nul byte.
+      return ConstantInt::get(CI->getType(), FormatStr.size());
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+        CI->getNumArgOperands() < 3)
+      return 0;
+
+    // Decode the second character of the format string.
+    if (FormatStr[1] == 'c') {
+      // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+      Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+      Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+      B.CreateStore(V, Ptr);
+      Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+      B.CreateStore(B.getInt8(0), Ptr);
+
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    if (FormatStr[1] == 's') {
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+      if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+
+      Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+      if (!Len)
+        return 0;
+      Value *IncLen = B.CreateAdd(Len,
+                                  ConstantInt::get(Len->getType(), 1),
+                                  "leninc");
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+
+      // The sprintf result is the unincremented number of bytes in the string.
+      return B.CreateIntCast(Len, CI->getType(), false);
+    }
+    return 0;
+  }
+
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer arguments and an integer result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+    // point arguments.
+    if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *SIPrintFFn =
+        M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(SIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
+};
+
+struct FPrintFOpt : public LibCallOptimization {
+  Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    // All the optimizations depend on the format string.
+    StringRef FormatStr;
+    if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+      return 0;
+
+    // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+    if (CI->getNumArgOperands() == 2) {
+      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+        if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.
+          return 0; // We found a format specifier.
+
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+                                ConstantInt::get(TD->getIntPtrType(*Context),
+                                                 FormatStr.size()),
+                                CI->getArgOperand(0), B, TD, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+        CI->getNumArgOperands() < 3)
+      return 0;
+
+    // Decode the second character of the format string.
+    if (FormatStr[1] == 'c') {
+      // fprintf(F, "%c", chr) --> fputc(chr, F)
+      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+      Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+                               TD, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+    }
+
+    if (FormatStr[1] == 's') {
+      // fprintf(F, "%s", str) --> fputs(str, F)
+      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+        return 0;
+      return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+    }
+    return 0;
+  }
+
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed paramters as pointers and integer result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+    // floating point arguments.
+    if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *FIPrintFFn =
+        M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(FIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
+};
+
+struct FWriteOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require a pointer, an integer, an integer, a pointer, returning integer.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isIntegerTy() ||
+        !FT->getParamType(2)->isIntegerTy() ||
+        !FT->getParamType(3)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    // Get the element size and count.
+    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!SizeC || !CountC) return 0;
+    uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+    // If this is writing zero records, remove the call (it's a noop).
+    if (Bytes == 0)
+      return ConstantInt::get(CI->getType(), 0);
+
+    // If this is writing one byte, turn it into fputc.
+    // This optimisation is only valid, if the return value is unused.
+    if (Bytes == 1 && CI->use_empty()) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
+      Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+      Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+    }
+
+    return 0;
+  }
+};
+
+struct FPutsOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require DataLayout.
+    if (!TD) return 0;
+
+    // Require two pointers.  Also, we can't optimize if return value is used.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !CI->use_empty())
+      return 0;
+
+    // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+    uint64_t Len = GetStringLength(CI->getArgOperand(0));
+    if (!Len) return 0;
+    // Known to have no uses (see above).
+    return EmitFWrite(CI->getArgOperand(0),
+                      ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+                      CI->getArgOperand(1), B, TD, TLI);
+  }
+};
+
+struct PutsOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    // Check for a constant string.
+    StringRef Str;
+    if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+      return 0;
+
+    if (Str.empty() && CI->use_empty()) {
+      // puts("") -> putchar('\n')
+      Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+      if (CI->use_empty() || !Res) return Res;
+      return B.CreateIntCast(Res, CI->getType(), true);
+    }
+
+    return 0;
+  }
+};
+
 } // End anonymous namespace.
 
 namespace llvm {
@@ -1024,98 +1673,252 @@ class LibCallSimplifierImpl {
   const DataLayout *TD;
   const TargetLibraryInfo *TLI;
   const LibCallSimplifier *LCS;
-  StringMap<LibCallOptimization*> Optimizations;
-
-  // Fortified library call optimizations.
-  MemCpyChkOpt MemCpyChk;
-  MemMoveChkOpt MemMoveChk;
-  MemSetChkOpt MemSetChk;
-  StrCpyChkOpt StrCpyChk;
-  StpCpyChkOpt StpCpyChk;
-  StrNCpyChkOpt StrNCpyChk;
-
-  // String library call optimizations.
-  StrCatOpt StrCat;
-  StrNCatOpt StrNCat;
-  StrChrOpt StrChr;
-  StrRChrOpt StrRChr;
-  StrCmpOpt StrCmp;
-  StrNCmpOpt StrNCmp;
-  StrCpyOpt StrCpy;
-  StpCpyOpt StpCpy;
-  StrNCpyOpt StrNCpy;
-  StrLenOpt StrLen;
-  StrPBrkOpt StrPBrk;
-  StrToOpt StrTo;
-  StrSpnOpt StrSpn;
-  StrCSpnOpt StrCSpn;
-  StrStrOpt StrStr;
-
-  // Memory library call optimizations.
-  MemCmpOpt MemCmp;
-  MemCpyOpt MemCpy;
-  MemMoveOpt MemMove;
-  MemSetOpt MemSet;
-
-  void initOptimizations();
-  void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
+  bool UnsafeFPShrink;
+
+  // Math library call optimizations.
+  CosOpt Cos;
+  PowOpt Pow;
+  Exp2Opt Exp2;
 public:
   LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
-                        const LibCallSimplifier *LCS) {
+                        const LibCallSimplifier *LCS,
+                        bool UnsafeFPShrink = false)
+    : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
     this->TD = TD;
     this->TLI = TLI;
     this->LCS = LCS;
+    this->UnsafeFPShrink = UnsafeFPShrink;
   }
 
   Value *optimizeCall(CallInst *CI);
+  LibCallOptimization *lookupOptimization(CallInst *CI);
+  bool hasFloatVersion(StringRef FuncName);
 };
 
-void LibCallSimplifierImpl::initOptimizations() {
-  // Fortified library call optimizations.
-  Optimizations["__memcpy_chk"] = &MemCpyChk;
-  Optimizations["__memmove_chk"] = &MemMoveChk;
-  Optimizations["__memset_chk"] = &MemSetChk;
-  Optimizations["__strcpy_chk"] = &StrCpyChk;
-  Optimizations["__stpcpy_chk"] = &StpCpyChk;
-  Optimizations["__strncpy_chk"] = &StrNCpyChk;
-  Optimizations["__stpncpy_chk"] = &StrNCpyChk;
-
-  // String library call optimizations.
-  addOpt(LibFunc::strcat, &StrCat);
-  addOpt(LibFunc::strncat, &StrNCat);
-  addOpt(LibFunc::strchr, &StrChr);
-  addOpt(LibFunc::strrchr, &StrRChr);
-  addOpt(LibFunc::strcmp, &StrCmp);
-  addOpt(LibFunc::strncmp, &StrNCmp);
-  addOpt(LibFunc::strcpy, &StrCpy);
-  addOpt(LibFunc::stpcpy, &StpCpy);
-  addOpt(LibFunc::strncpy, &StrNCpy);
-  addOpt(LibFunc::strlen, &StrLen);
-  addOpt(LibFunc::strpbrk, &StrPBrk);
-  addOpt(LibFunc::strtol, &StrTo);
-  addOpt(LibFunc::strtod, &StrTo);
-  addOpt(LibFunc::strtof, &StrTo);
-  addOpt(LibFunc::strtoul, &StrTo);
-  addOpt(LibFunc::strtoll, &StrTo);
-  addOpt(LibFunc::strtold, &StrTo);
-  addOpt(LibFunc::strtoull, &StrTo);
-  addOpt(LibFunc::strspn, &StrSpn);
-  addOpt(LibFunc::strcspn, &StrCSpn);
-  addOpt(LibFunc::strstr, &StrStr);
-
-  // Memory library call optimizations.
-  addOpt(LibFunc::memcmp, &MemCmp);
-  addOpt(LibFunc::memcpy, &MemCpy);
-  addOpt(LibFunc::memmove, &MemMove);
-  addOpt(LibFunc::memset, &MemSet);
+bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+  LibFunc::Func Func;
+  SmallString<20> FloatFuncName = FuncName;
+  FloatFuncName += 'f';
+  if (TLI->getLibFunc(FloatFuncName, Func))
+    return TLI->has(Func);
+  return false;
 }
 
-Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
-  if (Optimizations.empty())
-    initOptimizations();
-
+// Fortified library call optimizations.
+static MemCpyChkOpt MemCpyChk;
+static MemMoveChkOpt MemMoveChk;
+static MemSetChkOpt MemSetChk;
+static StrCpyChkOpt StrCpyChk;
+static StpCpyChkOpt StpCpyChk;
+static StrNCpyChkOpt StrNCpyChk;
+
+// String library call optimizations.
+static StrCatOpt StrCat;
+static StrNCatOpt StrNCat;
+static StrChrOpt StrChr;
+static StrRChrOpt StrRChr;
+static StrCmpOpt StrCmp;
+static StrNCmpOpt StrNCmp;
+static StrCpyOpt StrCpy;
+static StpCpyOpt StpCpy;
+static StrNCpyOpt StrNCpy;
+static StrLenOpt StrLen;
+static StrPBrkOpt StrPBrk;
+static StrToOpt StrTo;
+static StrSpnOpt StrSpn;
+static StrCSpnOpt StrCSpn;
+static StrStrOpt StrStr;
+
+// Memory library call optimizations.
+static MemCmpOpt MemCmp;
+static MemCpyOpt MemCpy;
+static MemMoveOpt MemMove;
+static MemSetOpt MemSet;
+
+// Math library call optimizations.
+static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+
+  // Integer library call optimizations.
+static FFSOpt FFS;
+static AbsOpt Abs;
+static IsDigitOpt IsDigit;
+static IsAsciiOpt IsAscii;
+static ToAsciiOpt ToAscii;
+
+// Formatting and IO library call optimizations.
+static PrintFOpt PrintF;
+static SPrintFOpt SPrintF;
+static FPrintFOpt FPrintF;
+static FWriteOpt FWrite;
+static FPutsOpt FPuts;
+static PutsOpt Puts;
+
+LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+  LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
-  LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+  StringRef FuncName = Callee->getName();
+
+  // Next check for intrinsics.
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::pow:
+       return &Pow;
+    case Intrinsic::exp2:
+       return &Exp2;
+    default:
+       return 0;
+    }
+  }
+
+  // Then check for known library functions.
+  if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+    switch (Func) {
+      case LibFunc::strcat:
+        return &StrCat;
+      case LibFunc::strncat:
+        return &StrNCat;
+      case LibFunc::strchr:
+        return &StrChr;
+      case LibFunc::strrchr:
+        return &StrRChr;
+      case LibFunc::strcmp:
+        return &StrCmp;
+      case LibFunc::strncmp:
+        return &StrNCmp;
+      case LibFunc::strcpy:
+        return &StrCpy;
+      case LibFunc::stpcpy:
+        return &StpCpy;
+      case LibFunc::strncpy:
+        return &StrNCpy;
+      case LibFunc::strlen:
+        return &StrLen;
+      case LibFunc::strpbrk:
+        return &StrPBrk;
+      case LibFunc::strtol:
+      case LibFunc::strtod:
+      case LibFunc::strtof:
+      case LibFunc::strtoul:
+      case LibFunc::strtoll:
+      case LibFunc::strtold:
+      case LibFunc::strtoull:
+        return &StrTo;
+      case LibFunc::strspn:
+        return &StrSpn;
+      case LibFunc::strcspn:
+        return &StrCSpn;
+      case LibFunc::strstr:
+        return &StrStr;
+      case LibFunc::memcmp:
+        return &MemCmp;
+      case LibFunc::memcpy:
+        return &MemCpy;
+      case LibFunc::memmove:
+        return &MemMove;
+      case LibFunc::memset:
+        return &MemSet;
+      case LibFunc::cosf:
+      case LibFunc::cos:
+      case LibFunc::cosl:
+        return &Cos;
+      case LibFunc::powf:
+      case LibFunc::pow:
+      case LibFunc::powl:
+        return &Pow;
+      case LibFunc::exp2l:
+      case LibFunc::exp2:
+      case LibFunc::exp2f:
+        return &Exp2;
+      case LibFunc::ffs:
+      case LibFunc::ffsl:
+      case LibFunc::ffsll:
+        return &FFS;
+      case LibFunc::abs:
+      case LibFunc::labs:
+      case LibFunc::llabs:
+        return &Abs;
+      case LibFunc::isdigit:
+        return &IsDigit;
+      case LibFunc::isascii:
+        return &IsAscii;
+      case LibFunc::toascii:
+        return &ToAscii;
+      case LibFunc::printf:
+        return &PrintF;
+      case LibFunc::sprintf:
+        return &SPrintF;
+      case LibFunc::fprintf:
+        return &FPrintF;
+      case LibFunc::fwrite:
+        return &FWrite;
+      case LibFunc::fputs:
+        return &FPuts;
+      case LibFunc::puts:
+        return &Puts;
+      case LibFunc::ceil:
+      case LibFunc::fabs:
+      case LibFunc::floor:
+      case LibFunc::rint:
+      case LibFunc::round:
+      case LibFunc::nearbyint:
+      case LibFunc::trunc:
+        if (hasFloatVersion(FuncName))
+          return &UnaryDoubleFP;
+        return 0;
+      case LibFunc::acos:
+      case LibFunc::acosh:
+      case LibFunc::asin:
+      case LibFunc::asinh:
+      case LibFunc::atan:
+      case LibFunc::atanh:
+      case LibFunc::cbrt:
+      case LibFunc::cosh:
+      case LibFunc::exp:
+      case LibFunc::exp10:
+      case LibFunc::expm1:
+      case LibFunc::log:
+      case LibFunc::log10:
+      case LibFunc::log1p:
+      case LibFunc::log2:
+      case LibFunc::logb:
+      case LibFunc::sin:
+      case LibFunc::sinh:
+      case LibFunc::sqrt:
+      case LibFunc::tan:
+      case LibFunc::tanh:
+        if (UnsafeFPShrink && hasFloatVersion(FuncName))
+         return &UnsafeUnaryDoubleFP;
+        return 0;
+      case LibFunc::memcpy_chk:
+        return &MemCpyChk;
+      default:
+        return 0;
+      }
+  }
+
+  // Finally check for fortified library calls.
+  if (FuncName.endswith("_chk")) {
+    if (FuncName == "__memmove_chk")
+      return &MemMoveChk;
+    else if (FuncName == "__memset_chk")
+      return &MemSetChk;
+    else if (FuncName == "__strcpy_chk")
+      return &StrCpyChk;
+    else if (FuncName == "__stpcpy_chk")
+      return &StpCpyChk;
+    else if (FuncName == "__strncpy_chk")
+      return &StrNCpyChk;
+    else if (FuncName == "__stpncpy_chk")
+      return &StrNCpyChk;
+  }
+
+  return 0;
+
+}
+
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+  LibCallOptimization *LCO = lookupOptimization(CI);
   if (LCO) {
     IRBuilder<> Builder(CI);
     return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
@@ -1123,14 +1926,10 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
   return 0;
 }
 
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
-  if (TLI->has(F))
-    Optimizations[TLI->getName(F)] = Opt;
-}
-
 LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
-                                     const TargetLibraryInfo *TLI) {
-  Impl = new LibCallSimplifierImpl(TD, TLI, this);
+                                     const TargetLibraryInfo *TLI,
+                                     bool UnsafeFPShrink) {
+  Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink);
 }
 
 LibCallSimplifier::~LibCallSimplifier() {
@@ -1138,6 +1937,7 @@ LibCallSimplifier::~LibCallSimplifier() {
 }
 
 Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+  if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0;
   return Impl->optimizeCall(CI);
 }
 
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index b1cad06dffe9..560f58160753 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -15,12 +15,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 char UnifyFunctionExitNodes::ID = 0;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a30b09321b5e..b5941bdf2411 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Metadata.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
 using namespace llvm;
 
 // Out of line method to get vtable etc for class.
@@ -63,14 +63,29 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
     // Check all operands to see if any need to be remapped.
     for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
       Value *OP = MD->getOperand(i);
-      if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
+      if (OP == 0) continue;
+      Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+      // Use identity map if Mapped_Op is null and we can ignore missing
+      // entries.
+      if (Mapped_OP == OP ||
+          (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries)))
+        continue;
 
       // Ok, at least one operand needs remapping.  
       SmallVector<Value*, 4> Elts;
       Elts.reserve(MD->getNumOperands());
       for (i = 0; i != e; ++i) {
         Value *Op = MD->getOperand(i);
-        Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
+        if (Op == 0)
+          Elts.push_back(0);
+        else {
+          Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+          // Use identity map if Mapped_Op is null and we can ignore missing
+          // entries.
+          if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
+            Mapped_Op = Op;
+          Elts.push_back(Mapped_Op);
+        }
       }
       MDNode *NewMD = MDNode::get(V->getContext(), Elts);
       Dummy->replaceAllUsesWith(NewMD);